From ce93c366c451e27a49583cbb809bf5579a4bcf15 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Thu, 17 Oct 2024 19:12:56 -0400 Subject: [PATCH 01/76] Migrate NVText Normalizing APIs to Pylibcudf (#17072) Apart of #15162. Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17072 --- .../api_docs/pylibcudf/nvtext/index.rst | 1 + .../api_docs/pylibcudf/nvtext/normalize.rst | 6 ++ .../cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx | 13 ++-- python/cudf/cudf/_lib/nvtext/normalize.pyx | 38 ++++------- .../pylibcudf/pylibcudf/nvtext/CMakeLists.txt | 2 +- .../pylibcudf/pylibcudf/nvtext/__init__.pxd | 4 +- python/pylibcudf/pylibcudf/nvtext/__init__.py | 10 ++- .../pylibcudf/pylibcudf/nvtext/normalize.pxd | 9 +++ .../pylibcudf/pylibcudf/nvtext/normalize.pyx | 64 +++++++++++++++++++ .../pylibcudf/tests/test_nvtext_normalize.py | 42 ++++++++++++ 10 files changed, 155 insertions(+), 34 deletions(-) create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/normalize.rst create mode 100644 python/pylibcudf/pylibcudf/nvtext/normalize.pxd create mode 100644 python/pylibcudf/pylibcudf/nvtext/normalize.pyx create mode 100644 python/pylibcudf/pylibcudf/tests/test_nvtext_normalize.py diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst index 58303356336..3a79c869971 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst @@ -9,3 +9,4 @@ nvtext jaccard minhash ngrams_tokenize + normalize diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/normalize.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/normalize.rst new file mode 100644 index 00000000000..e496f6a45da --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/normalize.rst @@ -0,0 +1,6 @@ +========= +normalize +========= + +.. automodule:: pylibcudf.nvtext.normalize + :members: diff --git a/python/cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx b/python/cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx index 6521116eafe..c125d92a24e 100644 --- a/python/cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx +++ b/python/cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx @@ -14,10 +14,11 @@ def ngrams_tokenize( object py_delimiter, object py_separator ): - result = nvtext.ngrams_tokenize.ngrams_tokenize( - input.to_pylibcudf(mode="read"), - ngrams, - py_delimiter.device_value.c_value, - py_separator.device_value.c_value + return Column.from_pylibcudf( + nvtext.ngrams_tokenize.ngrams_tokenize( + input.to_pylibcudf(mode="read"), + ngrams, + py_delimiter.device_value.c_value, + py_separator.device_value.c_value + ) ) - return Column.from_pylibcudf(result) diff --git a/python/cudf/cudf/_lib/nvtext/normalize.pyx b/python/cudf/cudf/_lib/nvtext/normalize.pyx index 5e86a9ce959..633bc902db1 100644 --- a/python/cudf/cudf/_lib/nvtext/normalize.pyx +++ b/python/cudf/cudf/_lib/nvtext/normalize.pyx @@ -3,36 +3,24 @@ from cudf.core.buffer import acquire_spill_lock from libcpp cimport bool -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.nvtext.normalize cimport ( - normalize_characters as cpp_normalize_characters, - normalize_spaces as cpp_normalize_spaces, -) from cudf._lib.column cimport Column - -@acquire_spill_lock() -def normalize_spaces(Column strings): - cdef column_view c_strings = strings.view() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_normalize_spaces(c_strings)) - - return Column.from_unique_ptr(move(c_result)) +from pylibcudf import nvtext @acquire_spill_lock() -def normalize_characters(Column strings, bool do_lower=True): - cdef column_view c_strings = strings.view() - cdef unique_ptr[column] c_result +def normalize_spaces(Column input): + result = nvtext.normalize.normalize_spaces( + input.to_pylibcudf(mode="read") + ) + return Column.from_pylibcudf(result) - with nogil: - c_result = move(cpp_normalize_characters(c_strings, do_lower)) - return Column.from_unique_ptr(move(c_result)) +@acquire_spill_lock() +def normalize_characters(Column input, bool do_lower=True): + result = nvtext.normalize.normalize_characters( + input.to_pylibcudf(mode="read"), + do_lower, + ) + return Column.from_pylibcudf(result) diff --git a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt index 94df9bbbebb..e01ca3fbdd3 100644 --- a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt @@ -13,7 +13,7 @@ # ============================================================================= set(cython_sources edit_distance.pyx generate_ngrams.pyx jaccard.pyx minhash.pyx - ngrams_tokenize.pyx + ngrams_tokenize.pyx normalize.pyx ) set(linked_libraries cudf::cudf) diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd index b6659827688..08dbec84090 100644 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd @@ -6,6 +6,7 @@ from . cimport ( jaccard, minhash, ngrams_tokenize, + normalize, ) __all__ = [ @@ -13,5 +14,6 @@ __all__ = [ "generate_ngrams", "jaccard", "minhash", - "ngrams_tokenize" + "ngrams_tokenize", + "normalize", ] diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.py b/python/pylibcudf/pylibcudf/nvtext/__init__.py index f74633a3521..6dccf3dd9cf 100644 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.py +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.py @@ -1,6 +1,13 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from . import edit_distance, generate_ngrams, jaccard, minhash, ngrams_tokenize +from . import ( + edit_distance, + generate_ngrams, + jaccard, + minhash, + ngrams_tokenize, + normalize, +) __all__ = [ "edit_distance", @@ -8,4 +15,5 @@ "jaccard", "minhash", "ngrams_tokenize", + "normalize", ] diff --git a/python/pylibcudf/pylibcudf/nvtext/normalize.pxd b/python/pylibcudf/pylibcudf/nvtext/normalize.pxd new file mode 100644 index 00000000000..90676145afa --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/normalize.pxd @@ -0,0 +1,9 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp cimport bool +from pylibcudf.column cimport Column + + +cpdef Column normalize_spaces(Column input) + +cpdef Column normalize_characters(Column input, bool do_lower_case) diff --git a/python/pylibcudf/pylibcudf/nvtext/normalize.pyx b/python/pylibcudf/pylibcudf/nvtext/normalize.pyx new file mode 100644 index 00000000000..637d900b659 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/normalize.pyx @@ -0,0 +1,64 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp cimport bool +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.nvtext.normalize cimport ( + normalize_characters as cpp_normalize_characters, + normalize_spaces as cpp_normalize_spaces, +) + + +cpdef Column normalize_spaces(Column input): + """ + Returns a new strings column by normalizing the whitespace in + each string in the input column. + + For details, see :cpp:func:`normalize_spaces` + + Parameters + ---------- + input : Column + Input strings + + Returns + ------- + Column + New strings columns of normalized strings. + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = cpp_normalize_spaces(input.view()) + + return Column.from_libcudf(move(c_result)) + + +cpdef Column normalize_characters(Column input, bool do_lower_case): + """ + Normalizes strings characters for tokenizing. + + For details, see :cpp:func:`normalize_characters` + + Parameters + ---------- + input : Column + Input strings + do_lower_case : bool + If true, upper-case characters are converted to lower-case + and accents are stripped from those characters. If false, + accented and upper-case characters are not transformed. + + Returns + ------- + Column + Normalized strings column + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = cpp_normalize_characters(input.view(), do_lower_case) + + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_normalize.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_normalize.py new file mode 100644 index 00000000000..fe28b83c09a --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_normalize.py @@ -0,0 +1,42 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +import pyarrow as pa +import pylibcudf as plc +import pytest +from utils import assert_column_eq + + +@pytest.fixture(scope="module") +def norm_spaces_input_data(): + arr = ["a b", " c d\n", "e \t f "] + return pa.array(arr) + + +@pytest.fixture(scope="module") +def norm_chars_input_data(): + arr = ["éâîô\teaio", "ĂĆĖÑÜ", "ACENU", "$24.08", "[a,bb]"] + return pa.array(arr) + + +def test_normalize_spaces(norm_spaces_input_data): + result = plc.nvtext.normalize.normalize_spaces( + plc.interop.from_arrow(norm_spaces_input_data) + ) + expected = pa.array(["a b", "c d", "e f"]) + assert_column_eq(result, expected) + + +@pytest.mark.parametrize("do_lower", [True, False]) +def test_normalize_characters(norm_chars_input_data, do_lower): + result = plc.nvtext.normalize.normalize_characters( + plc.interop.from_arrow(norm_chars_input_data), + do_lower, + ) + expected = pa.array( + ["eaio eaio", "acenu", "acenu", " $ 24 . 08", " [ a , bb ] "] + ) + if not do_lower: + expected = pa.array( + ["éâîô eaio", "ĂĆĖÑÜ", "ACENU", " $ 24 . 08", " [ a , bb ] "] + ) + assert_column_eq(result, expected) From 8ebf0d40669014e42b9c9079cdc1cc3ab5c4f7a8 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 17 Oct 2024 18:27:58 -0700 Subject: [PATCH 02/76] Add device aggregators used by shared memory groupby (#17031) This work is part of splitting the original bulk shared memory groupby PR #16619. It introduces two device-side element aggregators: - `shmem_element_aggregator`: aggregates data from global memory sources to shared memory targets, - `gmem_element_aggregator`: aggregates from shared memory sources to global memory targets. These two aggregators are similar to the `elementwise_aggregator` functionality. Follow-up work is tracked via #17032. Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Muhammad Haseeb (https://github.com/mhaseeb123) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/17031 --- .../detail/aggregation/device_aggregators.cuh | 63 ++-- .../groupby/hash/global_memory_aggregator.cuh | 277 ++++++++++++++++++ .../groupby/hash/shared_memory_aggregator.cuh | 263 +++++++++++++++++ 3 files changed, 570 insertions(+), 33 deletions(-) create mode 100644 cpp/src/groupby/hash/global_memory_aggregator.cuh create mode 100644 cpp/src/groupby/hash/shared_memory_aggregator.cuh diff --git a/cpp/include/cudf/detail/aggregation/device_aggregators.cuh b/cpp/include/cudf/detail/aggregation/device_aggregators.cuh index 10be5e1d36f..204eee49a2a 100644 --- a/cpp/include/cudf/detail/aggregation/device_aggregators.cuh +++ b/cpp/include/cudf/detail/aggregation/device_aggregators.cuh @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #pragma once #include @@ -29,12 +28,31 @@ #include namespace cudf::detail { +/// Checks if an aggregation kind needs to operate on the underlying storage type +template +__device__ constexpr bool uses_underlying_type() +{ + return k == aggregation::MIN or k == aggregation::MAX or k == aggregation::SUM; +} + +/// Gets the underlying target type for the given source type and aggregation kind +template +using underlying_target_t = + cuda::std::conditional_t(), + cudf::device_storage_type_t>, + cudf::detail::target_type_t>; + +/// Gets the underlying source type for the given source type and aggregation kind +template +using underlying_source_t = + cuda::std::conditional_t(), cudf::device_storage_type_t, Source>; + template struct update_target_element { - __device__ void operator()(mutable_column_device_view target, - size_type target_index, - column_device_view source, - size_type source_index) const noexcept + __device__ void operator()(mutable_column_device_view, + size_type, + column_device_view, + size_type) const noexcept { CUDF_UNREACHABLE("Invalid source type and aggregation combination."); } @@ -51,8 +69,6 @@ struct update_target_element< column_device_view source, size_type source_index) const noexcept { - if (source.is_null(source_index)) { return; } - using Target = target_type_t; cudf::detail::atomic_min(&target.element(target_index), static_cast(source.element(source_index))); @@ -72,8 +88,6 @@ struct update_target_element< column_device_view source, size_type source_index) const noexcept { - if (source.is_null(source_index)) { return; } - using Target = target_type_t; using DeviceTarget = device_storage_type_t; using DeviceSource = device_storage_type_t; @@ -96,8 +110,6 @@ struct update_target_element< column_device_view source, size_type source_index) const noexcept { - if (source.is_null(source_index)) { return; } - using Target = target_type_t; cudf::detail::atomic_max(&target.element(target_index), static_cast(source.element(source_index))); @@ -117,8 +129,6 @@ struct update_target_element< column_device_view source, size_type source_index) const noexcept { - if (source.is_null(source_index)) { return; } - using Target = target_type_t; using DeviceTarget = device_storage_type_t; using DeviceSource = device_storage_type_t; @@ -141,8 +151,6 @@ struct update_target_element< column_device_view source, size_type source_index) const noexcept { - if (source.is_null(source_index)) { return; } - using Target = target_type_t; cudf::detail::atomic_add(&target.element(target_index), static_cast(source.element(source_index))); @@ -162,8 +170,6 @@ struct update_target_element< column_device_view source, size_type source_index) const noexcept { - if (source.is_null(source_index)) { return; } - using Target = target_type_t; using DeviceTarget = device_storage_type_t; using DeviceSource = device_storage_type_t; @@ -197,10 +203,10 @@ struct update_target_from_dictionary { template ()>* = nullptr> - __device__ void operator()(mutable_column_device_view target, - size_type target_index, - column_device_view source, - size_type source_index) const noexcept + __device__ void operator()(mutable_column_device_view, + size_type, + column_device_view, + size_type) const noexcept { } }; @@ -227,8 +233,6 @@ struct update_target_element< column_device_view source, size_type source_index) const noexcept { - if (source.is_null(source_index)) { return; } - dispatch_type_and_aggregation( source.child(cudf::dictionary_column_view::keys_column_index).type(), k, @@ -249,8 +253,6 @@ struct update_target_element; auto value = static_cast(source.element(source_index)); cudf::detail::atomic_add(&target.element(target_index), value * value); @@ -267,8 +269,6 @@ struct update_target_element; cudf::detail::atomic_mul(&target.element(target_index), static_cast(source.element(source_index))); @@ -286,8 +286,6 @@ struct update_target_element< column_device_view source, size_type source_index) const noexcept { - if (source.is_null(source_index)) { return; } - using Target = target_type_t; cudf::detail::atomic_add(&target.element(target_index), Target{1}); @@ -323,8 +321,6 @@ struct update_target_element< column_device_view source, size_type source_index) const noexcept { - if (source.is_null(source_index)) { return; } - using Target = target_type_t; auto old = cudf::detail::atomic_cas( &target.element(target_index), ARGMAX_SENTINEL, source_index); @@ -349,8 +345,6 @@ struct update_target_element< column_device_view source, size_type source_index) const noexcept { - if (source.is_null(source_index)) { return; } - using Target = target_type_t; auto old = cudf::detail::atomic_cas( &target.element(target_index), ARGMIN_SENTINEL, source_index); @@ -376,6 +370,9 @@ struct elementwise_aggregator { column_device_view source, size_type source_index) const noexcept { + if constexpr (k != cudf::aggregation::COUNT_ALL) { + if (source.is_null(source_index)) { return; } + } update_target_element{}(target, target_index, source, source_index); } }; diff --git a/cpp/src/groupby/hash/global_memory_aggregator.cuh b/cpp/src/groupby/hash/global_memory_aggregator.cuh new file mode 100644 index 00000000000..50e89c727ff --- /dev/null +++ b/cpp/src/groupby/hash/global_memory_aggregator.cuh @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +namespace cudf::groupby::detail::hash { +template +struct update_target_element_gmem { + __device__ void operator()(cudf::mutable_column_device_view, + cudf::size_type, + cudf::column_device_view, + cuda::std::byte*, + cudf::size_type) const noexcept + { + CUDF_UNREACHABLE("Invalid source type and aggregation combination."); + } +}; + +template +struct update_target_element_gmem< + Source, + cudf::aggregation::MIN, + cuda::std::enable_if_t() && cudf::has_atomic_support()>> { + __device__ void operator()(cudf::mutable_column_device_view target, + cudf::size_type target_index, + cudf::column_device_view source_column, + cuda::std::byte* source, + cudf::size_type source_index) const noexcept + { + using DeviceType = cudf::detail::underlying_target_t; + DeviceType* source_casted = reinterpret_cast(source); + cudf::detail::atomic_min(&target.element(target_index), + static_cast(source_casted[source_index])); + + if (target.is_null(target_index)) { target.set_valid(target_index); } + } +}; + +template +struct update_target_element_gmem< + Source, + cudf::aggregation::MAX, + cuda::std::enable_if_t() && cudf::has_atomic_support()>> { + __device__ void operator()(cudf::mutable_column_device_view target, + cudf::size_type target_index, + cudf::column_device_view source_column, + cuda::std::byte* source, + cudf::size_type source_index) const noexcept + { + using DeviceType = cudf::detail::underlying_target_t; + DeviceType* source_casted = reinterpret_cast(source); + cudf::detail::atomic_max(&target.element(target_index), + static_cast(source_casted[source_index])); + + if (target.is_null(target_index)) { target.set_valid(target_index); } + } +}; + +template +struct update_target_element_gmem< + Source, + cudf::aggregation::SUM, + cuda::std::enable_if_t() && cudf::has_atomic_support() && + !cudf::is_timestamp()>> { + __device__ void operator()(cudf::mutable_column_device_view target, + cudf::size_type target_index, + cudf::column_device_view source_column, + cuda::std::byte* source, + cudf::size_type source_index) const noexcept + { + using DeviceType = cudf::detail::underlying_target_t; + DeviceType* source_casted = reinterpret_cast(source); + cudf::detail::atomic_add(&target.element(target_index), + static_cast(source_casted[source_index])); + + if (target.is_null(target_index)) { target.set_valid(target_index); } + } +}; + +// The shared memory will already have it squared +template +struct update_target_element_gmem< + Source, + cudf::aggregation::SUM_OF_SQUARES, + cuda::std::enable_if_t()>> { + __device__ void operator()(cudf::mutable_column_device_view target, + cudf::size_type target_index, + cudf::column_device_view source_column, + cuda::std::byte* source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + + Target* source_casted = reinterpret_cast(source); + Target value = static_cast(source_casted[source_index]); + + cudf::detail::atomic_add(&target.element(target_index), value); + + if (target.is_null(target_index)) { target.set_valid(target_index); } + } +}; + +template +struct update_target_element_gmem< + Source, + cudf::aggregation::PRODUCT, + cuda::std::enable_if_t()>> { + __device__ void operator()(cudf::mutable_column_device_view target, + cudf::size_type target_index, + cudf::column_device_view source_column, + cuda::std::byte* source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + + Target* source_casted = reinterpret_cast(source); + cudf::detail::atomic_mul(&target.element(target_index), + static_cast(source_casted[source_index])); + + if (target.is_null(target_index)) { target.set_valid(target_index); } + } +}; + +// Assuming that the target column of COUNT_VALID, COUNT_ALL would be using fixed_width column and +// non-fixed point column +template +struct update_target_element_gmem< + Source, + cudf::aggregation::COUNT_VALID, + cuda::std::enable_if_t< + cudf::detail::is_valid_aggregation()>> { + __device__ void operator()(cudf::mutable_column_device_view target, + cudf::size_type target_index, + cudf::column_device_view source_column, + cuda::std::byte* source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + + Target* source_casted = reinterpret_cast(source); + cudf::detail::atomic_add(&target.element(target_index), + static_cast(source_casted[source_index])); + + // It is assumed the output for COUNT_VALID is initialized to be all valid + } +}; + +template +struct update_target_element_gmem< + Source, + cudf::aggregation::COUNT_ALL, + cuda::std::enable_if_t< + cudf::detail::is_valid_aggregation()>> { + __device__ void operator()(cudf::mutable_column_device_view target, + cudf::size_type target_index, + cudf::column_device_view source_column, + cuda::std::byte* source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + + Target* source_casted = reinterpret_cast(source); + cudf::detail::atomic_add(&target.element(target_index), + static_cast(source_casted[source_index])); + + // It is assumed the output for COUNT_ALL is initialized to be all valid + } +}; + +template +struct update_target_element_gmem< + Source, + cudf::aggregation::ARGMAX, + cuda::std::enable_if_t() and + cudf::is_relationally_comparable()>> { + __device__ void operator()(cudf::mutable_column_device_view target, + cudf::size_type target_index, + cudf::column_device_view source_column, + cuda::std::byte* source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + Target* source_casted = reinterpret_cast(source); + auto source_argmax_index = source_casted[source_index]; + auto old = cudf::detail::atomic_cas( + &target.element(target_index), cudf::detail::ARGMAX_SENTINEL, source_argmax_index); + if (old != cudf::detail::ARGMAX_SENTINEL) { + while (source_column.element(source_argmax_index) > + source_column.element(old)) { + old = + cudf::detail::atomic_cas(&target.element(target_index), old, source_argmax_index); + } + } + + if (target.is_null(target_index)) { target.set_valid(target_index); } + } +}; +template +struct update_target_element_gmem< + Source, + cudf::aggregation::ARGMIN, + cuda::std::enable_if_t() and + cudf::is_relationally_comparable()>> { + __device__ void operator()(cudf::mutable_column_device_view target, + cudf::size_type target_index, + cudf::column_device_view source_column, + cuda::std::byte* source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + Target* source_casted = reinterpret_cast(source); + auto source_argmin_index = source_casted[source_index]; + auto old = cudf::detail::atomic_cas( + &target.element(target_index), cudf::detail::ARGMIN_SENTINEL, source_argmin_index); + if (old != cudf::detail::ARGMIN_SENTINEL) { + while (source_column.element(source_argmin_index) < + source_column.element(old)) { + old = + cudf::detail::atomic_cas(&target.element(target_index), old, source_argmin_index); + } + } + + if (target.is_null(target_index)) { target.set_valid(target_index); } + } +}; + +/** + * @brief A functor that updates a single element in the target column stored in global memory by + * applying an aggregation operation to a corresponding element from a source column in shared + * memory. + * + * This functor can NOT be used for dictionary columns. + * + * This is a redundant copy replicating the behavior of `elementwise_aggregator` from + * `cudf/detail/aggregation/device_aggregators.cuh`. The key difference is that this functor accepts + * a pointer to raw bytes as the source, as `column_device_view` cannot yet be constructed from + * shared memory. + */ +struct gmem_element_aggregator { + template + __device__ void operator()(cudf::mutable_column_device_view target, + cudf::size_type target_index, + cudf::column_device_view source_column, + cuda::std::byte* source, + bool* source_mask, + cudf::size_type source_index) const noexcept + { + // Early exit for all aggregation kinds since shared memory aggregation of + // `COUNT_ALL` is always valid + if (!source_mask[source_index]) { return; } + + update_target_element_gmem{}( + target, target_index, source_column, source, source_index); + } +}; +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/shared_memory_aggregator.cuh b/cpp/src/groupby/hash/shared_memory_aggregator.cuh new file mode 100644 index 00000000000..9cbeeb34b86 --- /dev/null +++ b/cpp/src/groupby/hash/shared_memory_aggregator.cuh @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +namespace cudf::groupby::detail::hash { +template +struct update_target_element_shmem { + __device__ void operator()( + cuda::std::byte*, bool*, cudf::size_type, cudf::column_device_view, cudf::size_type) const + { + CUDF_UNREACHABLE("Invalid source type and aggregation combination."); + } +}; + +template +struct update_target_element_shmem< + Source, + cudf::aggregation::MIN, + cuda::std::enable_if_t() && cudf::has_atomic_support()>> { + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type target_index, + cudf::column_device_view source, + cudf::size_type source_index) const noexcept + { + using DeviceTarget = cudf::detail::underlying_target_t; + using DeviceSource = cudf::detail::underlying_source_t; + + DeviceTarget* target_casted = reinterpret_cast(target); + cudf::detail::atomic_min(&target_casted[target_index], + static_cast(source.element(source_index))); + + if (!target_mask[target_index]) { target_mask[target_index] = true; } + } +}; + +template +struct update_target_element_shmem< + Source, + cudf::aggregation::MAX, + cuda::std::enable_if_t() && cudf::has_atomic_support()>> { + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type target_index, + cudf::column_device_view source, + cudf::size_type source_index) const noexcept + { + using DeviceTarget = cudf::detail::underlying_target_t; + using DeviceSource = cudf::detail::underlying_source_t; + + DeviceTarget* target_casted = reinterpret_cast(target); + cudf::detail::atomic_max(&target_casted[target_index], + static_cast(source.element(source_index))); + + if (!target_mask[target_index]) { target_mask[target_index] = true; } + } +}; + +template +struct update_target_element_shmem< + Source, + cudf::aggregation::SUM, + cuda::std::enable_if_t() && cudf::has_atomic_support() && + !cudf::is_timestamp()>> { + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type target_index, + cudf::column_device_view source, + cudf::size_type source_index) const noexcept + { + using DeviceTarget = cudf::detail::underlying_target_t; + using DeviceSource = cudf::detail::underlying_source_t; + + DeviceTarget* target_casted = reinterpret_cast(target); + cudf::detail::atomic_add(&target_casted[target_index], + static_cast(source.element(source_index))); + + if (!target_mask[target_index]) { target_mask[target_index] = true; } + } +}; + +template +struct update_target_element_shmem< + Source, + cudf::aggregation::SUM_OF_SQUARES, + cuda::std::enable_if_t()>> { + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type target_index, + cudf::column_device_view source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + Target* target_casted = reinterpret_cast(target); + auto value = static_cast(source.element(source_index)); + cudf::detail::atomic_add(&target_casted[target_index], value * value); + + if (!target_mask[target_index]) { target_mask[target_index] = true; } + } +}; + +template +struct update_target_element_shmem< + Source, + cudf::aggregation::PRODUCT, + cuda::std::enable_if_t()>> { + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type target_index, + cudf::column_device_view source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + Target* target_casted = reinterpret_cast(target); + cudf::detail::atomic_mul(&target_casted[target_index], + static_cast(source.element(source_index))); + + if (!target_mask[target_index]) { target_mask[target_index] = true; } + } +}; + +template +struct update_target_element_shmem< + Source, + cudf::aggregation::COUNT_VALID, + cuda::std::enable_if_t< + cudf::detail::is_valid_aggregation()>> { + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type target_index, + cudf::column_device_view source, + cudf::size_type source_index) const noexcept + { + // The nullability was checked prior to this call in the `shmem_element_aggregator` functor + using Target = cudf::detail::target_type_t; + Target* target_casted = reinterpret_cast(target); + cudf::detail::atomic_add(&target_casted[target_index], Target{1}); + } +}; + +template +struct update_target_element_shmem< + Source, + cudf::aggregation::COUNT_ALL, + cuda::std::enable_if_t< + cudf::detail::is_valid_aggregation()>> { + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type target_index, + cudf::column_device_view source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + Target* target_casted = reinterpret_cast(target); + cudf::detail::atomic_add(&target_casted[target_index], Target{1}); + + // Assumes target is already set to be valid + } +}; + +template +struct update_target_element_shmem< + Source, + cudf::aggregation::ARGMAX, + cuda::std::enable_if_t() and + cudf::is_relationally_comparable()>> { + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type target_index, + cudf::column_device_view source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + Target* target_casted = reinterpret_cast(target); + auto old = cudf::detail::atomic_cas( + &target_casted[target_index], cudf::detail::ARGMAX_SENTINEL, source_index); + if (old != cudf::detail::ARGMAX_SENTINEL) { + while (source.element(source_index) > source.element(old)) { + old = cudf::detail::atomic_cas(&target_casted[target_index], old, source_index); + } + } + + if (!target_mask[target_index]) { target_mask[target_index] = true; } + } +}; + +template +struct update_target_element_shmem< + Source, + cudf::aggregation::ARGMIN, + cuda::std::enable_if_t() and + cudf::is_relationally_comparable()>> { + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type target_index, + cudf::column_device_view source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + Target* target_casted = reinterpret_cast(target); + auto old = cudf::detail::atomic_cas( + &target_casted[target_index], cudf::detail::ARGMIN_SENTINEL, source_index); + if (old != cudf::detail::ARGMIN_SENTINEL) { + while (source.element(source_index) < source.element(old)) { + old = cudf::detail::atomic_cas(&target_casted[target_index], old, source_index); + } + } + + if (!target_mask[target_index]) { target_mask[target_index] = true; } + } +}; + +/** + * @brief A functor that updates a single element in the target column stored in shared memory by + * applying an aggregation operation to a corresponding element from a source column in global + * memory. + * + * This functor can NOT be used for dictionary columns. + * + * This is a redundant copy replicating the behavior of `elementwise_aggregator` from + * `cudf/detail/aggregation/device_aggregators.cuh`. The key difference is that this functor accepts + * a pointer to raw bytes as the source, as `column_device_view` cannot yet be constructed from + * shared memory. + */ +struct shmem_element_aggregator { + template + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type target_index, + cudf::column_device_view source, + cudf::size_type source_index) const noexcept + { + // Check nullability for all aggregation kinds but `COUNT_ALL` + if constexpr (k != cudf::aggregation::COUNT_ALL) { + if (source.is_null(source_index)) { return; } + } + update_target_element_shmem{}( + target, target_mask, target_index, source, source_index); + } +}; +} // namespace cudf::groupby::detail::hash From b8917229f8a2446c7e5f697475f76743a05e6856 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Thu, 17 Oct 2024 19:02:30 -0700 Subject: [PATCH 03/76] Control whether a file data source memory-maps the file with an environment variable (#17004) Adds an environment variable, `LIBCUDF_MMAP_ENABLED`, to control whether we memory map the input file in the data source. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Nghia Truong (https://github.com/ttnghia) - Tianyu Liu (https://github.com/kingcrimsontianyu) URL: https://github.com/rapidsai/cudf/pull/17004 --- cpp/src/io/utilities/datasource.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index 28f7f08521e..2daaecadca6 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -15,6 +15,7 @@ */ #include "file_io_utilities.hpp" +#include "getenv_or.hpp" #include #include @@ -392,14 +393,21 @@ std::unique_ptr datasource::create(std::string const& filepath, size_t offset, size_t max_size_estimate) { -#ifdef CUFILE_FOUND - if (cufile_integration::is_always_enabled()) { - // avoid mmap as GDS is expected to be used for most reads + auto const use_memory_mapping = [] { + auto const policy = getenv_or("LIBCUDF_MMAP_ENABLED", std::string{"ON"}); + + if (policy == "ON") { return true; } + if (policy == "OFF") { return false; } + + CUDF_FAIL("Invalid LIBCUDF_MMAP_ENABLED value: " + policy); + }(); + + if (use_memory_mapping) { + return std::make_unique(filepath.c_str(), offset, max_size_estimate); + } else { + // `file_source` reads the file directly, without memory mapping return std::make_unique(filepath.c_str()); } -#endif - // Use our own memory mapping implementation for direct file reads - return std::make_unique(filepath.c_str(), offset, max_size_estimate); } std::unique_ptr datasource::create(host_buffer const& buffer) From 6ca721ca95925328f4d7f2ae473c69a4a0d38af7 Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Fri, 18 Oct 2024 13:24:41 -0400 Subject: [PATCH 04/76] Fix the GDS read/write segfault/bus error when the cuFile policy is set to GDS or ALWAYS (#17122) When `LIBCUDF_CUFILE_POLICY` is set to `GDS` or `ALWAYS`, cuDF uses an internal implementation to call the cuFile API and harness the GDS feature. Recent tests with these two settings were unsuccessful due to program crash. Specifically, for the `PARQUET_READER_NVBENCH`'s `parquet_read_io_compression` benchmark: - GDS write randomly crashed with segmentation fault (SIGSEGV). - GDS read randomly crashed with bus error (SIGBUS). - At the time of crash, stack frame is randomly corrupted. The root cause is the use of dangling reference, which occurs when a variable is captured by reference by nested lambdas. This PR performs a hotfix that turns out to be a 1-char change. Authors: - Tianyu Liu (https://github.com/kingcrimsontianyu) Approvers: - David Wendt (https://github.com/davidwendt) - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17122 --- cpp/src/io/utilities/file_io_utilities.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/utilities/file_io_utilities.cpp b/cpp/src/io/utilities/file_io_utilities.cpp index d7b54399f8d..98ed9b28f0a 100644 --- a/cpp/src/io/utilities/file_io_utilities.cpp +++ b/cpp/src/io/utilities/file_io_utilities.cpp @@ -239,7 +239,7 @@ std::vector> make_sliced_tasks( std::vector> slice_tasks; std::transform(slices.cbegin(), slices.cend(), std::back_inserter(slice_tasks), [&](auto& slice) { return pool.submit_task( - [&] { return function(ptr + slice.offset, slice.size, offset + slice.offset); }); + [=] { return function(ptr + slice.offset, slice.size, offset + slice.offset); }); }); return slice_tasks; } From e1c9a5a524f33c2f429f6efaf4dc6c20a0e764e4 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 18 Oct 2024 14:22:18 -0400 Subject: [PATCH 05/76] Fix clang-tidy violations for span.hpp and hostdevice_vector.hpp (#17124) Errors reported here: https://github.com/rapidsai/cudf/actions/runs/11398977412/job/31716929242 Just adding `[[nodiscard]]` to a few member functions. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Nghia Truong (https://github.com/ttnghia) - Shruti Shivakumar (https://github.com/shrshi) URL: https://github.com/rapidsai/cudf/pull/17124 --- cpp/include/cudf/utilities/span.hpp | 8 ++++---- cpp/src/io/utilities/hostdevice_vector.hpp | 23 +++++++++++++++------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index d558cfb5e85..21ee4fa9e9b 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -425,21 +425,21 @@ class base_2dspan { * * @return A pointer to the first element of the span */ - constexpr auto data() const noexcept { return _flat.data(); } + [[nodiscard]] constexpr auto data() const noexcept { return _flat.data(); } /** * @brief Returns the size in the span as pair. * * @return pair representing rows and columns size of the span */ - constexpr auto size() const noexcept { return _size; } + [[nodiscard]] constexpr auto size() const noexcept { return _size; } /** * @brief Returns the number of elements in the span. * * @return Number of elements in the span */ - constexpr auto count() const noexcept { return _flat.size(); } + [[nodiscard]] constexpr auto count() const noexcept { return _flat.size(); } /** * @brief Checks if the span is empty. @@ -467,7 +467,7 @@ class base_2dspan { * * @return A flattened span of the 2D span */ - constexpr RowType flat_view() const { return _flat; } + [[nodiscard]] constexpr RowType flat_view() const { return _flat; } /** * @brief Construct a 2D span from another 2D span of convertible type diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp index af1ba16a424..f969b45727b 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -176,13 +176,19 @@ class hostdevice_2dvector { operator device_2dspan() const { return {device_span{_data}, _size.second}; } device_2dspan device_view() { return static_cast>(*this); } - device_2dspan device_view() const { return static_cast>(*this); } + [[nodiscard]] device_2dspan device_view() const + { + return static_cast>(*this); + } operator host_2dspan() { return {host_span{_data}, _size.second}; } operator host_2dspan() const { return {host_span{_data}, _size.second}; } host_2dspan host_view() { return static_cast>(*this); } - host_2dspan host_view() const { return static_cast>(*this); } + [[nodiscard]] host_2dspan host_view() const + { + return static_cast>(*this); + } host_span operator[](size_t row) { @@ -194,16 +200,19 @@ class hostdevice_2dvector { return host_span{_data}.subspan(row * _size.second, _size.second); } - auto size() const noexcept { return _size; } - auto count() const noexcept { return _size.first * _size.second; } - auto is_empty() const noexcept { return count() == 0; } + [[nodiscard]] auto size() const noexcept { return _size; } + [[nodiscard]] auto count() const noexcept { return _size.first * _size.second; } + [[nodiscard]] auto is_empty() const noexcept { return count() == 0; } T* base_host_ptr(size_t offset = 0) { return _data.host_ptr(offset); } T* base_device_ptr(size_t offset = 0) { return _data.device_ptr(offset); } - T const* base_host_ptr(size_t offset = 0) const { return _data.host_ptr(offset); } + [[nodiscard]] T const* base_host_ptr(size_t offset = 0) const { return _data.host_ptr(offset); } - T const* base_device_ptr(size_t offset = 0) const { return _data.device_ptr(offset); } + [[nodiscard]] T const* base_device_ptr(size_t offset = 0) const + { + return _data.device_ptr(offset); + } [[nodiscard]] size_t size_bytes() const noexcept { return _data.size_bytes(); } From e242dce9331a544188b591688949fc7d43cc362d Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Fri, 18 Oct 2024 12:24:00 -0700 Subject: [PATCH 06/76] Disable the Parquet reader's wide lists tables GTest by default (#17120) This PR disables Parquet reader's wide lists table gtest by default as it takes several minutes to complete with memcheck. See the discussion on PR #17059 (this [comment](https://github.com/rapidsai/cudf/pull/17059#discussion_r1805342332)) for more context. Authors: - Muhammad Haseeb (https://github.com/mhaseeb123) Approvers: - David Wendt (https://github.com/davidwendt) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17120 --- cpp/tests/io/parquet_reader_test.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/tests/io/parquet_reader_test.cpp b/cpp/tests/io/parquet_reader_test.cpp index ab4645c2e25..7986a3c6d70 100644 --- a/cpp/tests/io/parquet_reader_test.cpp +++ b/cpp/tests/io/parquet_reader_test.cpp @@ -2725,7 +2725,9 @@ TYPED_TEST(ParquetReaderPredicatePushdownTest, FilterTyped) CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result_table); } -TEST_F(ParquetReaderTest, ListsWideTable) +// The test below requires several minutes to complete with memcheck, thus it is disabled by +// default. +TEST_F(ParquetReaderTest, DISABLED_ListsWideTable) { auto constexpr num_rows = 2; auto constexpr num_cols = 26'755; // for slightly over 2B keys From 6ad90742f5a1efa5eecbbad25dddc46c1ed5c801 Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Fri, 18 Oct 2024 15:10:09 -0500 Subject: [PATCH 07/76] Add custom "fused" groupby aggregation to Dask cuDF (#17009) The legacy Dask cuDF implementation uses a custom code path for GroupBy aggregations. However, when query-planning is enabled (the default), we use the same algorithm as the pandas backend. This PR ports the custom "fused aggregation" code path over to the dask-expr version of Dask cuDF. Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) Approvers: - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/17009 --- python/dask_cudf/dask_cudf/expr/_expr.py | 215 +++++++++++++++++- python/dask_cudf/dask_cudf/expr/_groupby.py | 27 ++- .../dask_cudf/dask_cudf/tests/test_groupby.py | 23 +- 3 files changed, 257 insertions(+), 8 deletions(-) diff --git a/python/dask_cudf/dask_cudf/expr/_expr.py b/python/dask_cudf/dask_cudf/expr/_expr.py index af83a01da98..4a9f4de8b9c 100644 --- a/python/dask_cudf/dask_cudf/expr/_expr.py +++ b/python/dask_cudf/dask_cudf/expr/_expr.py @@ -6,11 +6,20 @@ from dask_expr import new_collection from dask_expr._cumulative import CumulativeBlockwise from dask_expr._expr import Elemwise, Expr, RenameAxis, VarColumns +from dask_expr._groupby import ( + DecomposableGroupbyAggregation, + GroupbyAggregation, +) from dask_expr._reductions import Reduction, Var from dask_expr.io.io import FusedParquetIO from dask_expr.io.parquet import ReadParquetPyarrowFS -from dask.dataframe.core import is_dataframe_like, make_meta, meta_nonempty +from dask.dataframe.core import ( + _concat, + is_dataframe_like, + make_meta, + meta_nonempty, +) from dask.dataframe.dispatch import is_categorical_dtype from dask.typing import no_default @@ -21,6 +30,210 @@ ## +def _get_spec_info(gb): + if isinstance(gb.arg, (dict, list)): + aggs = gb.arg.copy() + else: + aggs = gb.arg + + if gb._slice and not isinstance(aggs, dict): + aggs = {gb._slice: aggs} + + gb_cols = gb._by_columns + if isinstance(gb_cols, str): + gb_cols = [gb_cols] + columns = [c for c in gb.frame.columns if c not in gb_cols] + if not isinstance(aggs, dict): + aggs = {col: aggs for col in columns} + + # Assert if our output will have a MultiIndex; this will be the case if + # any value in the `aggs` dict is not a string (i.e. multiple/named + # aggregations per column) + str_cols_out = True + aggs_renames = {} + for col in aggs: + if isinstance(aggs[col], str) or callable(aggs[col]): + aggs[col] = [aggs[col]] + elif isinstance(aggs[col], dict): + str_cols_out = False + col_aggs = [] + for k, v in aggs[col].items(): + aggs_renames[col, v] = k + col_aggs.append(v) + aggs[col] = col_aggs + else: + str_cols_out = False + if col in gb_cols: + columns.append(col) + + return { + "aggs": aggs, + "columns": columns, + "str_cols_out": str_cols_out, + "aggs_renames": aggs_renames, + } + + +def _get_meta(gb): + spec_info = gb.spec_info + gb_cols = gb._by_columns + aggs = spec_info["aggs"].copy() + aggs_renames = spec_info["aggs_renames"] + if spec_info["str_cols_out"]: + # Metadata should use `str` for dict values if that is + # what the user originally specified (column names will + # be str, rather than tuples). + for col in aggs: + aggs[col] = aggs[col][0] + _meta = gb.frame._meta.groupby(gb_cols).agg(aggs) + if aggs_renames: + col_array = [] + agg_array = [] + for col, agg in _meta.columns: + col_array.append(col) + agg_array.append(aggs_renames.get((col, agg), agg)) + _meta.columns = pd.MultiIndex.from_arrays([col_array, agg_array]) + return _meta + + +class DecomposableCudfGroupbyAgg(DecomposableGroupbyAggregation): + sep = "___" + + @functools.cached_property + def spec_info(self): + return _get_spec_info(self) + + @functools.cached_property + def _meta(self): + return _get_meta(self) + + @property + def shuffle_by_index(self): + return False # We always group by column(s) + + @classmethod + def chunk(cls, df, *by, **kwargs): + from dask_cudf.groupby import _groupby_partition_agg + + return _groupby_partition_agg(df, **kwargs) + + @classmethod + def combine(cls, inputs, **kwargs): + from dask_cudf.groupby import _tree_node_agg + + return _tree_node_agg(_concat(inputs), **kwargs) + + @classmethod + def aggregate(cls, inputs, **kwargs): + from dask_cudf.groupby import _finalize_gb_agg + + return _finalize_gb_agg(_concat(inputs), **kwargs) + + @property + def chunk_kwargs(self) -> dict: + dropna = True if self.dropna is None else self.dropna + return { + "gb_cols": self._by_columns, + "aggs": self.spec_info["aggs"], + "columns": self.spec_info["columns"], + "dropna": dropna, + "sort": self.sort, + "sep": self.sep, + } + + @property + def combine_kwargs(self) -> dict: + dropna = True if self.dropna is None else self.dropna + return { + "gb_cols": self._by_columns, + "dropna": dropna, + "sort": self.sort, + "sep": self.sep, + } + + @property + def aggregate_kwargs(self) -> dict: + dropna = True if self.dropna is None else self.dropna + final_columns = self._slice or self._meta.columns + return { + "gb_cols": self._by_columns, + "aggs": self.spec_info["aggs"], + "columns": self.spec_info["columns"], + "final_columns": final_columns, + "as_index": True, + "dropna": dropna, + "sort": self.sort, + "sep": self.sep, + "str_cols_out": self.spec_info["str_cols_out"], + "aggs_renames": self.spec_info["aggs_renames"], + } + + +class CudfGroupbyAgg(GroupbyAggregation): + @functools.cached_property + def spec_info(self): + return _get_spec_info(self) + + @functools.cached_property + def _meta(self): + return _get_meta(self) + + def _lower(self): + return DecomposableCudfGroupbyAgg( + self.frame, + self.arg, + self.observed, + self.dropna, + self.split_every, + self.split_out, + self.sort, + self.shuffle_method, + self._slice, + *self.by, + ) + + +def _maybe_get_custom_expr( + gb, + aggs, + split_every=None, + split_out=None, + shuffle_method=None, + **kwargs, +): + from dask_cudf.groupby import ( + OPTIMIZED_AGGS, + _aggs_optimized, + _redirect_aggs, + ) + + if kwargs: + # Unsupported key-word arguments + return None + + if not hasattr(gb.obj._meta, "to_pandas"): + # Not cuDF-backed data + return None + + _aggs = _redirect_aggs(aggs) + if not _aggs_optimized(_aggs, OPTIMIZED_AGGS): + # One or more aggregations are unsupported + return None + + return CudfGroupbyAgg( + gb.obj.expr, + _aggs, + gb.observed, + gb.dropna, + split_every, + split_out, + gb.sort, + shuffle_method, + gb._slice, + *gb.by, + ) + + class CudfFusedParquetIO(FusedParquetIO): @staticmethod def _load_multiple_files( diff --git a/python/dask_cudf/dask_cudf/expr/_groupby.py b/python/dask_cudf/dask_cudf/expr/_groupby.py index 65688115b59..8a16fe7615d 100644 --- a/python/dask_cudf/dask_cudf/expr/_groupby.py +++ b/python/dask_cudf/dask_cudf/expr/_groupby.py @@ -1,5 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from dask_expr._collection import new_collection from dask_expr._groupby import ( GroupBy as DXGroupBy, SeriesGroupBy as DXSeriesGroupBy, @@ -11,6 +12,8 @@ from cudf.core.groupby.groupby import _deprecate_collect +from dask_cudf.expr._expr import _maybe_get_custom_expr + ## ## Custom groupby classes ## @@ -54,9 +57,16 @@ def _translate_arg(arg): return arg -# TODO: These classes are mostly a work-around for missing -# `observed=False` support. -# See: https://github.com/rapidsai/cudf/issues/15173 +# We define our own GroupBy classes in Dask cuDF for +# the following reasons: +# (1) We want to use a custom `aggregate` algorithm +# that performs multiple aggregations on the +# same dataframe partition at once. The upstream +# algorithm breaks distinct aggregations into +# separate tasks. +# (2) We need to work around missing `observed=False` +# support: +# https://github.com/rapidsai/cudf/issues/15173 class GroupBy(DXGroupBy): @@ -89,8 +99,15 @@ def collect(self, **kwargs): _deprecate_collect() return self._single_agg(ListAgg, **kwargs) - def aggregate(self, arg, **kwargs): - return super().aggregate(_translate_arg(arg), **kwargs) + def aggregate(self, arg, fused=True, **kwargs): + if ( + fused + and (expr := _maybe_get_custom_expr(self, arg, **kwargs)) + is not None + ): + return new_collection(expr) + else: + return super().aggregate(_translate_arg(arg), **kwargs) class SeriesGroupBy(DXSeriesGroupBy): diff --git a/python/dask_cudf/dask_cudf/tests/test_groupby.py b/python/dask_cudf/dask_cudf/tests/test_groupby.py index e30474f6b94..042e69d86f4 100644 --- a/python/dask_cudf/dask_cudf/tests/test_groupby.py +++ b/python/dask_cudf/dask_cudf/tests/test_groupby.py @@ -14,7 +14,11 @@ import dask_cudf from dask_cudf.groupby import OPTIMIZED_AGGS, _aggs_optimized -from dask_cudf.tests.utils import QUERY_PLANNING_ON, xfail_dask_expr +from dask_cudf.tests.utils import ( + QUERY_PLANNING_ON, + require_dask_expr, + xfail_dask_expr, +) def assert_cudf_groupby_layers(ddf): @@ -556,10 +560,22 @@ def test_groupby_categorical_key(): ), ], ) +@pytest.mark.parametrize( + "fused", + [ + True, + pytest.param( + False, + marks=require_dask_expr("Not supported by legacy API"), + ), + ], +) @pytest.mark.parametrize("split_out", ["use_dask_default", 1, 2]) @pytest.mark.parametrize("split_every", [False, 4]) @pytest.mark.parametrize("npartitions", [1, 10]) -def test_groupby_agg_params(npartitions, split_every, split_out, as_index): +def test_groupby_agg_params( + npartitions, split_every, split_out, fused, as_index +): df = cudf.datasets.randomdata( nrows=150, dtypes={"name": str, "a": int, "b": int, "c": float}, @@ -574,6 +590,7 @@ def test_groupby_agg_params(npartitions, split_every, split_out, as_index): "c": ["mean", "std", "var"], } + fused_kwarg = {"fused": fused} if QUERY_PLANNING_ON else {} split_kwargs = {"split_every": split_every, "split_out": split_out} if split_out == "use_dask_default": split_kwargs.pop("split_out") @@ -593,6 +610,7 @@ def test_groupby_agg_params(npartitions, split_every, split_out, as_index): ddf.groupby(["name", "a"], sort=True, **maybe_as_index) .aggregate( agg_dict, + **fused_kwarg, **split_kwargs, ) .compute() @@ -614,6 +632,7 @@ def test_groupby_agg_params(npartitions, split_every, split_out, as_index): # Full check (`sort=False`) gr = ddf.groupby(["name", "a"], sort=False, **maybe_as_index).aggregate( agg_dict, + **fused_kwarg, **split_kwargs, ) pr = pddf.groupby(["name", "a"], sort=False).agg( From 98eef67d12670bd592022201b3c9dcc12374a34a Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 18 Oct 2024 14:55:41 -0700 Subject: [PATCH 08/76] Extend `device_scalar` to optionally use pinned bounce buffer (#16947) Depends on https://github.com/rapidsai/cudf/pull/16945 Added `cudf::detail::device_scalar`, derived from `rmm::device_scalar`. The new class overrides function members that perform copies between host and device. New implementation uses a `cudf::detail::host_vector` as a bounce buffer to avoid performing a pageable copy. Replaced `rmm::device_scalar` with `cudf::detail::device_scalar` across libcudf. Authors: - Vukasin Milovanovic (https://github.com/vuule) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Basit Ayantunde (https://github.com/lamarrr) - Vyas Ramasubramani (https://github.com/vyasr) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/16947 --- cpp/include/cudf/detail/copy_if.cuh | 4 +- cpp/include/cudf/detail/copy_if_else.cuh | 5 +- cpp/include/cudf/detail/copy_range.cuh | 4 +- cpp/include/cudf/detail/device_scalar.hpp | 103 ++++++++++++++++++++++ cpp/include/cudf/detail/null_mask.cuh | 4 +- cpp/include/cudf/detail/valid_if.cuh | 4 +- cpp/include/cudf/scalar/scalar.hpp | 5 +- cpp/src/bitmask/null_mask.cu | 4 +- cpp/src/copying/concatenate.cu | 5 +- cpp/src/copying/get_element.cu | 7 +- cpp/src/groupby/sort/group_quantiles.cu | 3 +- cpp/src/groupby/sort/group_std.cu | 4 +- cpp/src/interop/to_arrow_device.cu | 12 +-- cpp/src/io/json/json_normalization.cu | 6 +- cpp/src/io/json/nested_json_gpu.cu | 8 +- cpp/src/io/orc/reader_impl_decode.cu | 4 +- cpp/src/io/parquet/error.hpp | 2 +- cpp/src/io/parquet/parquet_gpu.hpp | 3 +- cpp/src/io/utilities/data_casting.cu | 5 +- cpp/src/io/utilities/type_inference.cu | 5 +- cpp/src/join/conditional_join.cu | 11 +-- cpp/src/join/distinct_hash_join.cu | 1 - cpp/src/join/mixed_join_size_kernel.cuh | 3 +- cpp/src/json/json_path.cu | 3 +- cpp/src/reductions/all.cu | 4 +- cpp/src/reductions/any.cu | 4 +- cpp/src/reductions/minmax.cu | 13 +-- cpp/src/replace/nulls.cu | 3 +- cpp/src/replace/replace.cu | 4 +- cpp/src/rolling/detail/rolling.cuh | 6 +- cpp/src/strings/case.cu | 3 +- cpp/src/strings/copying/concatenate.cu | 4 +- cpp/src/strings/replace/find_replace.cu | 2 +- cpp/src/strings/replace/multi.cu | 2 +- cpp/src/strings/replace/replace.cu | 2 +- cpp/src/strings/split/split.cuh | 3 +- cpp/src/text/tokenize.cu | 3 +- 37 files changed, 192 insertions(+), 76 deletions(-) create mode 100644 cpp/include/cudf/detail/device_scalar.hpp diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh index dfb646c66c4..4159e324472 100644 --- a/cpp/include/cudf/detail/copy_if.cuh +++ b/cpp/include/cudf/detail/copy_if.cuh @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -36,7 +37,6 @@ #include #include -#include #include #include @@ -256,7 +256,7 @@ struct scatter_gather_functor { cudf::detail::grid_1d grid{input.size(), block_size, per_thread}; - rmm::device_scalar null_count{0, stream}; + cudf::detail::device_scalar null_count{0, stream}; if (output.nullable()) { // Have to initialize the output mask to all zeros because we may update // it with atomicOr(). diff --git a/cpp/include/cudf/detail/copy_if_else.cuh b/cpp/include/cudf/detail/copy_if_else.cuh index a70cd5a0661..5dc75b1a3fb 100644 --- a/cpp/include/cudf/detail/copy_if_else.cuh +++ b/cpp/include/cudf/detail/copy_if_else.cuh @@ -19,12 +19,11 @@ #include #include #include +#include #include #include #include -#include - #include #include @@ -171,7 +170,7 @@ std::unique_ptr copy_if_else(bool nullable, // if we have validity in the output if (nullable) { - rmm::device_scalar valid_count{0, stream}; + cudf::detail::device_scalar valid_count{0, stream}; // call the kernel copy_if_else_kernel diff --git a/cpp/include/cudf/detail/copy_range.cuh b/cpp/include/cudf/detail/copy_range.cuh index 3aa136d630b..fcb80fe45f7 100644 --- a/cpp/include/cudf/detail/copy_range.cuh +++ b/cpp/include/cudf/detail/copy_range.cuh @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -27,7 +28,6 @@ #include #include -#include #include #include @@ -154,7 +154,7 @@ void copy_range(SourceValueIterator source_value_begin, auto grid = cudf::detail::grid_1d{num_items, block_size, 1}; if (target.nullable()) { - rmm::device_scalar null_count(target.null_count(), stream); + cudf::detail::device_scalar null_count(target.null_count(), stream); auto kernel = copy_range_kernel; diff --git a/cpp/include/cudf/detail/device_scalar.hpp b/cpp/include/cudf/detail/device_scalar.hpp new file mode 100644 index 00000000000..16ca06c6561 --- /dev/null +++ b/cpp/include/cudf/detail/device_scalar.hpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace CUDF_EXPORT cudf { +namespace detail { + +template +class device_scalar : public rmm::device_scalar { + public: +#ifdef __CUDACC__ +#pragma nv_exec_check_disable +#endif + ~device_scalar() = default; + +// Implementation is the same as what compiler should generate +// Could not use default move constructor as 11.8 compiler fails to generate it +#ifdef __CUDACC__ +#pragma nv_exec_check_disable +#endif + device_scalar(device_scalar&& other) noexcept + : rmm::device_scalar{std::move(other)}, bounce_buffer{std::move(other.bounce_buffer)} + { + } + device_scalar& operator=(device_scalar&&) noexcept = default; + + device_scalar(device_scalar const&) = delete; + device_scalar& operator=(device_scalar const&) = delete; + + device_scalar() = delete; + + explicit device_scalar( + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) + : rmm::device_scalar(stream, mr), bounce_buffer{make_host_vector(1, stream)} + { + } + + explicit device_scalar( + T const& initial_value, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) + : rmm::device_scalar(stream, mr), bounce_buffer{make_host_vector(1, stream)} + { + bounce_buffer[0] = initial_value; + cuda_memcpy_async(device_span{this->data(), 1}, bounce_buffer, stream); + } + + device_scalar(device_scalar const& other, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) + : rmm::device_scalar(other, stream, mr), bounce_buffer{make_host_vector(1, stream)} + { + } + + [[nodiscard]] T value(rmm::cuda_stream_view stream) const + { + cuda_memcpy(bounce_buffer, device_span{this->data(), 1}, stream); + return bounce_buffer[0]; + } + + void set_value_async(T const& value, rmm::cuda_stream_view stream) + { + bounce_buffer[0] = value; + cuda_memcpy_async(device_span{this->data(), 1}, bounce_buffer, stream); + } + + void set_value_async(T&& value, rmm::cuda_stream_view stream) + { + bounce_buffer[0] = std::move(value); + cuda_memcpy_async(device_span{this->data(), 1}, bounce_buffer, stream); + } + + void set_value_to_zero_async(rmm::cuda_stream_view stream) { set_value_async(T{}, stream); } + + private: + mutable cudf::detail::host_vector bounce_buffer; +}; + +} // namespace detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh index 327c732716c..482265d633e 100644 --- a/cpp/include/cudf/detail/null_mask.cuh +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include #include @@ -25,7 +26,6 @@ #include #include -#include #include #include @@ -165,7 +165,7 @@ size_type inplace_bitmask_binop(Binop op, "Mask pointer cannot be null"); rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref(); - rmm::device_scalar d_counter{0, stream, mr}; + cudf::detail::device_scalar d_counter{0, stream, mr}; rmm::device_uvector d_masks(masks.size(), stream, mr); rmm::device_uvector d_begin_bits(masks_begin_bits.size(), stream, mr); diff --git a/cpp/include/cudf/detail/valid_if.cuh b/cpp/include/cudf/detail/valid_if.cuh index cfb2e70bfed..af182b69c3a 100644 --- a/cpp/include/cudf/detail/valid_if.cuh +++ b/cpp/include/cudf/detail/valid_if.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -25,7 +26,6 @@ #include #include -#include #include @@ -101,7 +101,7 @@ std::pair valid_if(InputIterator begin, size_type null_count{0}; if (size > 0) { - rmm::device_scalar valid_count{0, stream}; + cudf::detail::device_scalar valid_count{0, stream}; constexpr size_type block_size{256}; grid_1d grid{size, block_size}; diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index 66be2a12fbe..360dde11fc0 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include #include @@ -94,8 +95,8 @@ class scalar { [[nodiscard]] bool const* validity_data() const; protected: - data_type _type{type_id::EMPTY}; ///< Logical type of value in the scalar - rmm::device_scalar _is_valid; ///< Device bool signifying validity + data_type _type{type_id::EMPTY}; ///< Logical type of value in the scalar + cudf::detail::device_scalar _is_valid; ///< Device bool signifying validity /** * @brief Move constructor for scalar. diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 4ca05f9c335..e6659f76c7c 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -15,6 +15,7 @@ */ #include +#include #include #include #include @@ -32,7 +33,6 @@ #include #include -#include #include #include @@ -329,7 +329,7 @@ cudf::size_type count_set_bits(bitmask_type const* bitmask, cudf::detail::grid_1d grid(num_words, block_size); - rmm::device_scalar non_zero_count(0, stream); + cudf::detail::device_scalar non_zero_count(0, stream); count_set_bits_kernel <<>>( diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index b8e140f1fa5..d8419760120 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -162,7 +163,7 @@ size_type concatenate_masks(device_span d_views, size_type output_size, rmm::cuda_stream_view stream) { - rmm::device_scalar d_valid_count(0, stream); + cudf::detail::device_scalar d_valid_count(0, stream); constexpr size_type block_size{256}; cudf::detail::grid_1d config(output_size, block_size); concatenate_masks_kernel @@ -265,7 +266,7 @@ std::unique_ptr fused_concatenate(host_span views, auto out_view = out_col->mutable_view(); auto d_out_view = mutable_column_device_view::create(out_view, stream); - rmm::device_scalar d_valid_count(0, stream); + cudf::detail::device_scalar d_valid_count(0, stream); // Launch kernel constexpr size_type block_size{256}; diff --git a/cpp/src/copying/get_element.cu b/cpp/src/copying/get_element.cu index 29a28f81d1a..80b0bd5242f 100644 --- a/cpp/src/copying/get_element.cu +++ b/cpp/src/copying/get_element.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -71,7 +72,7 @@ struct get_element_functor { auto device_col = column_device_view::create(input, stream); rmm::device_scalar temp_data(stream, mr); - rmm::device_scalar temp_valid(stream, mr); + cudf::detail::device_scalar temp_valid(stream, mr); device_single_thread( [buffer = temp_data.data(), @@ -155,8 +156,8 @@ struct get_element_functor { auto device_col = column_device_view::create(input, stream); - rmm::device_scalar temp_data(stream, mr); - rmm::device_scalar temp_valid(stream, mr); + cudf::detail::device_scalar temp_data(stream, mr); + cudf::detail::device_scalar temp_valid(stream, mr); device_single_thread( [buffer = temp_data.data(), diff --git a/cpp/src/groupby/sort/group_quantiles.cu b/cpp/src/groupby/sort/group_quantiles.cu index 82d557b9f7e..d6c900fb689 100644 --- a/cpp/src/groupby/sort/group_quantiles.cu +++ b/cpp/src/groupby/sort/group_quantiles.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -108,7 +109,7 @@ struct quantiles_functor { auto values_view = column_device_view::create(values, stream); auto group_size_view = column_device_view::create(group_sizes, stream); auto result_view = mutable_column_device_view::create(result->mutable_view(), stream); - auto null_count = rmm::device_scalar(0, stream, mr); + auto null_count = cudf::detail::device_scalar(0, stream, mr); // For each group, calculate quantile if (!cudf::is_dictionary(values.type())) { diff --git a/cpp/src/groupby/sort/group_std.cu b/cpp/src/groupby/sort/group_std.cu index 86ee20dbbe2..c3dfac46502 100644 --- a/cpp/src/groupby/sort/group_std.cu +++ b/cpp/src/groupby/sort/group_std.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -27,7 +28,6 @@ #include #include -#include #include #include @@ -134,7 +134,7 @@ struct var_functor { // set nulls auto result_view = mutable_column_device_view::create(*result, stream); - auto null_count = rmm::device_scalar(0, stream, mr); + auto null_count = cudf::detail::device_scalar(0, stream, mr); auto d_null_count = null_count.data(); thrust::for_each_n( rmm::exec_policy(stream), diff --git a/cpp/src/interop/to_arrow_device.cu b/cpp/src/interop/to_arrow_device.cu index a2874b46b06..fc1b0226a48 100644 --- a/cpp/src/interop/to_arrow_device.cu +++ b/cpp/src/interop/to_arrow_device.cu @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -35,7 +36,6 @@ #include #include -#include #include #include @@ -60,7 +60,7 @@ template struct is_device_scalar : public std::false_type {}; template -struct is_device_scalar> : public std::true_type {}; +struct is_device_scalar> : public std::true_type {}; template struct is_device_uvector : public std::false_type {}; @@ -232,10 +232,10 @@ int dispatch_to_arrow_device::operator()(cudf::column&& colum // in the offsets buffer. While some arrow implementations may accept a zero-sized // offsets buffer, best practices would be to allocate the buffer with the single value. if (nanoarrow_type == NANOARROW_TYPE_STRING) { - auto zero = std::make_unique>(0, stream, mr); + auto zero = std::make_unique>(0, stream, mr); NANOARROW_RETURN_NOT_OK(set_buffer(std::move(zero), fixed_width_data_buffer_idx, tmp.get())); } else { - auto zero = std::make_unique>(0, stream, mr); + auto zero = std::make_unique>(0, stream, mr); NANOARROW_RETURN_NOT_OK(set_buffer(std::move(zero), fixed_width_data_buffer_idx, tmp.get())); } @@ -466,10 +466,10 @@ int dispatch_to_arrow_device_view::operator()(ArrowArray* out if (column.size() == 0) { // https://github.com/rapidsai/cudf/pull/15047#discussion_r1546528552 if (nanoarrow_type == NANOARROW_TYPE_LARGE_STRING) { - auto zero = std::make_unique>(0, stream, mr); + auto zero = std::make_unique>(0, stream, mr); NANOARROW_RETURN_NOT_OK(set_buffer(std::move(zero), fixed_width_data_buffer_idx, tmp.get())); } else { - auto zero = std::make_unique>(0, stream, mr); + auto zero = std::make_unique>(0, stream, mr); NANOARROW_RETURN_NOT_OK(set_buffer(std::move(zero), fixed_width_data_buffer_idx, tmp.get())); } diff --git a/cpp/src/io/json/json_normalization.cu b/cpp/src/io/json/json_normalization.cu index 2d435dc8e1a..34a87918e57 100644 --- a/cpp/src/io/json/json_normalization.cu +++ b/cpp/src/io/json/json_normalization.cu @@ -16,6 +16,7 @@ #include "io/fst/lookup_tables.cuh" +#include #include #include #include @@ -24,7 +25,6 @@ #include #include -#include #include #include @@ -316,7 +316,7 @@ void normalize_single_quotes(datasource::owning_buffer& inda stream); rmm::device_buffer outbuf(indata.size() * 2, stream, mr); - rmm::device_scalar outbuf_size(stream, mr); + cudf::detail::device_scalar outbuf_size(stream, mr); parser.Transduce(reinterpret_cast(indata.data()), static_cast(indata.size()), static_cast(outbuf.data()), @@ -401,7 +401,7 @@ std:: stream); rmm::device_uvector outbuf_indices(inbuf.size(), stream, mr); - rmm::device_scalar outbuf_indices_size(stream, mr); + cudf::detail::device_scalar outbuf_indices_size(stream, mr); parser.Transduce(inbuf.data(), static_cast(inbuf.size()), thrust::make_discard_iterator(), diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu index 69a51fab5dc..534b30a6089 100644 --- a/cpp/src/io/json/nested_json_gpu.cu +++ b/cpp/src/io/json/nested_json_gpu.cu @@ -21,6 +21,7 @@ #include "nested_json.hpp" #include +#include #include #include #include @@ -34,7 +35,6 @@ #include #include -#include #include #include @@ -1446,7 +1446,7 @@ void get_stack_context(device_span json_in, constexpr StackSymbolT read_symbol = 'x'; // Number of stack operations in the input (i.e., number of '{', '}', '[', ']' outside of quotes) - rmm::device_scalar d_num_stack_ops(stream); + cudf::detail::device_scalar d_num_stack_ops(stream); // Sequence of stack symbols and their position in the original input (sparse representation) rmm::device_uvector stack_ops{json_in.size(), stream}; @@ -1519,7 +1519,7 @@ std::pair, rmm::device_uvector> pr stream); auto const mr = cudf::get_current_device_resource_ref(); - rmm::device_scalar d_num_selected_tokens(stream, mr); + cudf::detail::device_scalar d_num_selected_tokens(stream, mr); rmm::device_uvector filtered_tokens_out{tokens.size(), stream, mr}; rmm::device_uvector filtered_token_indices_out{tokens.size(), stream, mr}; @@ -1638,7 +1638,7 @@ std::pair, rmm::device_uvector> ge std::size_t constexpr max_tokens_per_struct = 6; auto const max_token_out_count = cudf::util::div_rounding_up_safe(json_in.size(), min_chars_per_struct) * max_tokens_per_struct; - rmm::device_scalar num_written_tokens{stream}; + cudf::detail::device_scalar num_written_tokens{stream}; // In case we're recovering on invalid JSON lines, post-processing the token stream requires to // see a JSON-line delimiter as the very first item SymbolOffsetT const delimiter_offset = diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index a1e4aa65dcf..c42348a165f 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -22,6 +22,7 @@ #include "io/utilities/hostdevice_span.hpp" #include +#include #include #include #include @@ -32,7 +33,6 @@ #include #include -#include #include #include @@ -451,7 +451,7 @@ void decode_stream_data(int64_t num_dicts, update_null_mask(chunks, out_buffers, stream, mr); } - rmm::device_scalar error_count(0, stream); + cudf::detail::device_scalar error_count(0, stream); gpu::DecodeOrcColumnData(chunks.base_device_ptr(), global_dict.data(), row_groups, diff --git a/cpp/src/io/parquet/error.hpp b/cpp/src/io/parquet/error.hpp index f0fc9fab3ab..8b3d1d7a6c3 100644 --- a/cpp/src/io/parquet/error.hpp +++ b/cpp/src/io/parquet/error.hpp @@ -26,7 +26,7 @@ namespace cudf::io::parquet { /** - * @brief Wrapper around a `rmm::device_scalar` for use in reporting errors that occur in + * @brief Specialized device scalar for use in reporting errors that occur in * kernel calls. * * The `kernel_error` object is created with a `rmm::cuda_stream_view` which is used throughout diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 4f6d41a97da..be502b581af 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -22,14 +22,13 @@ #include "io/parquet/parquet_common.hpp" #include "io/statistics/statistics.cuh" #include "io/utilities/column_buffer.hpp" -#include "io/utilities/hostdevice_vector.hpp" +#include #include #include #include #include -#include #include #include diff --git a/cpp/src/io/utilities/data_casting.cu b/cpp/src/io/utilities/data_casting.cu index f70171eef68..0c49b2e5d78 100644 --- a/cpp/src/io/utilities/data_casting.cu +++ b/cpp/src/io/utilities/data_casting.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -800,7 +801,7 @@ template static std::unique_ptr parse_string(string_view_pair_it str_tuples, size_type col_size, rmm::device_buffer&& null_mask, - rmm::device_scalar& d_null_count, + cudf::detail::device_scalar& d_null_count, cudf::io::parse_options_view const& options, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) @@ -930,7 +931,7 @@ std::unique_ptr parse_data( CUDF_FUNC_RANGE(); if (col_size == 0) { return make_empty_column(col_type); } - auto d_null_count = rmm::device_scalar(null_count, stream); + auto d_null_count = cudf::detail::device_scalar(null_count, stream); auto null_count_data = d_null_count.data(); if (null_mask.is_empty()) { null_mask = cudf::create_null_mask(col_size, mask_state::ALL_VALID, stream, mr); diff --git a/cpp/src/io/utilities/type_inference.cu b/cpp/src/io/utilities/type_inference.cu index 43dc38c4ac6..af32b207d20 100644 --- a/cpp/src/io/utilities/type_inference.cu +++ b/cpp/src/io/utilities/type_inference.cu @@ -18,11 +18,10 @@ #include "io/utilities/string_parsing.hpp" #include "io/utilities/trie.cuh" +#include #include #include -#include - #include #include @@ -242,7 +241,7 @@ cudf::io::column_type_histogram infer_column_type(OptionsView const& options, constexpr int block_size = 128; auto const grid_size = (size + block_size - 1) / block_size; - auto d_column_info = rmm::device_scalar(stream); + auto d_column_info = cudf::detail::device_scalar(stream); CUDF_CUDA_TRY(cudaMemsetAsync( d_column_info.data(), 0, sizeof(cudf::io::column_type_histogram), stream.value())); diff --git a/cpp/src/join/conditional_join.cu b/cpp/src/join/conditional_join.cu index 2ec23e0dc6d..40d1c925889 100644 --- a/cpp/src/join/conditional_join.cu +++ b/cpp/src/join/conditional_join.cu @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -81,7 +82,7 @@ std::unique_ptr> conditional_join_anti_semi( join_size = *output_size; } else { // Allocate storage for the counter used to get the size of the join output - rmm::device_scalar size(0, stream, mr); + cudf::detail::device_scalar size(0, stream, mr); if (has_nulls) { compute_conditional_join_output_size <<>>( @@ -94,7 +95,7 @@ std::unique_ptr> conditional_join_anti_semi( join_size = size.value(stream); } - rmm::device_scalar write_index(0, stream); + cudf::detail::device_scalar write_index(0, stream); auto left_indices = std::make_unique>(join_size, stream, mr); @@ -197,7 +198,7 @@ conditional_join(table_view const& left, join_size = *output_size; } else { // Allocate storage for the counter used to get the size of the join output - rmm::device_scalar size(0, stream, mr); + cudf::detail::device_scalar size(0, stream, mr); if (has_nulls) { compute_conditional_join_output_size <<>>( @@ -231,7 +232,7 @@ conditional_join(table_view const& left, std::make_unique>(0, stream, mr)); } - rmm::device_scalar write_index(0, stream); + cudf::detail::device_scalar write_index(0, stream); auto left_indices = std::make_unique>(join_size, stream, mr); auto right_indices = std::make_unique>(join_size, stream, mr); @@ -342,7 +343,7 @@ std::size_t compute_conditional_join_output_size(table_view const& left, auto const shmem_size_per_block = parser.shmem_per_thread * config.num_threads_per_block; // Allocate storage for the counter used to get the size of the join output - rmm::device_scalar size(0, stream, mr); + cudf::detail::device_scalar size(0, stream, mr); // Determine number of output rows without actually building the output to simply // find what the size of the output will be. diff --git a/cpp/src/join/distinct_hash_join.cu b/cpp/src/join/distinct_hash_join.cu index c7294152982..515d28201e8 100644 --- a/cpp/src/join/distinct_hash_join.cu +++ b/cpp/src/join/distinct_hash_join.cu @@ -27,7 +27,6 @@ #include #include -#include #include #include diff --git a/cpp/src/join/mixed_join_size_kernel.cuh b/cpp/src/join/mixed_join_size_kernel.cuh index 84e9be45030..4049ccf35e1 100644 --- a/cpp/src/join/mixed_join_size_kernel.cuh +++ b/cpp/src/join/mixed_join_size_kernel.cuh @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -122,7 +123,7 @@ std::size_t launch_compute_mixed_join_output_size( rmm::device_async_resource_ref mr) { // Allocate storage for the counter used to get the size of the join output - rmm::device_scalar size(0, stream, mr); + cudf::detail::device_scalar size(0, stream, mr); compute_mixed_join_output_size <<>>( diff --git a/cpp/src/json/json_path.cu b/cpp/src/json/json_path.cu index 59fdbedf089..fb5cf66dd60 100644 --- a/cpp/src/json/json_path.cu +++ b/cpp/src/json/json_path.cu @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -1031,7 +1032,7 @@ std::unique_ptr get_json_object(cudf::strings_column_view const& c cudf::detail::create_null_mask(col.size(), mask_state::UNINITIALIZED, stream, mr); // compute results - rmm::device_scalar d_valid_count{0, stream}; + cudf::detail::device_scalar d_valid_count{0, stream}; get_json_object_kernel <<>>( diff --git a/cpp/src/reductions/all.cu b/cpp/src/reductions/all.cu index 67ea29a2cb1..890625830a5 100644 --- a/cpp/src/reductions/all.cu +++ b/cpp/src/reductions/all.cu @@ -16,6 +16,7 @@ #include "simple.cuh" +#include #include #include #include @@ -65,7 +66,8 @@ struct all_fn { cudf::dictionary::detail::make_dictionary_pair_iterator(*d_dict, input.has_nulls()); return thrust::make_transform_iterator(pair_iter, null_iter); }(); - auto d_result = rmm::device_scalar(1, stream, cudf::get_current_device_resource_ref()); + auto d_result = + cudf::detail::device_scalar(1, stream, cudf::get_current_device_resource_ref()); thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), input.size(), diff --git a/cpp/src/reductions/any.cu b/cpp/src/reductions/any.cu index 057f038c622..d70da369d72 100644 --- a/cpp/src/reductions/any.cu +++ b/cpp/src/reductions/any.cu @@ -16,6 +16,7 @@ #include "simple.cuh" +#include #include #include #include @@ -65,7 +66,8 @@ struct any_fn { cudf::dictionary::detail::make_dictionary_pair_iterator(*d_dict, input.has_nulls()); return thrust::make_transform_iterator(pair_iter, null_iter); }(); - auto d_result = rmm::device_scalar(0, stream, cudf::get_current_device_resource_ref()); + auto d_result = + cudf::detail::device_scalar(0, stream, cudf::get_current_device_resource_ref()); thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), input.size(), diff --git a/cpp/src/reductions/minmax.cu b/cpp/src/reductions/minmax.cu index 139de068050..4f6eb23ce5b 100644 --- a/cpp/src/reductions/minmax.cu +++ b/cpp/src/reductions/minmax.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -69,18 +70,18 @@ struct minmax_pair { * @param num_items number of items to reduce * @param binary_op binary operator used to reduce * @param stream CUDA stream to run kernels on. - * @return rmm::device_scalar + * @return cudf::detail::device_scalar */ template ::type> -rmm::device_scalar reduce_device(InputIterator d_in, - size_type num_items, - Op binary_op, - rmm::cuda_stream_view stream) +auto reduce_device(InputIterator d_in, + size_type num_items, + Op binary_op, + rmm::cuda_stream_view stream) { OutputType identity{}; - rmm::device_scalar result{identity, stream}; + cudf::detail::device_scalar result{identity, stream}; // Allocate temporary storage size_t storage_bytes = 0; diff --git a/cpp/src/replace/nulls.cu b/cpp/src/replace/nulls.cu index 1df1549432f..d0e3358cc34 100644 --- a/cpp/src/replace/nulls.cu +++ b/cpp/src/replace/nulls.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -137,7 +138,7 @@ struct replace_nulls_column_kernel_forwarder { auto device_out = cudf::mutable_column_device_view::create(output_view, stream); auto device_replacement = cudf::column_device_view::create(replacement, stream); - rmm::device_scalar valid_counter(0, stream); + cudf::detail::device_scalar valid_counter(0, stream); cudf::size_type* valid_count = valid_counter.data(); replace<<>>( diff --git a/cpp/src/replace/replace.cu b/cpp/src/replace/replace.cu index 86ec8cfc91e..0cc97ca05e0 100644 --- a/cpp/src/replace/replace.cu +++ b/cpp/src/replace/replace.cu @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -53,7 +54,6 @@ #include #include -#include #include #include @@ -182,7 +182,7 @@ struct replace_kernel_forwarder { rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - rmm::device_scalar valid_counter(0, stream); + cudf::detail::device_scalar valid_counter(0, stream); cudf::size_type* valid_count = valid_counter.data(); auto replace = [&] { diff --git a/cpp/src/rolling/detail/rolling.cuh b/cpp/src/rolling/detail/rolling.cuh index 528700137bf..bc0ee2eb519 100644 --- a/cpp/src/rolling/detail/rolling.cuh +++ b/cpp/src/rolling/detail/rolling.cuh @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -49,7 +50,6 @@ #include #include -#include #include #include @@ -1105,7 +1105,7 @@ struct rolling_window_launcher { auto const d_inp_ptr = column_device_view::create(input, stream); auto const d_default_out_ptr = column_device_view::create(default_outputs, stream); auto const d_out_ptr = mutable_column_device_view::create(output->mutable_view(), stream); - auto d_valid_count = rmm::device_scalar{0, stream}; + auto d_valid_count = cudf::detail::device_scalar{0, stream}; auto constexpr block_size = 256; auto const grid = cudf::detail::grid_1d(input.size(), block_size); @@ -1271,7 +1271,7 @@ std::unique_ptr rolling_window_udf(column_view const& input, udf_agg._output_type, input.size(), cudf::mask_state::UNINITIALIZED, stream, mr); auto output_view = output->mutable_view(); - rmm::device_scalar device_valid_count{0, stream}; + cudf::detail::device_scalar device_valid_count{0, stream}; std::string kernel_name = jitify2::reflection::Template("cudf::rolling::jit::gpu_rolling_new") // diff --git a/cpp/src/strings/case.cu b/cpp/src/strings/case.cu index 4c015f3cbed..6a7c8ea45e9 100644 --- a/cpp/src/strings/case.cu +++ b/cpp/src/strings/case.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -348,7 +349,7 @@ std::unique_ptr convert_case(strings_column_view const& input, // This check incurs ~20% performance hit for smaller strings and so we only use it // after the threshold check above. The check makes very little impact for long strings // but results in a large performance gain when the input contains only single-byte characters. - rmm::device_scalar mb_count(0, stream); + cudf::detail::device_scalar mb_count(0, stream); // cudf::detail::grid_1d is limited to size_type elements auto const num_blocks = util::div_rounding_up_safe(chars_size / bytes_per_thread, block_size); // we only need to check every other byte since either will contain high bit diff --git a/cpp/src/strings/copying/concatenate.cu b/cpp/src/strings/copying/concatenate.cu index 1d9d12686eb..9e4ef47ff79 100644 --- a/cpp/src/strings/copying/concatenate.cu +++ b/cpp/src/strings/copying/concatenate.cu @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -27,7 +28,6 @@ #include #include -#include #include #include @@ -242,7 +242,7 @@ std::unique_ptr concatenate(host_span columns, } { // Copy offsets columns with single kernel launch - rmm::device_scalar d_valid_count(0, stream); + cudf::detail::device_scalar d_valid_count(0, stream); constexpr size_type block_size{256}; cudf::detail::grid_1d config(offsets_count, block_size); diff --git a/cpp/src/strings/replace/find_replace.cu b/cpp/src/strings/replace/find_replace.cu index 8a8001dd81a..957075017ba 100644 --- a/cpp/src/strings/replace/find_replace.cu +++ b/cpp/src/strings/replace/find_replace.cu @@ -14,6 +14,7 @@ * limitations under the License. */ #include +#include #include #include #include @@ -21,7 +22,6 @@ #include #include -#include #include #include diff --git a/cpp/src/strings/replace/multi.cu b/cpp/src/strings/replace/multi.cu index 352d883bdc5..88f343926c9 100644 --- a/cpp/src/strings/replace/multi.cu +++ b/cpp/src/strings/replace/multi.cu @@ -334,7 +334,7 @@ std::unique_ptr replace_character_parallel(strings_column_view const& in // Count the number of targets in the entire column. // Note this may over-count in the case where a target spans adjacent strings. - rmm::device_scalar d_count(0, stream); + cudf::detail::device_scalar d_count(0, stream); auto const num_blocks = util::div_rounding_up_safe( util::div_rounding_up_safe(chars_bytes, static_cast(bytes_per_thread)), block_size); count_targets<<>>(fn, chars_bytes, d_count.data()); diff --git a/cpp/src/strings/replace/replace.cu b/cpp/src/strings/replace/replace.cu index 16df0dbabdf..52ddef76c1a 100644 --- a/cpp/src/strings/replace/replace.cu +++ b/cpp/src/strings/replace/replace.cu @@ -285,7 +285,7 @@ std::unique_ptr replace_character_parallel(strings_column_view const& in // Count the number of targets in the entire column. // Note this may over-count in the case where a target spans adjacent strings. - rmm::device_scalar d_target_count(0, stream); + cudf::detail::device_scalar d_target_count(0, stream); constexpr int64_t block_size = 512; constexpr size_type bytes_per_thread = 4; auto const num_blocks = util::div_rounding_up_safe( diff --git a/cpp/src/strings/split/split.cuh b/cpp/src/strings/split/split.cuh index 81aca001d53..4b777be9d5b 100644 --- a/cpp/src/strings/split/split.cuh +++ b/cpp/src/strings/split/split.cuh @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -361,7 +362,7 @@ std::pair, rmm::device_uvector> split cudf::detail::offsetalator_factory::make_input_iterator(input.offsets(), input.offset()); // count the number of delimiters in the entire column - rmm::device_scalar d_count(0, stream); + cudf::detail::device_scalar d_count(0, stream); if (chars_bytes > 0) { constexpr int64_t block_size = 512; constexpr size_type bytes_per_thread = 4; diff --git a/cpp/src/text/tokenize.cu b/cpp/src/text/tokenize.cu index df25950e6d5..89ca8a089d6 100644 --- a/cpp/src/text/tokenize.cu +++ b/cpp/src/text/tokenize.cu @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -221,7 +222,7 @@ std::unique_ptr character_tokenize(cudf::strings_column_view const // To minimize memory, count the number of characters so we can // build the output offsets without an intermediate buffer. // In the worst case each byte is a character so the output is 4x the input. - rmm::device_scalar d_count(0, stream); + cudf::detail::device_scalar d_count(0, stream); auto const num_blocks = cudf::util::div_rounding_up_safe( cudf::util::div_rounding_up_safe(chars_bytes, static_cast(bytes_per_thread)), block_size); From fdd2b262aa76400d3d57018461eba37892445a4b Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Fri, 18 Oct 2024 21:03:45 -0400 Subject: [PATCH 09/76] Changing developer guide int_64_t to int64_t (#17130) Fixes #17129 Authors: - Mike Wilson (https://github.com/hyperbolic2346) Approvers: - Nghia Truong (https://github.com/ttnghia) - David Wendt (https://github.com/davidwendt) - Bradley Dice (https://github.com/bdice) - Alessandro Bellina (https://github.com/abellina) - Muhammad Haseeb (https://github.com/mhaseeb123) URL: https://github.com/rapidsai/cudf/pull/17130 --- cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md index fce8adb4c06..311539efbfc 100644 --- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md +++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md @@ -370,7 +370,7 @@ any type that cudf supports. For example, a `list_scalar` representing a list of |Value type|Scalar class|Notes| |-|-|-| |fixed-width|`fixed_width_scalar`| `T` can be any fixed-width type| -|numeric|`numeric_scalar` | `T` can be `int8_t`, `int16_t`, `int32_t`, `int_64_t`, `float` or `double`| +|numeric|`numeric_scalar` | `T` can be `int8_t`, `int16_t`, `int32_t`, `int64_t`, `float` or `double`| |fixed-point|`fixed_point_scalar` | `T` can be `numeric::decimal32` or `numeric::decimal64`| |timestamp|`timestamp_scalar` | `T` can be `timestamp_D`, `timestamp_s`, etc.| |duration|`duration_scalar` | `T` can be `duration_D`, `duration_s`, etc.| From 1ce2526bde7f77d2da7d0927a052fd9ccf69b9f2 Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Sat, 19 Oct 2024 10:40:25 -0500 Subject: [PATCH 10/76] Replace old host tree algorithm with new algorithm in JSON reader (#17019) This PR replaced old tree algorithm in JSON reader, with experimental algorithms and removed the experimental namespace. Changes are old tree algorithm code removal, experimental namespace removal, code of `scatter_offsets` moved, always call new tree algorithm. No functional change is made in this PR. All unit tests should pass with this change. Authors: - Karthikeyan (https://github.com/karthikeyann) Approvers: - Shruti Shivakumar (https://github.com/shrshi) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/17019 --- cpp/src/io/json/host_tree_algorithms.cu | 857 ++++++------------------ cpp/src/io/json/json_column.cu | 29 +- cpp/src/io/json/nested_json.hpp | 15 - 3 files changed, 224 insertions(+), 677 deletions(-) diff --git a/cpp/src/io/json/host_tree_algorithms.cu b/cpp/src/io/json/host_tree_algorithms.cu index f7e8134b68d..7ee652e0239 100644 --- a/cpp/src/io/json/host_tree_algorithms.cu +++ b/cpp/src/io/json/host_tree_algorithms.cu @@ -134,12 +134,13 @@ std::vector copy_strings_to_host_sync( // build std::string vector from chars and offsets std::vector host_data; host_data.reserve(col.size()); - std::transform( - std::begin(h_offsets), - std::end(h_offsets) - 1, - std::begin(h_offsets) + 1, - std::back_inserter(host_data), - [&](auto start, auto end) { return std::string(h_chars.data() + start, end - start); }); + std::transform(std::begin(h_offsets), + std::end(h_offsets) - 1, + std::begin(h_offsets) + 1, + std::back_inserter(host_data), + [&h_chars](auto start, auto end) { + return std::string(h_chars.data() + start, end - start); + }); return host_data; }; return to_host(d_column_names->view()); @@ -173,633 +174,79 @@ rmm::device_uvector is_all_nulls_each_column(device_span auto parse_opt = parsing_options(options, stream); thrust::for_each_n( rmm::exec_policy_nosync(stream), - thrust::counting_iterator(0), - num_nodes, - [options = parse_opt.view(), - data = input.data(), - column_categories = d_column_tree.node_categories.begin(), - col_ids = col_ids.begin(), - range_begin = tree.node_range_begin.begin(), - range_end = tree.node_range_end.begin(), - is_all_nulls = is_all_nulls.begin()] __device__(size_type i) { - auto const node_category = column_categories[col_ids[i]]; - if (node_category == NC_STR or node_category == NC_VAL) { - auto const is_null_literal = serialized_trie_contains( - options.trie_na, - {data + range_begin[i], static_cast(range_end[i] - range_begin[i])}); - if (!is_null_literal) is_all_nulls[col_ids[i]] = false; - } - }); - return is_all_nulls; -} - -NodeIndexT get_row_array_parent_col_id(device_span col_ids, - bool is_enabled_lines, - rmm::cuda_stream_view stream) -{ - NodeIndexT value = parent_node_sentinel; - if (!col_ids.empty()) { - auto const list_node_index = is_enabled_lines ? 0 : 1; - CUDF_CUDA_TRY(cudaMemcpyAsync(&value, - col_ids.data() + list_node_index, - sizeof(NodeIndexT), - cudaMemcpyDefault, - stream.value())); - stream.synchronize(); - } - return value; -} -/** - * @brief Holds member data pointers of `d_json_column` - * - */ -struct json_column_data { - using row_offset_t = json_column::row_offset_t; - row_offset_t* string_offsets; - row_offset_t* string_lengths; - row_offset_t* child_offsets; - bitmask_type* validity; -}; - -using hashmap_of_device_columns = - std::unordered_map>; - -std::pair, hashmap_of_device_columns> build_tree( - device_json_column& root, - host_span is_str_column_all_nulls, - tree_meta_t& d_column_tree, - device_span d_unique_col_ids, - device_span d_max_row_offsets, - std::vector const& column_names, - NodeIndexT row_array_parent_col_id, - bool is_array_of_arrays, - cudf::io::json_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); -void scatter_offsets(tree_meta_t const& tree, - device_span col_ids, - device_span row_offsets, - device_span node_ids, - device_span sorted_col_ids, // Reuse this for parent_col_ids - tree_meta_t const& d_column_tree, - host_span ignore_vals, - hashmap_of_device_columns const& columns, - rmm::cuda_stream_view stream); - -/** - * @brief Constructs `d_json_column` from node tree representation - * Newly constructed columns are inserted into `root`'s children. - * `root` must be a list type. - * - * @param input Input JSON string device data - * @param tree Node tree representation of the JSON string - * @param col_ids Column ids of the nodes in the tree - * @param row_offsets Row offsets of the nodes in the tree - * @param root Root node of the `d_json_column` tree - * @param is_array_of_arrays Whether the tree is an array of arrays - * @param options Parsing options specifying the parsing behaviour - * options affecting behaviour are - * is_enabled_lines: Whether the input is a line-delimited JSON - * is_enabled_mixed_types_as_string: Whether to enable reading mixed types as string - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the device memory - * of child_offets and validity members of `d_json_column` - */ -void make_device_json_column(device_span input, - tree_meta_t const& tree, - device_span col_ids, - device_span row_offsets, - device_json_column& root, - bool is_array_of_arrays, - cudf::io::json_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - bool const is_enabled_lines = options.is_enabled_lines(); - bool const is_enabled_mixed_types_as_string = options.is_enabled_mixed_types_as_string(); - // make a copy - auto sorted_col_ids = cudf::detail::make_device_uvector_async( - col_ids, stream, cudf::get_current_device_resource_ref()); - - // sort by {col_id} on {node_ids} stable - rmm::device_uvector node_ids(col_ids.size(), stream); - thrust::sequence(rmm::exec_policy_nosync(stream), node_ids.begin(), node_ids.end()); - thrust::stable_sort_by_key(rmm::exec_policy_nosync(stream), - sorted_col_ids.begin(), - sorted_col_ids.end(), - node_ids.begin()); - - NodeIndexT const row_array_parent_col_id = - get_row_array_parent_col_id(col_ids, is_enabled_lines, stream); - - // 1. gather column information. - auto [d_column_tree, d_unique_col_ids, d_max_row_offsets] = - reduce_to_column_tree(tree, - col_ids, - sorted_col_ids, - node_ids, - row_offsets, - is_array_of_arrays, - row_array_parent_col_id, - stream); - auto num_columns = d_unique_col_ids.size(); - std::vector column_names = copy_strings_to_host_sync( - input, d_column_tree.node_range_begin, d_column_tree.node_range_end, stream); - // array of arrays column names - if (is_array_of_arrays) { - auto const unique_col_ids = cudf::detail::make_host_vector_async(d_unique_col_ids, stream); - auto const column_parent_ids = - cudf::detail::make_host_vector_async(d_column_tree.parent_node_ids, stream); - TreeDepthT const row_array_children_level = is_enabled_lines ? 1 : 2; - auto values_column_indices = - get_values_column_indices(row_array_children_level, tree, col_ids, num_columns, stream); - auto h_values_column_indices = - cudf::detail::make_host_vector_sync(values_column_indices, stream); - std::transform(unique_col_ids.begin(), - unique_col_ids.end(), - column_names.cbegin(), - column_names.begin(), - [&h_values_column_indices, &column_parent_ids, row_array_parent_col_id]( - auto col_id, auto name) mutable { - return column_parent_ids[col_id] == row_array_parent_col_id - ? std::to_string(h_values_column_indices[col_id]) - : name; - }); - } - - auto const is_str_column_all_nulls = [&, &column_tree = d_column_tree]() { - if (is_enabled_mixed_types_as_string) { - return cudf::detail::make_std_vector_sync( - is_all_nulls_each_column(input, column_tree, tree, col_ids, options, stream), stream); - } - return std::vector(); - }(); - auto const [ignore_vals, columns] = build_tree(root, - is_str_column_all_nulls, - d_column_tree, - d_unique_col_ids, - d_max_row_offsets, - column_names, - row_array_parent_col_id, - is_array_of_arrays, - options, - stream, - mr); - - scatter_offsets(tree, - col_ids, - row_offsets, - node_ids, - sorted_col_ids, - d_column_tree, - ignore_vals, - columns, - stream); -} - -std::pair, hashmap_of_device_columns> build_tree( - device_json_column& root, - host_span is_str_column_all_nulls, - tree_meta_t& d_column_tree, - device_span d_unique_col_ids, - device_span d_max_row_offsets, - std::vector const& column_names, - NodeIndexT row_array_parent_col_id, - bool is_array_of_arrays, - cudf::io::json_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - bool const is_enabled_mixed_types_as_string = options.is_enabled_mixed_types_as_string(); - auto unique_col_ids = cudf::detail::make_host_vector_async(d_unique_col_ids, stream); - auto column_categories = - cudf::detail::make_host_vector_async(d_column_tree.node_categories, stream); - auto const column_parent_ids = - cudf::detail::make_host_vector_async(d_column_tree.parent_node_ids, stream); - auto column_range_beg = - cudf::detail::make_host_vector_async(d_column_tree.node_range_begin, stream); - auto const max_row_offsets = cudf::detail::make_host_vector_async(d_max_row_offsets, stream); - auto num_columns = d_unique_col_ids.size(); - stream.synchronize(); - - auto to_json_col_type = [](auto category) { - switch (category) { - case NC_STRUCT: return json_col_t::StructColumn; - case NC_LIST: return json_col_t::ListColumn; - case NC_STR: [[fallthrough]]; - case NC_VAL: return json_col_t::StringColumn; - default: return json_col_t::Unknown; - } - }; - auto init_to_zero = [stream](auto& v) { - thrust::uninitialized_fill(rmm::exec_policy_nosync(stream), v.begin(), v.end(), 0); - }; - - auto initialize_json_columns = [&](auto i, auto& col, auto column_category) { - if (column_category == NC_ERR || column_category == NC_FN) { - return; - } else if (column_category == NC_VAL || column_category == NC_STR) { - col.string_offsets.resize(max_row_offsets[i] + 1, stream); - col.string_lengths.resize(max_row_offsets[i] + 1, stream); - init_to_zero(col.string_offsets); - init_to_zero(col.string_lengths); - } else if (column_category == NC_LIST) { - col.child_offsets.resize(max_row_offsets[i] + 2, stream); - init_to_zero(col.child_offsets); - } - col.num_rows = max_row_offsets[i] + 1; - col.validity = - cudf::detail::create_null_mask(col.num_rows, cudf::mask_state::ALL_NULL, stream, mr); - col.type = to_json_col_type(column_category); - }; - - auto reinitialize_as_string = [&](auto i, auto& col) { - col.string_offsets.resize(max_row_offsets[i] + 1, stream); - col.string_lengths.resize(max_row_offsets[i] + 1, stream); - init_to_zero(col.string_offsets); - init_to_zero(col.string_lengths); - col.num_rows = max_row_offsets[i] + 1; - col.validity = - cudf::detail::create_null_mask(col.num_rows, cudf::mask_state::ALL_NULL, stream, mr); - col.type = json_col_t::StringColumn; - // destroy references of all child columns after this step, by calling remove_child_columns - }; - - path_from_tree tree_path{column_categories, - column_parent_ids, - column_names, - is_array_of_arrays, - row_array_parent_col_id}; - - // 2. generate nested columns tree and its device_memory - // reorder unique_col_ids w.r.t. column_range_begin for order of column to be in field order. - auto h_range_col_id_it = - thrust::make_zip_iterator(column_range_beg.begin(), unique_col_ids.begin()); - std::sort(h_range_col_id_it, h_range_col_id_it + num_columns, [](auto const& a, auto const& b) { - return thrust::get<0>(a) < thrust::get<0>(b); - }); - - // use hash map because we may skip field name's col_ids - hashmap_of_device_columns columns; - // map{parent_col_id, child_col_name}> = child_col_id, used for null value column tracking - std::map, NodeIndexT> mapped_columns; - // find column_ids which are values, but should be ignored in validity - auto ignore_vals = cudf::detail::make_host_vector(num_columns, stream); - std::fill(ignore_vals.begin(), ignore_vals.end(), false); - std::vector is_mixed_type_column(num_columns, 0); - std::vector is_pruned(num_columns, 0); - // for columns that are not mixed type but have been forced as string - std::vector forced_as_string_column(num_columns); - columns.try_emplace(parent_node_sentinel, std::ref(root)); - - std::function remove_child_columns = - [&](NodeIndexT this_col_id, device_json_column& col) { - for (auto const& col_name : col.column_order) { - auto child_id = mapped_columns[{this_col_id, col_name}]; - is_mixed_type_column[child_id] = 1; - remove_child_columns(child_id, col.child_columns.at(col_name)); - mapped_columns.erase({this_col_id, col_name}); - columns.erase(child_id); - } - col.child_columns.clear(); // their references are deleted above. - col.column_order.clear(); - }; - - auto name_and_parent_index = [&is_array_of_arrays, - &row_array_parent_col_id, - &column_parent_ids, - &column_categories, - &column_names](auto this_col_id) { - std::string name = ""; - auto parent_col_id = column_parent_ids[this_col_id]; - if (parent_col_id == parent_node_sentinel || column_categories[parent_col_id] == NC_LIST) { - if (is_array_of_arrays && parent_col_id == row_array_parent_col_id) { - name = column_names[this_col_id]; - } else { - name = list_child_name; - } - } else if (column_categories[parent_col_id] == NC_FN) { - auto field_name_col_id = parent_col_id; - parent_col_id = column_parent_ids[parent_col_id]; - name = column_names[field_name_col_id]; - } else { - CUDF_FAIL("Unexpected parent column category"); - } - return std::pair{name, parent_col_id}; - }; - - // Prune columns that are not required to be parsed. - if (options.is_enabled_prune_columns()) { - for (auto const this_col_id : unique_col_ids) { - if (column_categories[this_col_id] == NC_ERR || column_categories[this_col_id] == NC_FN) { - continue; - } - // Struct, List, String, Value - auto [name, parent_col_id] = name_and_parent_index(this_col_id); - // get path of this column, and get its dtype if present in options - auto const nt = tree_path.get_path(this_col_id); - std::optional const user_dtype = get_path_data_type(nt, options); - if (!user_dtype.has_value() and parent_col_id != parent_node_sentinel) { - is_pruned[this_col_id] = 1; - continue; - } else { - // make sure all its parents are not pruned. - while (parent_col_id != parent_node_sentinel and is_pruned[parent_col_id] == 1) { - is_pruned[parent_col_id] = 0; - parent_col_id = column_parent_ids[parent_col_id]; - } - } - } - } - - // Build the column tree, also, handles mixed types. - for (auto const this_col_id : unique_col_ids) { - if (column_categories[this_col_id] == NC_ERR || column_categories[this_col_id] == NC_FN) { - continue; - } - // Struct, List, String, Value - auto [name, parent_col_id] = name_and_parent_index(this_col_id); - - // if parent is mixed type column or this column is pruned or if parent - // has been forced as string, ignore this column. - if (parent_col_id != parent_node_sentinel && - (is_mixed_type_column[parent_col_id] || is_pruned[this_col_id]) || - forced_as_string_column[parent_col_id]) { - ignore_vals[this_col_id] = true; - if (is_mixed_type_column[parent_col_id]) { is_mixed_type_column[this_col_id] = 1; } - if (forced_as_string_column[parent_col_id]) { forced_as_string_column[this_col_id] = true; } - continue; - } - - // If the child is already found, - // replace if this column is a nested column and the existing was a value column - // ignore this column if this column is a value column and the existing was a nested column - auto it = columns.find(parent_col_id); - CUDF_EXPECTS(it != columns.end(), "Parent column not found"); - auto& parent_col = it->second.get(); - bool replaced = false; - if (mapped_columns.count({parent_col_id, name}) > 0) { - auto const old_col_id = mapped_columns[{parent_col_id, name}]; - // If mixed type as string is enabled, make both of them strings and merge them. - // All child columns will be ignored when parsing. - if (is_enabled_mixed_types_as_string) { - bool const is_mixed_type = [&]() { - // If new or old is STR and they are all not null, make it mixed type, else ignore. - if (column_categories[this_col_id] == NC_VAL || - column_categories[this_col_id] == NC_STR) { - if (is_str_column_all_nulls[this_col_id]) return false; - } - if (column_categories[old_col_id] == NC_VAL || column_categories[old_col_id] == NC_STR) { - if (is_str_column_all_nulls[old_col_id]) return false; - } - return true; - }(); - if (is_mixed_type) { - is_mixed_type_column[this_col_id] = 1; - is_mixed_type_column[old_col_id] = 1; - // if old col type (not cat) is list or struct, replace with string. - auto& col = columns.at(old_col_id).get(); - if (col.type == json_col_t::ListColumn or col.type == json_col_t::StructColumn) { - reinitialize_as_string(old_col_id, col); - remove_child_columns(old_col_id, col); - // all its children (which are already inserted) are ignored later. - } - col.forced_as_string_column = true; - columns.try_emplace(this_col_id, columns.at(old_col_id)); - continue; - } - } - - if (column_categories[this_col_id] == NC_VAL || column_categories[this_col_id] == NC_STR) { - ignore_vals[this_col_id] = true; - continue; - } - if (column_categories[old_col_id] == NC_VAL || column_categories[old_col_id] == NC_STR) { - // remap - ignore_vals[old_col_id] = true; - mapped_columns.erase({parent_col_id, name}); - columns.erase(old_col_id); - parent_col.child_columns.erase(name); - replaced = true; // to skip duplicate name in column_order - } else { - // If this is a nested column but we're trying to insert either (a) a list node into a - // struct column or (b) a struct node into a list column, we fail - CUDF_EXPECTS(not((column_categories[old_col_id] == NC_LIST and - column_categories[this_col_id] == NC_STRUCT) or - (column_categories[old_col_id] == NC_STRUCT and - column_categories[this_col_id] == NC_LIST)), - "A mix of lists and structs within the same column is not supported"); - } - } - - auto this_column_category = column_categories[this_col_id]; - // get path of this column, check if it is a struct/list forced as string, and enforce it - auto const nt = tree_path.get_path(this_col_id); - std::optional const user_dtype = get_path_data_type(nt, options); - if ((column_categories[this_col_id] == NC_STRUCT or - column_categories[this_col_id] == NC_LIST) and - user_dtype.has_value() and user_dtype.value().id() == type_id::STRING) { - this_column_category = NC_STR; - } - - CUDF_EXPECTS(parent_col.child_columns.count(name) == 0, "duplicate column name: " + name); - // move into parent - device_json_column col(stream, mr); - initialize_json_columns(this_col_id, col, this_column_category); - if ((column_categories[this_col_id] == NC_STRUCT or - column_categories[this_col_id] == NC_LIST) and - user_dtype.has_value() and user_dtype.value().id() == type_id::STRING) { - col.forced_as_string_column = true; - forced_as_string_column[this_col_id] = true; - } - - auto inserted = parent_col.child_columns.try_emplace(name, std::move(col)).second; - CUDF_EXPECTS(inserted, "child column insertion failed, duplicate column name in the parent"); - if (not replaced) parent_col.column_order.push_back(name); - columns.try_emplace(this_col_id, std::ref(parent_col.child_columns.at(name))); - mapped_columns.try_emplace(std::make_pair(parent_col_id, name), this_col_id); - } - - if (is_enabled_mixed_types_as_string) { - // ignore all children of mixed type columns - for (auto const this_col_id : unique_col_ids) { - auto parent_col_id = column_parent_ids[this_col_id]; - if (parent_col_id != parent_node_sentinel and is_mixed_type_column[parent_col_id] == 1) { - is_mixed_type_column[this_col_id] = 1; - ignore_vals[this_col_id] = true; - columns.erase(this_col_id); - } - // Convert only mixed type columns as string (so to copy), but not its children - if (parent_col_id != parent_node_sentinel and is_mixed_type_column[parent_col_id] == 0 and - is_mixed_type_column[this_col_id] == 1) - column_categories[this_col_id] = NC_STR; - } - cudf::detail::cuda_memcpy_async( - d_column_tree.node_categories, column_categories, stream); - } - - // ignore all children of columns forced as string - for (auto const this_col_id : unique_col_ids) { - auto parent_col_id = column_parent_ids[this_col_id]; - if (parent_col_id != parent_node_sentinel and forced_as_string_column[parent_col_id]) { - forced_as_string_column[this_col_id] = true; - ignore_vals[this_col_id] = true; - } - // Convert only mixed type columns as string (so to copy), but not its children - if (parent_col_id != parent_node_sentinel and not forced_as_string_column[parent_col_id] and - forced_as_string_column[this_col_id]) - column_categories[this_col_id] = NC_STR; - } - cudf::detail::cuda_memcpy_async(d_column_tree.node_categories, column_categories, stream); - - // restore unique_col_ids order - std::sort(h_range_col_id_it, h_range_col_id_it + num_columns, [](auto const& a, auto const& b) { - return thrust::get<1>(a) < thrust::get<1>(b); - }); - return {ignore_vals, columns}; -} - -void scatter_offsets(tree_meta_t const& tree, - device_span col_ids, - device_span row_offsets, - device_span node_ids, - device_span sorted_col_ids, // Reuse this for parent_col_ids - tree_meta_t const& d_column_tree, - host_span ignore_vals, - hashmap_of_device_columns const& columns, - rmm::cuda_stream_view stream) -{ - auto const num_nodes = col_ids.size(); - auto const num_columns = d_column_tree.node_categories.size(); - // move columns data to device. - auto columns_data = cudf::detail::make_host_vector(num_columns, stream); - for (auto& [col_id, col_ref] : columns) { - if (col_id == parent_node_sentinel) continue; - auto& col = col_ref.get(); - columns_data[col_id] = json_column_data{col.string_offsets.data(), - col.string_lengths.data(), - col.child_offsets.data(), - static_cast(col.validity.data())}; - } - - auto d_ignore_vals = cudf::detail::make_device_uvector_async( - ignore_vals, stream, cudf::get_current_device_resource_ref()); - auto d_columns_data = cudf::detail::make_device_uvector_async( - columns_data, stream, cudf::get_current_device_resource_ref()); - - // 3. scatter string offsets to respective columns, set validity bits - thrust::for_each_n( - rmm::exec_policy_nosync(stream), - thrust::counting_iterator(0), - num_nodes, - [column_categories = d_column_tree.node_categories.begin(), - col_ids = col_ids.begin(), - row_offsets = row_offsets.begin(), - range_begin = tree.node_range_begin.begin(), - range_end = tree.node_range_end.begin(), - d_ignore_vals = d_ignore_vals.begin(), - d_columns_data = d_columns_data.begin()] __device__(size_type i) { - if (d_ignore_vals[col_ids[i]]) return; - auto const node_category = column_categories[col_ids[i]]; - switch (node_category) { - case NC_STRUCT: set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); break; - case NC_LIST: set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); break; - case NC_STR: [[fallthrough]]; - case NC_VAL: - if (d_ignore_vals[col_ids[i]]) break; - set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); - d_columns_data[col_ids[i]].string_offsets[row_offsets[i]] = range_begin[i]; - d_columns_data[col_ids[i]].string_lengths[row_offsets[i]] = range_end[i] - range_begin[i]; - break; - default: break; - } - }); - - // 4. scatter List offset - // copy_if only node's whose parent is list, (node_id, parent_col_id) - // stable_sort by parent_col_id of {node_id}. - // For all unique parent_node_id of (i==0, i-1!=i), write start offset. - // (i==last, i+1!=i), write end offset. - // unique_copy_by_key {parent_node_id} {row_offset} to - // col[parent_col_id].child_offsets[row_offset[parent_node_id]] - - auto& parent_col_ids = sorted_col_ids; // reuse sorted_col_ids - auto parent_col_id = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), - cuda::proclaim_return_type( - [col_ids = col_ids.begin(), - parent_node_ids = tree.parent_node_ids.begin()] __device__(size_type node_id) { - return parent_node_ids[node_id] == parent_node_sentinel ? parent_node_sentinel - : col_ids[parent_node_ids[node_id]]; - })); - auto const list_children_end = thrust::copy_if( - rmm::exec_policy_nosync(stream), - thrust::make_zip_iterator(thrust::make_counting_iterator(0), parent_col_id), - thrust::make_zip_iterator(thrust::make_counting_iterator(0), parent_col_id) + - num_nodes, - thrust::make_counting_iterator(0), - thrust::make_zip_iterator(node_ids.begin(), parent_col_ids.begin()), - [d_ignore_vals = d_ignore_vals.begin(), - parent_node_ids = tree.parent_node_ids.begin(), - column_categories = d_column_tree.node_categories.begin(), - col_ids = col_ids.begin()] __device__(size_type node_id) { - auto parent_node_id = parent_node_ids[node_id]; - return parent_node_id != parent_node_sentinel and - column_categories[col_ids[parent_node_id]] == NC_LIST and - (!d_ignore_vals[col_ids[parent_node_id]]); - }); - - auto const num_list_children = - list_children_end - thrust::make_zip_iterator(node_ids.begin(), parent_col_ids.begin()); - thrust::stable_sort_by_key(rmm::exec_policy_nosync(stream), - parent_col_ids.begin(), - parent_col_ids.begin() + num_list_children, - node_ids.begin()); - thrust::for_each_n( - rmm::exec_policy_nosync(stream), - thrust::make_counting_iterator(0), - num_list_children, - [node_ids = node_ids.begin(), - parent_node_ids = tree.parent_node_ids.begin(), - parent_col_ids = parent_col_ids.begin(), - row_offsets = row_offsets.begin(), - d_columns_data = d_columns_data.begin(), - num_list_children] __device__(size_type i) { - auto const node_id = node_ids[i]; - auto const parent_node_id = parent_node_ids[node_id]; - // scatter to list_offset - if (i == 0 or parent_node_ids[node_ids[i - 1]] != parent_node_id) { - d_columns_data[parent_col_ids[i]].child_offsets[row_offsets[parent_node_id]] = - row_offsets[node_id]; - } - // last value of list child_offset is its size. - if (i == num_list_children - 1 or parent_node_ids[node_ids[i + 1]] != parent_node_id) { - d_columns_data[parent_col_ids[i]].child_offsets[row_offsets[parent_node_id] + 1] = - row_offsets[node_id] + 1; + thrust::counting_iterator(0), + num_nodes, + [options = parse_opt.view(), + data = input.data(), + column_categories = d_column_tree.node_categories.begin(), + col_ids = col_ids.begin(), + range_begin = tree.node_range_begin.begin(), + range_end = tree.node_range_end.begin(), + is_all_nulls = is_all_nulls.begin()] __device__(size_type i) { + auto const node_category = column_categories[col_ids[i]]; + if (node_category == NC_STR or node_category == NC_VAL) { + auto const is_null_literal = serialized_trie_contains( + options.trie_na, + {data + range_begin[i], static_cast(range_end[i] - range_begin[i])}); + if (!is_null_literal) is_all_nulls[col_ids[i]] = false; } }); + return is_all_nulls; +} - // 5. scan on offsets. - for (auto& [id, col_ref] : columns) { - auto& col = col_ref.get(); - if (col.type == json_col_t::StringColumn) { - thrust::inclusive_scan(rmm::exec_policy_nosync(stream), - col.string_offsets.begin(), - col.string_offsets.end(), - col.string_offsets.begin(), - thrust::maximum{}); - } else if (col.type == json_col_t::ListColumn) { - thrust::inclusive_scan(rmm::exec_policy_nosync(stream), - col.child_offsets.begin(), - col.child_offsets.end(), - col.child_offsets.begin(), - thrust::maximum{}); - } +NodeIndexT get_row_array_parent_col_id(device_span col_ids, + bool is_enabled_lines, + rmm::cuda_stream_view stream) +{ + NodeIndexT value = parent_node_sentinel; + if (!col_ids.empty()) { + auto const list_node_index = is_enabled_lines ? 0 : 1; + CUDF_CUDA_TRY(cudaMemcpyAsync(&value, + col_ids.data() + list_node_index, + sizeof(NodeIndexT), + cudaMemcpyDefault, + stream.value())); + stream.synchronize(); } - stream.synchronize(); + return value; } +/** + * @brief Holds member data pointers of `d_json_column` + * + */ +struct json_column_data { + using row_offset_t = json_column::row_offset_t; + row_offset_t* string_offsets; + row_offset_t* string_lengths; + row_offset_t* child_offsets; + bitmask_type* validity; +}; + +using hashmap_of_device_columns = + std::unordered_map>; + +std::pair, hashmap_of_device_columns> build_tree( + device_json_column& root, + host_span is_str_column_all_nulls, + tree_meta_t& d_column_tree, + device_span d_unique_col_ids, + device_span d_max_row_offsets, + std::vector const& column_names, + NodeIndexT row_array_parent_col_id, + bool is_array_of_arrays, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); -namespace experimental { +void scatter_offsets(tree_meta_t const& tree, + device_span col_ids, + device_span row_offsets, + device_span node_ids, + device_span sorted_col_ids, // Reuse this for parent_col_ids + tree_meta_t const& d_column_tree, + host_span ignore_vals, + hashmap_of_device_columns const& columns, + rmm::cuda_stream_view stream); std::map unified_schema(cudf::io::json_reader_options const& options) { @@ -829,19 +276,6 @@ std::map unified_schema(cudf::io::json_reader_optio options.get_dtypes()); } -std::pair, hashmap_of_device_columns> build_tree( - device_json_column& root, - host_span is_str_column_all_nulls, - tree_meta_t& d_column_tree, - device_span d_unique_col_ids, - device_span d_max_row_offsets, - std::vector const& column_names, - NodeIndexT row_array_parent_col_id, - bool is_array_of_arrays, - cudf::io::json_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - /** * @brief Constructs `d_json_column` from node tree representation * Newly constructed columns are inserted into `root`'s children. @@ -1033,7 +467,7 @@ std::pair, hashmap_of_device_columns> build_tree std::fill_n(is_pruned.begin(), num_columns, options.is_enabled_prune_columns()); // prune all children of a column, but not self. - auto ignore_all_children = [&](auto parent_col_id) { + auto ignore_all_children = [&adj, &is_pruned](auto parent_col_id) { std::deque offspring; if (adj.count(parent_col_id)) { for (auto const& child : adj[parent_col_id]) { @@ -1392,6 +826,145 @@ std::pair, hashmap_of_device_columns> build_tree return {is_pruned, columns}; } -} // namespace experimental + +void scatter_offsets(tree_meta_t const& tree, + device_span col_ids, + device_span row_offsets, + device_span node_ids, + device_span sorted_col_ids, // Reuse this for parent_col_ids + tree_meta_t const& d_column_tree, + host_span ignore_vals, + hashmap_of_device_columns const& columns, + rmm::cuda_stream_view stream) +{ + auto const num_nodes = col_ids.size(); + auto const num_columns = d_column_tree.node_categories.size(); + // move columns data to device. + auto columns_data = cudf::detail::make_host_vector(num_columns, stream); + for (auto& [col_id, col_ref] : columns) { + if (col_id == parent_node_sentinel) continue; + auto& col = col_ref.get(); + columns_data[col_id] = json_column_data{col.string_offsets.data(), + col.string_lengths.data(), + col.child_offsets.data(), + static_cast(col.validity.data())}; + } + + auto d_ignore_vals = cudf::detail::make_device_uvector_async( + ignore_vals, stream, cudf::get_current_device_resource_ref()); + auto d_columns_data = cudf::detail::make_device_uvector_async( + columns_data, stream, cudf::get_current_device_resource_ref()); + + // 3. scatter string offsets to respective columns, set validity bits + thrust::for_each_n( + rmm::exec_policy_nosync(stream), + thrust::counting_iterator(0), + num_nodes, + [column_categories = d_column_tree.node_categories.begin(), + col_ids = col_ids.begin(), + row_offsets = row_offsets.begin(), + range_begin = tree.node_range_begin.begin(), + range_end = tree.node_range_end.begin(), + d_ignore_vals = d_ignore_vals.begin(), + d_columns_data = d_columns_data.begin()] __device__(size_type i) { + if (d_ignore_vals[col_ids[i]]) return; + auto const node_category = column_categories[col_ids[i]]; + switch (node_category) { + case NC_STRUCT: set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); break; + case NC_LIST: set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); break; + case NC_STR: [[fallthrough]]; + case NC_VAL: + if (d_ignore_vals[col_ids[i]]) break; + set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); + d_columns_data[col_ids[i]].string_offsets[row_offsets[i]] = range_begin[i]; + d_columns_data[col_ids[i]].string_lengths[row_offsets[i]] = range_end[i] - range_begin[i]; + break; + default: break; + } + }); + + // 4. scatter List offset + // copy_if only node's whose parent is list, (node_id, parent_col_id) + // stable_sort by parent_col_id of {node_id}. + // For all unique parent_node_id of (i==0, i-1!=i), write start offset. + // (i==last, i+1!=i), write end offset. + // unique_copy_by_key {parent_node_id} {row_offset} to + // col[parent_col_id].child_offsets[row_offset[parent_node_id]] + + auto& parent_col_ids = sorted_col_ids; // reuse sorted_col_ids + auto parent_col_id = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + cuda::proclaim_return_type( + [col_ids = col_ids.begin(), + parent_node_ids = tree.parent_node_ids.begin()] __device__(size_type node_id) { + return parent_node_ids[node_id] == parent_node_sentinel ? parent_node_sentinel + : col_ids[parent_node_ids[node_id]]; + })); + auto const list_children_end = thrust::copy_if( + rmm::exec_policy_nosync(stream), + thrust::make_zip_iterator(thrust::make_counting_iterator(0), parent_col_id), + thrust::make_zip_iterator(thrust::make_counting_iterator(0), parent_col_id) + + num_nodes, + thrust::make_counting_iterator(0), + thrust::make_zip_iterator(node_ids.begin(), parent_col_ids.begin()), + [d_ignore_vals = d_ignore_vals.begin(), + parent_node_ids = tree.parent_node_ids.begin(), + column_categories = d_column_tree.node_categories.begin(), + col_ids = col_ids.begin()] __device__(size_type node_id) { + auto parent_node_id = parent_node_ids[node_id]; + return parent_node_id != parent_node_sentinel and + column_categories[col_ids[parent_node_id]] == NC_LIST and + (!d_ignore_vals[col_ids[parent_node_id]]); + }); + + auto const num_list_children = + list_children_end - thrust::make_zip_iterator(node_ids.begin(), parent_col_ids.begin()); + thrust::stable_sort_by_key(rmm::exec_policy_nosync(stream), + parent_col_ids.begin(), + parent_col_ids.begin() + num_list_children, + node_ids.begin()); + thrust::for_each_n( + rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(0), + num_list_children, + [node_ids = node_ids.begin(), + parent_node_ids = tree.parent_node_ids.begin(), + parent_col_ids = parent_col_ids.begin(), + row_offsets = row_offsets.begin(), + d_columns_data = d_columns_data.begin(), + num_list_children] __device__(size_type i) { + auto const node_id = node_ids[i]; + auto const parent_node_id = parent_node_ids[node_id]; + // scatter to list_offset + if (i == 0 or parent_node_ids[node_ids[i - 1]] != parent_node_id) { + d_columns_data[parent_col_ids[i]].child_offsets[row_offsets[parent_node_id]] = + row_offsets[node_id]; + } + // last value of list child_offset is its size. + if (i == num_list_children - 1 or parent_node_ids[node_ids[i + 1]] != parent_node_id) { + d_columns_data[parent_col_ids[i]].child_offsets[row_offsets[parent_node_id] + 1] = + row_offsets[node_id] + 1; + } + }); + + // 5. scan on offsets. + for (auto& [id, col_ref] : columns) { + auto& col = col_ref.get(); + if (col.type == json_col_t::StringColumn) { + thrust::inclusive_scan(rmm::exec_policy_nosync(stream), + col.string_offsets.begin(), + col.string_offsets.end(), + col.string_offsets.begin(), + thrust::maximum{}); + } else if (col.type == json_col_t::ListColumn) { + thrust::inclusive_scan(rmm::exec_policy_nosync(stream), + col.child_offsets.begin(), + col.child_offsets.end(), + col.child_offsets.begin(), + thrust::maximum{}); + } + } + stream.synchronize(); +} } // namespace cudf::io::json::detail diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 912e93d52ae..4584f71775f 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -485,16 +485,6 @@ std::pair, std::vector> device_json_co } } -template -auto make_device_json_column_dispatch(bool experimental, Args&&... args) -{ - if (experimental) { - return experimental::make_device_json_column(std::forward(args)...); - } else { - return make_device_json_column(std::forward(args)...); - } -} - table_with_metadata device_parse_nested_json(device_span d_input, cudf::io::json_reader_options const& options, rmm::cuda_stream_view stream, @@ -553,16 +543,15 @@ table_with_metadata device_parse_nested_json(device_span d_input, 0); // Get internal JSON column - make_device_json_column_dispatch(options.is_enabled_experimental(), - d_input, - gpu_tree, - gpu_col_id, - gpu_row_offsets, - root_column, - is_array_of_arrays, - options, - stream, - mr); + make_device_json_column(d_input, + gpu_tree, + gpu_col_id, + gpu_row_offsets, + root_column, + is_array_of_arrays, + options, + stream, + mr); // data_root refers to the root column of the data represented by the given JSON string auto& data_root = diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 3d9a51833e0..f6be4539d7f 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -405,21 +405,6 @@ void make_device_json_column(device_span input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -namespace experimental { -/** - * @copydoc cudf::io::json::detail::make_device_json_column - */ -void make_device_json_column(device_span input, - tree_meta_t const& tree, - device_span col_ids, - device_span row_offsets, - device_json_column& root, - bool is_array_of_arrays, - cudf::io::json_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); -} // namespace experimental - /** * @brief Retrieves the parse_options to be used for type inference and type casting * From 074ab749531aa136c546afc7837fec0b404fe022 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Sat, 19 Oct 2024 14:34:24 -0700 Subject: [PATCH 11/76] Split hash-based groupby into multiple smaller files to reduce build time (#17089) This work is part of splitting the original bulk shared memory groupby PR https://github.com/rapidsai/cudf/pull/16619. This PR splits the hash-based groupby file into multiple translation units and uses explicit template instantiations to help reduce build time. It also includes some minor cleanups without significant functional changes. Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Kyle Edwards (https://github.com/KyleFromNVIDIA) - Nghia Truong (https://github.com/ttnghia) - David Wendt (https://github.com/davidwendt) - Muhammad Haseeb (https://github.com/mhaseeb123) URL: https://github.com/rapidsai/cudf/pull/17089 --- cpp/CMakeLists.txt | 5 + cpp/src/groupby/hash/compute_groupby.cu | 142 ++++++ cpp/src/groupby/hash/compute_groupby.hpp | 95 ++++ .../groupby/hash/compute_single_pass_aggs.cu | 99 ++++ .../groupby/hash/compute_single_pass_aggs.hpp | 38 ++ .../hash/create_sparse_results_table.cu | 67 +++ .../hash/create_sparse_results_table.hpp | 32 ++ cpp/src/groupby/hash/groupby.cu | 459 +----------------- .../hash/hash_compound_agg_finalizer.cu | 199 ++++++++ .../hash/hash_compound_agg_finalizer.hpp | 69 +++ cpp/src/groupby/hash/helpers.cuh | 116 +++++ ...y_kernels.cuh => single_pass_functors.cuh} | 14 +- .../groupby/hash/sparse_to_dense_results.cu | 72 +++ .../groupby/hash/sparse_to_dense_results.hpp | 51 ++ 14 files changed, 1012 insertions(+), 446 deletions(-) create mode 100644 cpp/src/groupby/hash/compute_groupby.cu create mode 100644 cpp/src/groupby/hash/compute_groupby.hpp create mode 100644 cpp/src/groupby/hash/compute_single_pass_aggs.cu create mode 100644 cpp/src/groupby/hash/compute_single_pass_aggs.hpp create mode 100644 cpp/src/groupby/hash/create_sparse_results_table.cu create mode 100644 cpp/src/groupby/hash/create_sparse_results_table.hpp create mode 100644 cpp/src/groupby/hash/hash_compound_agg_finalizer.cu create mode 100644 cpp/src/groupby/hash/hash_compound_agg_finalizer.hpp create mode 100644 cpp/src/groupby/hash/helpers.cuh rename cpp/src/groupby/hash/{groupby_kernels.cuh => single_pass_functors.cuh} (95%) create mode 100644 cpp/src/groupby/hash/sparse_to_dense_results.cu create mode 100644 cpp/src/groupby/hash/sparse_to_dense_results.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 32a753c9f40..e4b9cbf8921 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -368,8 +368,13 @@ add_library( src/filling/repeat.cu src/filling/sequence.cu src/groupby/groupby.cu + src/groupby/hash/compute_groupby.cu + src/groupby/hash/compute_single_pass_aggs.cu + src/groupby/hash/create_sparse_results_table.cu src/groupby/hash/flatten_single_pass_aggs.cpp src/groupby/hash/groupby.cu + src/groupby/hash/hash_compound_agg_finalizer.cu + src/groupby/hash/sparse_to_dense_results.cu src/groupby/sort/aggregate.cpp src/groupby/sort/group_argmax.cu src/groupby/sort/group_argmin.cu diff --git a/cpp/src/groupby/hash/compute_groupby.cu b/cpp/src/groupby/hash/compute_groupby.cu new file mode 100644 index 00000000000..59457bea694 --- /dev/null +++ b/cpp/src/groupby/hash/compute_groupby.cu @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "compute_groupby.hpp" +#include "compute_single_pass_aggs.hpp" +#include "helpers.cuh" +#include "sparse_to_dense_results.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include + +namespace cudf::groupby::detail::hash { +template +rmm::device_uvector extract_populated_keys(SetType const& key_set, + size_type num_keys, + rmm::cuda_stream_view stream) +{ + rmm::device_uvector populated_keys(num_keys, stream); + auto const keys_end = key_set.retrieve_all(populated_keys.begin(), stream.value()); + + populated_keys.resize(std::distance(populated_keys.begin(), keys_end), stream); + return populated_keys; +} + +template +std::unique_ptr compute_groupby(table_view const& keys, + host_span requests, + bool skip_rows_with_nulls, + Equal const& d_row_equal, + Hash const& d_row_hash, + cudf::detail::result_cache* cache, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + // convert to int64_t to avoid potential overflow with large `keys` + auto const num_keys = static_cast(keys.num_rows()); + + // Cache of sparse results where the location of aggregate value in each + // column is indexed by the hash set + cudf::detail::result_cache sparse_results(requests.size()); + + auto const set = cuco::static_set{ + num_keys, + cudf::detail::CUCO_DESIRED_LOAD_FACTOR, // 50% load factor + cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL}, + d_row_equal, + probing_scheme_t{d_row_hash}, + cuco::thread_scope_device, + cuco::storage{}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()}; + + auto row_bitmask = + skip_rows_with_nulls + ? cudf::bitmask_and(keys, stream, cudf::get_current_device_resource_ref()).first + : rmm::device_buffer{}; + + // Compute all single pass aggs first + compute_single_pass_aggs(num_keys, + skip_rows_with_nulls, + static_cast(row_bitmask.data()), + set.ref(cuco::insert_and_find), + requests, + &sparse_results, + stream); + + // Extract the populated indices from the hash set and create a gather map. + // Gathering using this map from sparse results will give dense results. + auto gather_map = extract_populated_keys(set, keys.num_rows(), stream); + + // Compact all results from sparse_results and insert into cache + sparse_to_dense_results(requests, + &sparse_results, + cache, + gather_map, + set.ref(cuco::find), + static_cast(row_bitmask.data()), + stream, + mr); + + return cudf::detail::gather(keys, + gather_map, + out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); +} + +template rmm::device_uvector extract_populated_keys( + global_set_t const& key_set, size_type num_keys, rmm::cuda_stream_view stream); + +template rmm::device_uvector extract_populated_keys( + nullable_global_set_t const& key_set, size_type num_keys, rmm::cuda_stream_view stream); + +template std::unique_ptr
compute_groupby( + table_view const& keys, + host_span requests, + bool skip_rows_with_nulls, + row_comparator_t const& d_row_equal, + row_hash_t const& d_row_hash, + cudf::detail::result_cache* cache, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +template std::unique_ptr
compute_groupby( + table_view const& keys, + host_span requests, + bool skip_rows_with_nulls, + nullable_row_comparator_t const& d_row_equal, + row_hash_t const& d_row_hash, + cudf::detail::result_cache* cache, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/compute_groupby.hpp b/cpp/src/groupby/hash/compute_groupby.hpp new file mode 100644 index 00000000000..7bb3a60ff07 --- /dev/null +++ b/cpp/src/groupby/hash/compute_groupby.hpp @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +namespace cudf::groupby::detail::hash { +/** + * @brief Computes and returns a device vector containing all populated keys in + * `key_set`. + * + * @tparam SetType Type of key hash set + * + * @param key_set Key hash set + * @param num_keys Number of input keys + * @param stream CUDA stream used for device memory operations and kernel launches + * @return An array of unique keys contained in `key_set` + */ +template +rmm::device_uvector extract_populated_keys(SetType const& key_set, + size_type num_keys, + rmm::cuda_stream_view stream); + +/** + * @brief Computes groupby using hash table. + * + * First, we create a hash table that stores the indices of unique rows in + * `keys`. The upper limit on the number of values in this map is the number + * of rows in `keys`. + * + * To store the results of aggregations, we create temporary sparse columns + * which have the same size as input value columns. Using the hash map, we + * determine the location within the sparse column to write the result of the + * aggregation into. + * + * The sparse column results of all aggregations are stored into the cache + * `sparse_results`. This enables the use of previously calculated results in + * other aggregations. + * + * All the aggregations which can be computed in a single pass are computed + * first, in a combined kernel. Then using these results, aggregations that + * require multiple passes, will be computed. + * + * Finally, using the hash map, we generate a vector of indices of populated + * values in sparse result columns. Then, for each aggregation originally + * requested in `requests`, we gather sparse results into a column of dense + * results using the aforementioned index vector. Dense results are stored into + * the in/out parameter `cache`. + * + * @tparam Equal Device row comparator type + * @tparam Hash Device row hasher type + * + * @param keys Table whose rows act as the groupby keys + * @param requests The set of columns to aggregate and the aggregations to perform + * @param skip_rows_with_nulls Flag indicating whether to ignore nulls or not + * @param d_row_equal Device row comparator + * @param d_row_hash Device row hasher + * @param cache Dense aggregation results + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned table + * @return Table of unique keys + */ +template +std::unique_ptr compute_groupby(table_view const& keys, + host_span requests, + bool skip_rows_with_nulls, + Equal const& d_row_equal, + Hash const& d_row_hash, + cudf::detail::result_cache* cache, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/compute_single_pass_aggs.cu b/cpp/src/groupby/hash/compute_single_pass_aggs.cu new file mode 100644 index 00000000000..e292543e6e9 --- /dev/null +++ b/cpp/src/groupby/hash/compute_single_pass_aggs.cu @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "compute_single_pass_aggs.hpp" +#include "create_sparse_results_table.hpp" +#include "flatten_single_pass_aggs.hpp" +#include "helpers.cuh" +#include "single_pass_functors.cuh" +#include "var_hash_functor.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +namespace cudf::groupby::detail::hash { +/** + * @brief Computes all aggregations from `requests` that require a single pass + * over the data and stores the results in `sparse_results` + */ +template +void compute_single_pass_aggs(int64_t num_keys, + bool skip_rows_with_nulls, + bitmask_type const* row_bitmask, + SetType set, + host_span requests, + cudf::detail::result_cache* sparse_results, + rmm::cuda_stream_view stream) +{ + // flatten the aggs to a table that can be operated on by aggregate_row + auto const [flattened_values, agg_kinds, aggs] = flatten_single_pass_aggs(requests); + + // make table that will hold sparse results + table sparse_table = create_sparse_results_table(flattened_values, agg_kinds, stream); + // prepare to launch kernel to do the actual aggregation + auto d_sparse_table = mutable_table_device_view::create(sparse_table, stream); + auto d_values = table_device_view::create(flattened_values, stream); + auto const d_aggs = cudf::detail::make_device_uvector_async( + agg_kinds, stream, cudf::get_current_device_resource_ref()); + + thrust::for_each_n( + rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(0), + num_keys, + hash::compute_single_pass_aggs_fn{ + set, *d_values, *d_sparse_table, d_aggs.data(), row_bitmask, skip_rows_with_nulls}); + // Add results back to sparse_results cache + auto sparse_result_cols = sparse_table.release(); + for (size_t i = 0; i < aggs.size(); i++) { + // Note that the cache will make a copy of this temporary aggregation + sparse_results->add_result( + flattened_values.column(i), *aggs[i], std::move(sparse_result_cols[i])); + } +} + +template void compute_single_pass_aggs>( + int64_t num_keys, + bool skip_rows_with_nulls, + bitmask_type const* row_bitmask, + hash_set_ref_t set, + host_span requests, + cudf::detail::result_cache* sparse_results, + rmm::cuda_stream_view stream); + +template void compute_single_pass_aggs>( + int64_t num_keys, + bool skip_rows_with_nulls, + bitmask_type const* row_bitmask, + nullable_hash_set_ref_t set, + host_span requests, + cudf::detail::result_cache* sparse_results, + rmm::cuda_stream_view stream); +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/compute_single_pass_aggs.hpp b/cpp/src/groupby/hash/compute_single_pass_aggs.hpp new file mode 100644 index 00000000000..a7434bdf61a --- /dev/null +++ b/cpp/src/groupby/hash/compute_single_pass_aggs.hpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include + +#include + +namespace cudf::groupby::detail::hash { +/** + * @brief Computes all aggregations from `requests` that require a single pass + * over the data and stores the results in `sparse_results` + */ +template +void compute_single_pass_aggs(int64_t num_keys, + bool skip_rows_with_nulls, + bitmask_type const* row_bitmask, + SetType set, + cudf::host_span requests, + cudf::detail::result_cache* sparse_results, + rmm::cuda_stream_view stream); +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/create_sparse_results_table.cu b/cpp/src/groupby/hash/create_sparse_results_table.cu new file mode 100644 index 00000000000..22fa4fc584c --- /dev/null +++ b/cpp/src/groupby/hash/create_sparse_results_table.cu @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "create_sparse_results_table.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +namespace cudf::groupby::detail::hash { +// make table that will hold sparse results +cudf::table create_sparse_results_table(table_view const& flattened_values, + std::vector aggs, + rmm::cuda_stream_view stream) +{ + // TODO single allocation - room for performance improvement + std::vector> sparse_columns; + sparse_columns.reserve(flattened_values.num_columns()); + std::transform( + flattened_values.begin(), + flattened_values.end(), + aggs.begin(), + std::back_inserter(sparse_columns), + [stream](auto const& col, auto const& agg) { + bool nullable = + (agg == aggregation::COUNT_VALID or agg == aggregation::COUNT_ALL) + ? false + : (col.has_nulls() or agg == aggregation::VARIANCE or agg == aggregation::STD); + auto mask_flag = (nullable) ? mask_state::ALL_NULL : mask_state::UNALLOCATED; + + auto col_type = cudf::is_dictionary(col.type()) + ? cudf::dictionary_column_view(col).keys().type() + : col.type(); + + return make_fixed_width_column( + cudf::detail::target_type(col_type, agg), col.size(), mask_flag, stream); + }); + + table sparse_table(std::move(sparse_columns)); + mutable_table_view table_view = sparse_table.mutable_view(); + cudf::detail::initialize_with_identity(table_view, aggs, stream); + return sparse_table; +} +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/create_sparse_results_table.hpp b/cpp/src/groupby/hash/create_sparse_results_table.hpp new file mode 100644 index 00000000000..c1d4e0d3f20 --- /dev/null +++ b/cpp/src/groupby/hash/create_sparse_results_table.hpp @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include + +#include + +#include + +namespace cudf::groupby::detail::hash { +// make table that will hold sparse results +cudf::table create_sparse_results_table(table_view const& flattened_values, + std::vector aggs_kinds, + rmm::cuda_stream_view stream); +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 0432b9d120a..30e1d52fdbf 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -14,60 +14,32 @@ * limitations under the License. */ -#include "flatten_single_pass_aggs.hpp" +#include "compute_groupby.hpp" #include "groupby/common/utils.hpp" -#include "groupby_kernels.cuh" -#include "var_hash_functor.cuh" +#include "helpers.cuh" #include -#include -#include -#include -#include -#include #include -#include -#include -#include #include -#include -#include -#include +#include #include #include -#include #include #include -#include #include #include -#include #include #include #include -#include -#include -#include - +#include #include -#include #include +#include -namespace cudf { -namespace groupby { -namespace detail { -namespace hash { +namespace cudf::groupby::detail::hash { namespace { - -// TODO: similar to `contains_table`, using larger CG size like 2 or 4 for nested -// types and `cg_size = 1`for flat data to improve performance -using probing_scheme_type = cuco::linear_probing< - 1, ///< Number of threads used to handle each input key - cudf::experimental::row::hash::device_row_hasher>; - /** * @brief List of aggregation operations that can be computed with a hash-based * implementation. @@ -112,413 +84,33 @@ bool constexpr is_hash_aggregation(aggregation::Kind t) return array_contains(hash_aggregations, t); } -template -class hash_compound_agg_finalizer final : public cudf::detail::aggregation_finalizer { - column_view col; - data_type result_type; - cudf::detail::result_cache* sparse_results; - cudf::detail::result_cache* dense_results; - device_span gather_map; - SetType set; - bitmask_type const* __restrict__ row_bitmask; - rmm::cuda_stream_view stream; - rmm::device_async_resource_ref mr; - - public: - using cudf::detail::aggregation_finalizer::visit; - - hash_compound_agg_finalizer(column_view col, - cudf::detail::result_cache* sparse_results, - cudf::detail::result_cache* dense_results, - device_span gather_map, - SetType set, - bitmask_type const* row_bitmask, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) - : col(col), - sparse_results(sparse_results), - dense_results(dense_results), - gather_map(gather_map), - set(set), - row_bitmask(row_bitmask), - stream(stream), - mr(mr) - { - result_type = cudf::is_dictionary(col.type()) ? cudf::dictionary_column_view(col).keys().type() - : col.type(); - } - - auto to_dense_agg_result(cudf::aggregation const& agg) - { - auto s = sparse_results->get_result(col, agg); - auto dense_result_table = cudf::detail::gather(table_view({std::move(s)}), - gather_map, - out_of_bounds_policy::DONT_CHECK, - cudf::detail::negative_index_policy::NOT_ALLOWED, - stream, - mr); - return std::move(dense_result_table->release()[0]); - } - - // Enables conversion of ARGMIN/ARGMAX into MIN/MAX - auto gather_argminmax(aggregation const& agg) - { - auto arg_result = to_dense_agg_result(agg); - // We make a view of ARG(MIN/MAX) result without a null mask and gather - // using this map. The values in data buffer of ARG(MIN/MAX) result - // corresponding to null values was initialized to ARG(MIN/MAX)_SENTINEL - // which is an out of bounds index value (-1) and causes the gathered - // value to be null. - column_view null_removed_map( - data_type(type_to_id()), - arg_result->size(), - static_cast(arg_result->view().template data()), - nullptr, - 0); - auto gather_argminmax = - cudf::detail::gather(table_view({col}), - null_removed_map, - arg_result->nullable() ? cudf::out_of_bounds_policy::NULLIFY - : cudf::out_of_bounds_policy::DONT_CHECK, - cudf::detail::negative_index_policy::NOT_ALLOWED, - stream, - mr); - return std::move(gather_argminmax->release()[0]); - } - - // Declare overloads for each kind of aggregation to dispatch - void visit(cudf::aggregation const& agg) override - { - if (dense_results->has_result(col, agg)) return; - dense_results->add_result(col, agg, to_dense_agg_result(agg)); - } - - void visit(cudf::detail::min_aggregation const& agg) override - { - if (dense_results->has_result(col, agg)) return; - if (result_type.id() == type_id::STRING) { - auto transformed_agg = make_argmin_aggregation(); - dense_results->add_result(col, agg, gather_argminmax(*transformed_agg)); - } else { - dense_results->add_result(col, agg, to_dense_agg_result(agg)); - } - } - - void visit(cudf::detail::max_aggregation const& agg) override - { - if (dense_results->has_result(col, agg)) return; - - if (result_type.id() == type_id::STRING) { - auto transformed_agg = make_argmax_aggregation(); - dense_results->add_result(col, agg, gather_argminmax(*transformed_agg)); - } else { - dense_results->add_result(col, agg, to_dense_agg_result(agg)); - } - } - - void visit(cudf::detail::mean_aggregation const& agg) override - { - if (dense_results->has_result(col, agg)) return; - - auto sum_agg = make_sum_aggregation(); - auto count_agg = make_count_aggregation(); - this->visit(*sum_agg); - this->visit(*count_agg); - column_view sum_result = dense_results->get_result(col, *sum_agg); - column_view count_result = dense_results->get_result(col, *count_agg); - - auto result = - cudf::detail::binary_operation(sum_result, - count_result, - binary_operator::DIV, - cudf::detail::target_type(result_type, aggregation::MEAN), - stream, - mr); - dense_results->add_result(col, agg, std::move(result)); - } - - void visit(cudf::detail::var_aggregation const& agg) override - { - if (dense_results->has_result(col, agg)) return; - - auto sum_agg = make_sum_aggregation(); - auto count_agg = make_count_aggregation(); - this->visit(*sum_agg); - this->visit(*count_agg); - column_view sum_result = sparse_results->get_result(col, *sum_agg); - column_view count_result = sparse_results->get_result(col, *count_agg); - - auto values_view = column_device_view::create(col, stream); - auto sum_view = column_device_view::create(sum_result, stream); - auto count_view = column_device_view::create(count_result, stream); - - auto var_result = make_fixed_width_column( - cudf::detail::target_type(result_type, agg.kind), col.size(), mask_state::ALL_NULL, stream); - auto var_result_view = mutable_column_device_view::create(var_result->mutable_view(), stream); - mutable_table_view var_table_view{{var_result->mutable_view()}}; - cudf::detail::initialize_with_identity(var_table_view, {agg.kind}, stream); - - thrust::for_each_n( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - col.size(), - var_hash_functor{ - set, row_bitmask, *var_result_view, *values_view, *sum_view, *count_view, agg._ddof}); - sparse_results->add_result(col, agg, std::move(var_result)); - dense_results->add_result(col, agg, to_dense_agg_result(agg)); - } - - void visit(cudf::detail::std_aggregation const& agg) override - { - if (dense_results->has_result(col, agg)) return; - auto var_agg = make_variance_aggregation(agg._ddof); - this->visit(*dynamic_cast(var_agg.get())); - column_view variance = dense_results->get_result(col, *var_agg); - - auto result = cudf::detail::unary_operation(variance, unary_operator::SQRT, stream, mr); - dense_results->add_result(col, agg, std::move(result)); - } -}; - -/** - * @brief Gather sparse results into dense using `gather_map` and add to - * `dense_cache` - * - * @see groupby_null_templated() - */ -template -void sparse_to_dense_results(table_view const& keys, - host_span requests, - cudf::detail::result_cache* sparse_results, - cudf::detail::result_cache* dense_results, - device_span gather_map, - SetType set, - bool keys_have_nulls, - null_policy include_null_keys, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto row_bitmask = - cudf::detail::bitmask_and(keys, stream, cudf::get_current_device_resource_ref()).first; - bool skip_key_rows_with_nulls = keys_have_nulls and include_null_keys == null_policy::EXCLUDE; - bitmask_type const* row_bitmask_ptr = - skip_key_rows_with_nulls ? static_cast(row_bitmask.data()) : nullptr; - - for (auto const& request : requests) { - auto const& agg_v = request.aggregations; - auto const& col = request.values; - - // Given an aggregation, this will get the result from sparse_results and - // convert and return dense, compacted result - auto finalizer = hash_compound_agg_finalizer( - col, sparse_results, dense_results, gather_map, set, row_bitmask_ptr, stream, mr); - for (auto&& agg : agg_v) { - agg->finalize(finalizer); - } - } -} - -// make table that will hold sparse results -auto create_sparse_results_table(table_view const& flattened_values, - std::vector aggs, - rmm::cuda_stream_view stream) -{ - // TODO single allocation - room for performance improvement - std::vector> sparse_columns; - std::transform( - flattened_values.begin(), - flattened_values.end(), - aggs.begin(), - std::back_inserter(sparse_columns), - [stream](auto const& col, auto const& agg) { - bool nullable = - (agg == aggregation::COUNT_VALID or agg == aggregation::COUNT_ALL) - ? false - : (col.has_nulls() or agg == aggregation::VARIANCE or agg == aggregation::STD); - auto mask_flag = (nullable) ? mask_state::ALL_NULL : mask_state::UNALLOCATED; - - auto col_type = cudf::is_dictionary(col.type()) - ? cudf::dictionary_column_view(col).keys().type() - : col.type(); - - return make_fixed_width_column( - cudf::detail::target_type(col_type, agg), col.size(), mask_flag, stream); - }); - - table sparse_table(std::move(sparse_columns)); - mutable_table_view table_view = sparse_table.mutable_view(); - cudf::detail::initialize_with_identity(table_view, aggs, stream); - return sparse_table; -} - -/** - * @brief Computes all aggregations from `requests` that require a single pass - * over the data and stores the results in `sparse_results` - */ -template -void compute_single_pass_aggs(table_view const& keys, - host_span requests, - cudf::detail::result_cache* sparse_results, - SetType set, - bool keys_have_nulls, - null_policy include_null_keys, - rmm::cuda_stream_view stream) -{ - // flatten the aggs to a table that can be operated on by aggregate_row - auto const [flattened_values, agg_kinds, aggs] = flatten_single_pass_aggs(requests); - - // make table that will hold sparse results - table sparse_table = create_sparse_results_table(flattened_values, agg_kinds, stream); - // prepare to launch kernel to do the actual aggregation - auto d_sparse_table = mutable_table_device_view::create(sparse_table, stream); - auto d_values = table_device_view::create(flattened_values, stream); - auto const d_aggs = cudf::detail::make_device_uvector_async( - agg_kinds, stream, cudf::get_current_device_resource_ref()); - auto const skip_key_rows_with_nulls = - keys_have_nulls and include_null_keys == null_policy::EXCLUDE; - - auto row_bitmask = - skip_key_rows_with_nulls - ? cudf::detail::bitmask_and(keys, stream, cudf::get_current_device_resource_ref()).first - : rmm::device_buffer{}; - - thrust::for_each_n( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - keys.num_rows(), - hash::compute_single_pass_aggs_fn{set, - *d_values, - *d_sparse_table, - d_aggs.data(), - static_cast(row_bitmask.data()), - skip_key_rows_with_nulls}); - // Add results back to sparse_results cache - auto sparse_result_cols = sparse_table.release(); - for (size_t i = 0; i < aggs.size(); i++) { - // Note that the cache will make a copy of this temporary aggregation - sparse_results->add_result( - flattened_values.column(i), *aggs[i], std::move(sparse_result_cols[i])); - } -} - -/** - * @brief Computes and returns a device vector containing all populated keys in - * `map`. - */ -template -rmm::device_uvector extract_populated_keys(SetType const& key_set, - size_type num_keys, - rmm::cuda_stream_view stream) -{ - rmm::device_uvector populated_keys(num_keys, stream); - auto const keys_end = key_set.retrieve_all(populated_keys.begin(), stream.value()); - - populated_keys.resize(std::distance(populated_keys.begin(), keys_end), stream); - return populated_keys; -} - -/** - * @brief Computes groupby using hash table. - * - * First, we create a hash table that stores the indices of unique rows in - * `keys`. The upper limit on the number of values in this map is the number - * of rows in `keys`. - * - * To store the results of aggregations, we create temporary sparse columns - * which have the same size as input value columns. Using the hash map, we - * determine the location within the sparse column to write the result of the - * aggregation into. - * - * The sparse column results of all aggregations are stored into the cache - * `sparse_results`. This enables the use of previously calculated results in - * other aggregations. - * - * All the aggregations which can be computed in a single pass are computed - * first, in a combined kernel. Then using these results, aggregations that - * require multiple passes, will be computed. - * - * Finally, using the hash map, we generate a vector of indices of populated - * values in sparse result columns. Then, for each aggregation originally - * requested in `requests`, we gather sparse results into a column of dense - * results using the aforementioned index vector. Dense results are stored into - * the in/out parameter `cache`. - */ -std::unique_ptr
groupby(table_view const& keys, - host_span requests, - cudf::detail::result_cache* cache, - bool const keys_have_nulls, - null_policy const include_null_keys, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) +std::unique_ptr
dispatch_groupby(table_view const& keys, + host_span requests, + cudf::detail::result_cache* cache, + bool const keys_have_nulls, + null_policy const include_null_keys, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { - // convert to int64_t to avoid potential overflow with large `keys` - auto const num_keys = static_cast(keys.num_rows()); - auto const null_keys_are_equal = null_equality::EQUAL; - auto const has_null = nullate::DYNAMIC{cudf::has_nested_nulls(keys)}; + auto const null_keys_are_equal = null_equality::EQUAL; + auto const has_null = nullate::DYNAMIC{cudf::has_nested_nulls(keys)}; + auto const skip_rows_with_nulls = keys_have_nulls and include_null_keys == null_policy::EXCLUDE; auto preprocessed_keys = cudf::experimental::row::hash::preprocessed_table::create(keys, stream); auto const comparator = cudf::experimental::row::equality::self_comparator{preprocessed_keys}; auto const row_hash = cudf::experimental::row::hash::row_hasher{std::move(preprocessed_keys)}; auto const d_row_hash = row_hash.device_hasher(has_null); - // Cache of sparse results where the location of aggregate value in each - // column is indexed by the hash set - cudf::detail::result_cache sparse_results(requests.size()); - - auto const comparator_helper = [&](auto const d_key_equal) { - auto const set = cuco::static_set{ - num_keys, - 0.5, // desired load factor - cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL}, - d_key_equal, - probing_scheme_type{d_row_hash}, - cuco::thread_scope_device, - cuco::storage<1>{}, - cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, - stream.value()}; - - // Compute all single pass aggs first - compute_single_pass_aggs(keys, - requests, - &sparse_results, - set.ref(cuco::insert_and_find), - keys_have_nulls, - include_null_keys, - stream); - - // Extract the populated indices from the hash set and create a gather map. - // Gathering using this map from sparse results will give dense results. - auto gather_map = extract_populated_keys(set, keys.num_rows(), stream); - - // Compact all results from sparse_results and insert into cache - sparse_to_dense_results(keys, - requests, - &sparse_results, - cache, - gather_map, - set.ref(cuco::find), - keys_have_nulls, - include_null_keys, - stream, - mr); - - return cudf::detail::gather(keys, - gather_map, - out_of_bounds_policy::DONT_CHECK, - cudf::detail::negative_index_policy::NOT_ALLOWED, - stream, - mr); - }; - if (cudf::detail::has_nested_columns(keys)) { - auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); - return comparator_helper(d_key_equal); + auto const d_row_equal = comparator.equal_to(has_null, null_keys_are_equal); + return compute_groupby( + keys, requests, skip_rows_with_nulls, d_row_equal, d_row_hash, cache, stream, mr); } else { - auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); - return comparator_helper(d_key_equal); + auto const d_row_equal = comparator.equal_to(has_null, null_keys_are_equal); + return compute_groupby( + keys, requests, skip_rows_with_nulls, d_row_equal, d_row_hash, cache, stream, mr); } } - } // namespace /** @@ -559,11 +151,8 @@ std::pair, std::vector> groupby( cudf::detail::result_cache cache(requests.size()); std::unique_ptr
unique_keys = - groupby(keys, requests, &cache, cudf::has_nulls(keys), include_null_keys, stream, mr); + dispatch_groupby(keys, requests, &cache, cudf::has_nulls(keys), include_null_keys, stream, mr); return std::pair(std::move(unique_keys), extract_results(requests, cache, stream, mr)); } -} // namespace hash -} // namespace detail -} // namespace groupby -} // namespace cudf +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu b/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu new file mode 100644 index 00000000000..37a61c1a22c --- /dev/null +++ b/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "hash_compound_agg_finalizer.hpp" +#include "helpers.cuh" +#include "var_hash_functor.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +namespace cudf::groupby::detail::hash { +template +hash_compound_agg_finalizer::hash_compound_agg_finalizer( + column_view col, + cudf::detail::result_cache* sparse_results, + cudf::detail::result_cache* dense_results, + device_span gather_map, + SetType set, + bitmask_type const* row_bitmask, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) + : col(col), + sparse_results(sparse_results), + dense_results(dense_results), + gather_map(gather_map), + set(set), + row_bitmask(row_bitmask), + stream(stream), + mr(mr) +{ + result_type = + cudf::is_dictionary(col.type()) ? cudf::dictionary_column_view(col).keys().type() : col.type(); +} + +template +auto hash_compound_agg_finalizer::to_dense_agg_result(cudf::aggregation const& agg) +{ + auto s = sparse_results->get_result(col, agg); + auto dense_result_table = cudf::detail::gather(table_view({std::move(s)}), + gather_map, + out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); + return std::move(dense_result_table->release()[0]); +} + +template +auto hash_compound_agg_finalizer::gather_argminmax(aggregation const& agg) +{ + auto arg_result = to_dense_agg_result(agg); + // We make a view of ARG(MIN/MAX) result without a null mask and gather + // using this map. The values in data buffer of ARG(MIN/MAX) result + // corresponding to null values was initialized to ARG(MIN/MAX)_SENTINEL + // which is an out of bounds index value (-1) and causes the gathered + // value to be null. + column_view null_removed_map( + data_type(type_to_id()), + arg_result->size(), + static_cast(arg_result->view().template data()), + nullptr, + 0); + auto gather_argminmax = + cudf::detail::gather(table_view({col}), + null_removed_map, + arg_result->nullable() ? cudf::out_of_bounds_policy::NULLIFY + : cudf::out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); + return std::move(gather_argminmax->release()[0]); +} + +template +void hash_compound_agg_finalizer::visit(cudf::aggregation const& agg) +{ + if (dense_results->has_result(col, agg)) return; + dense_results->add_result(col, agg, to_dense_agg_result(agg)); +} + +template +void hash_compound_agg_finalizer::visit(cudf::detail::min_aggregation const& agg) +{ + if (dense_results->has_result(col, agg)) return; + if (result_type.id() == type_id::STRING) { + auto transformed_agg = make_argmin_aggregation(); + dense_results->add_result(col, agg, gather_argminmax(*transformed_agg)); + } else { + dense_results->add_result(col, agg, to_dense_agg_result(agg)); + } +} + +template +void hash_compound_agg_finalizer::visit(cudf::detail::max_aggregation const& agg) +{ + if (dense_results->has_result(col, agg)) return; + + if (result_type.id() == type_id::STRING) { + auto transformed_agg = make_argmax_aggregation(); + dense_results->add_result(col, agg, gather_argminmax(*transformed_agg)); + } else { + dense_results->add_result(col, agg, to_dense_agg_result(agg)); + } +} + +template +void hash_compound_agg_finalizer::visit(cudf::detail::mean_aggregation const& agg) +{ + if (dense_results->has_result(col, agg)) return; + + auto sum_agg = make_sum_aggregation(); + auto count_agg = make_count_aggregation(); + this->visit(*sum_agg); + this->visit(*count_agg); + column_view sum_result = dense_results->get_result(col, *sum_agg); + column_view count_result = dense_results->get_result(col, *count_agg); + + auto result = + cudf::detail::binary_operation(sum_result, + count_result, + binary_operator::DIV, + cudf::detail::target_type(result_type, aggregation::MEAN), + stream, + mr); + dense_results->add_result(col, agg, std::move(result)); +} + +template +void hash_compound_agg_finalizer::visit(cudf::detail::var_aggregation const& agg) +{ + if (dense_results->has_result(col, agg)) return; + + auto sum_agg = make_sum_aggregation(); + auto count_agg = make_count_aggregation(); + this->visit(*sum_agg); + this->visit(*count_agg); + column_view sum_result = sparse_results->get_result(col, *sum_agg); + column_view count_result = sparse_results->get_result(col, *count_agg); + + auto values_view = column_device_view::create(col, stream); + auto sum_view = column_device_view::create(sum_result, stream); + auto count_view = column_device_view::create(count_result, stream); + + auto var_result = make_fixed_width_column( + cudf::detail::target_type(result_type, agg.kind), col.size(), mask_state::ALL_NULL, stream); + auto var_result_view = mutable_column_device_view::create(var_result->mutable_view(), stream); + mutable_table_view var_table_view{{var_result->mutable_view()}}; + cudf::detail::initialize_with_identity(var_table_view, {agg.kind}, stream); + + thrust::for_each_n( + rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(0), + col.size(), + var_hash_functor{ + set, row_bitmask, *var_result_view, *values_view, *sum_view, *count_view, agg._ddof}); + sparse_results->add_result(col, agg, std::move(var_result)); + dense_results->add_result(col, agg, to_dense_agg_result(agg)); +} + +template +void hash_compound_agg_finalizer::visit(cudf::detail::std_aggregation const& agg) +{ + if (dense_results->has_result(col, agg)) return; + auto var_agg = make_variance_aggregation(agg._ddof); + this->visit(*dynamic_cast(var_agg.get())); + column_view variance = dense_results->get_result(col, *var_agg); + + auto result = cudf::detail::unary_operation(variance, unary_operator::SQRT, stream, mr); + dense_results->add_result(col, agg, std::move(result)); +} + +template class hash_compound_agg_finalizer>; +template class hash_compound_agg_finalizer>; +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/hash_compound_agg_finalizer.hpp b/cpp/src/groupby/hash/hash_compound_agg_finalizer.hpp new file mode 100644 index 00000000000..8bee1a92c40 --- /dev/null +++ b/cpp/src/groupby/hash/hash_compound_agg_finalizer.hpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace cudf::groupby::detail::hash { +template +class hash_compound_agg_finalizer final : public cudf::detail::aggregation_finalizer { + column_view col; + data_type result_type; + cudf::detail::result_cache* sparse_results; + cudf::detail::result_cache* dense_results; + device_span gather_map; + SetType set; + bitmask_type const* __restrict__ row_bitmask; + rmm::cuda_stream_view stream; + rmm::device_async_resource_ref mr; + + public: + using cudf::detail::aggregation_finalizer::visit; + + hash_compound_agg_finalizer(column_view col, + cudf::detail::result_cache* sparse_results, + cudf::detail::result_cache* dense_results, + device_span gather_map, + SetType set, + bitmask_type const* row_bitmask, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + + auto to_dense_agg_result(cudf::aggregation const& agg); + + // Enables conversion of ARGMIN/ARGMAX into MIN/MAX + auto gather_argminmax(cudf::aggregation const& agg); + + // Declare overloads for each kind of aggregation to dispatch + void visit(cudf::aggregation const& agg) override; + + void visit(cudf::detail::min_aggregation const& agg) override; + + void visit(cudf::detail::max_aggregation const& agg) override; + + void visit(cudf::detail::mean_aggregation const& agg) override; + + void visit(cudf::detail::var_aggregation const& agg) override; + + void visit(cudf::detail::std_aggregation const& agg) override; +}; +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/helpers.cuh b/cpp/src/groupby/hash/helpers.cuh new file mode 100644 index 00000000000..0d117ca35b3 --- /dev/null +++ b/cpp/src/groupby/hash/helpers.cuh @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include + +#include + +namespace cudf::groupby::detail::hash { +// TODO: similar to `contains_table`, using larger CG size like 2 or 4 for nested +// types and `cg_size = 1`for flat data to improve performance +/// Number of threads to handle each input element +CUDF_HOST_DEVICE auto constexpr GROUPBY_CG_SIZE = 1; + +/// Number of slots per thread +CUDF_HOST_DEVICE auto constexpr GROUPBY_WINDOW_SIZE = 1; + +/// Thread block size +CUDF_HOST_DEVICE auto constexpr GROUPBY_BLOCK_SIZE = 128; + +/// Threshold cardinality to switch between shared memory aggregations and global memory +/// aggregations +CUDF_HOST_DEVICE auto constexpr GROUPBY_CARDINALITY_THRESHOLD = 128; + +// We add additional `block_size`, because after the number of elements in the local hash set +// exceeds the threshold, all threads in the thread block can still insert one more element. +/// The maximum number of elements handled per block +CUDF_HOST_DEVICE auto constexpr GROUPBY_SHM_MAX_ELEMENTS = + GROUPBY_CARDINALITY_THRESHOLD + GROUPBY_BLOCK_SIZE; + +// GROUPBY_SHM_MAX_ELEMENTS with 0.7 occupancy +/// Shared memory hash set extent type +using shmem_extent_t = + cuco::extent(static_cast(GROUPBY_SHM_MAX_ELEMENTS) * 1.43)>; + +/// Number of windows needed by each shared memory hash set +CUDF_HOST_DEVICE auto constexpr window_extent = + cuco::make_window_extent(shmem_extent_t{}); + +/** + * @brief Returns the smallest multiple of 8 that is greater than or equal to the given integer. + */ +CUDF_HOST_DEVICE constexpr std::size_t round_to_multiple_of_8(std::size_t num) +{ + std::size_t constexpr base = 8; + return cudf::util::div_rounding_up_safe(num, base) * base; +} + +using row_hash_t = + cudf::experimental::row::hash::device_row_hasher; + +/// Probing scheme type used by groupby hash table +using probing_scheme_t = cuco::linear_probing; + +using row_comparator_t = cudf::experimental::row::equality::device_row_comparator< + false, + cudf::nullate::DYNAMIC, + cudf::experimental::row::equality::nan_equal_physical_equality_comparator>; + +using nullable_row_comparator_t = cudf::experimental::row::equality::device_row_comparator< + true, + cudf::nullate::DYNAMIC, + cudf::experimental::row::equality::nan_equal_physical_equality_comparator>; + +using global_set_t = cuco::static_set, + cuda::thread_scope_device, + row_comparator_t, + probing_scheme_t, + cudf::detail::cuco_allocator, + cuco::storage>; + +using nullable_global_set_t = cuco::static_set, + cuda::thread_scope_device, + nullable_row_comparator_t, + probing_scheme_t, + cudf::detail::cuco_allocator, + cuco::storage>; + +template +using hash_set_ref_t = cuco::static_set_ref< + cudf::size_type, + cuda::thread_scope_device, + row_comparator_t, + probing_scheme_t, + cuco::aow_storage_ref>, + Op>; + +template +using nullable_hash_set_ref_t = cuco::static_set_ref< + cudf::size_type, + cuda::thread_scope_device, + nullable_row_comparator_t, + probing_scheme_t, + cuco::aow_storage_ref>, + Op>; +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/groupby_kernels.cuh b/cpp/src/groupby/hash/single_pass_functors.cuh similarity index 95% rename from cpp/src/groupby/hash/groupby_kernels.cuh rename to cpp/src/groupby/hash/single_pass_functors.cuh index 86f4d76487f..73791b3aa71 100644 --- a/cpp/src/groupby/hash/groupby_kernels.cuh +++ b/cpp/src/groupby/hash/single_pass_functors.cuh @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #pragma once #include @@ -21,12 +20,9 @@ #include #include -#include +#include -namespace cudf { -namespace groupby { -namespace detail { -namespace hash { +namespace cudf::groupby::detail::hash { /** * @brief Computes single-pass aggregations and store results into a sparse `output_values` table, * and populate `set` with indices of unique keys @@ -102,8 +98,4 @@ struct compute_single_pass_aggs_fn { } } }; - -} // namespace hash -} // namespace detail -} // namespace groupby -} // namespace cudf +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/sparse_to_dense_results.cu b/cpp/src/groupby/hash/sparse_to_dense_results.cu new file mode 100644 index 00000000000..e1c2cd22309 --- /dev/null +++ b/cpp/src/groupby/hash/sparse_to_dense_results.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "hash_compound_agg_finalizer.hpp" +#include "helpers.cuh" + +#include +#include +#include +#include + +#include +#include + +namespace cudf::groupby::detail::hash { +template +void sparse_to_dense_results(host_span requests, + cudf::detail::result_cache* sparse_results, + cudf::detail::result_cache* dense_results, + device_span gather_map, + SetRef set, + bitmask_type const* row_bitmask, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + for (auto const& request : requests) { + auto const& agg_v = request.aggregations; + auto const& col = request.values; + + // Given an aggregation, this will get the result from sparse_results and + // convert and return dense, compacted result + auto finalizer = hash_compound_agg_finalizer( + col, sparse_results, dense_results, gather_map, set, row_bitmask, stream, mr); + for (auto&& agg : agg_v) { + agg->finalize(finalizer); + } + } +} + +template void sparse_to_dense_results>( + host_span requests, + cudf::detail::result_cache* sparse_results, + cudf::detail::result_cache* dense_results, + device_span gather_map, + hash_set_ref_t set, + bitmask_type const* row_bitmask, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +template void sparse_to_dense_results>( + host_span requests, + cudf::detail::result_cache* sparse_results, + cudf::detail::result_cache* dense_results, + device_span gather_map, + nullable_hash_set_ref_t set, + bitmask_type const* row_bitmask, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/sparse_to_dense_results.hpp b/cpp/src/groupby/hash/sparse_to_dense_results.hpp new file mode 100644 index 00000000000..3a2b3090b99 --- /dev/null +++ b/cpp/src/groupby/hash/sparse_to_dense_results.hpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace cudf::groupby::detail::hash { +/** + * @brief Gather sparse aggregation results into dense using `gather_map` and add to + * `dense_results` + * + * @tparam SetRef Device hash set ref type + * + * @param[in] requests The set of columns to aggregate and the aggregations to perform + * @param[in] sparse_results Sparse aggregation results + * @param[out] dense_results Dense aggregation results + * @param[in] gather_map Gather map indicating valid elements in `sparse_results` + * @param[in] set Device hash set ref + * @param[in] row_bitmask Bitmask indicating the validity of input keys + * @param[in] stream CUDA stream used for device memory operations and kernel launches + * @param[in] mr Device memory resource used to allocate the returned table + */ +template +void sparse_to_dense_results(host_span requests, + cudf::detail::result_cache* sparse_results, + cudf::detail::result_cache* dense_results, + device_span gather_map, + SetRef set, + bitmask_type const* row_bitmask, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); +} // namespace cudf::groupby::detail::hash From 69ca3874b97e9cce6efb71e3e33ec598b57908a3 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 21 Oct 2024 18:56:07 -0500 Subject: [PATCH 12/76] Ignore loud dask warnings about legacy dataframe implementation (#17137) This PR ignores loud dask warnings about legacy dask dataframe implementation is going to be soon removed: https://github.com/dask/dask/pull/11437 Note: We only see this error for `DASK_DATAFRAME__QUERY_PLANNING=False` cases, `DASK_DATAFRAME__QUERY_PLANNING=True` are passing fine. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Bradley Dice (https://github.com/bdice) - Peter Andreas Entschev (https://github.com/pentschev) - Richard (Rick) Zamora (https://github.com/rjzamora) URL: https://github.com/rapidsai/cudf/pull/17137 --- python/dask_cudf/pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index fbcd7ae5dfb..705865d083b 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -126,5 +126,8 @@ filterwarnings = [ # https://github.com/dask/partd/blob/main/partd/pandas.py#L198 "ignore:Passing a BlockManager to DataFrame is deprecated and will raise in a future version. Use public APIs instead.:DeprecationWarning", "ignore:String support for `aggregate_files` is experimental. Behavior may change in the future.:FutureWarning:dask", + # Dask now loudly throws warnings: https://github.com/dask/dask/pull/11437 + # When the legacy implementation is removed we can remove this warning and stop running pytests with `DASK_DATAFRAME__QUERY_PLANNING=False` + "ignore:The legacy Dask DataFrame implementation is deprecated and will be removed in a future version.*:FutureWarning", ] xfail_strict = true From 13de3c1f61b49ade0d7283442209d82c9f59ae02 Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Tue, 22 Oct 2024 02:24:03 -0700 Subject: [PATCH 13/76] Add compile time check to ensure the `counting_iterator` type in `counting_transform_iterator` fits in `size_type` (#17118) This PR adds a compile time check to enforce that the `start` argument to `cudf::detail::counting_transform_iterator`, which is used to determine the type of `counting_iterator`, is of a type that fits in `int32_t` (aka `size_type`). The PR also modifies the instances of `counting_transform_iterator` that need to work with `counting_iterators` of type > `int32_t` to manually created `counting_transform_iterators` using thrust. More context in this [comment](https://github.com/rapidsai/cudf/pull/17059/files#r1803925659). Authors: - Muhammad Haseeb (https://github.com/mhaseeb123) Approvers: - David Wendt (https://github.com/davidwendt) - Yunsong Wang (https://github.com/PointKernel) - Vukasin Milovanovic (https://github.com/vuule) - Tianyu Liu (https://github.com/kingcrimsontianyu) URL: https://github.com/rapidsai/cudf/pull/17118 --- cpp/include/cudf/detail/iterator.cuh | 20 +++++++++++++------ .../cudf/detail/utilities/batched_memset.hpp | 4 ++-- .../cudf/strings/detail/strings_children.cuh | 17 ++++++++-------- cpp/src/io/parquet/page_data.cu | 7 ++++--- cpp/src/io/parquet/reader_impl_preprocess.cu | 8 ++++---- cpp/tests/io/orc_chunked_reader_test.cu | 9 +++++---- 6 files changed, 38 insertions(+), 27 deletions(-) diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 4349e1b70fd..30f36d6a5da 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -38,18 +38,19 @@ #include #include +#include +#include #include #include #include #include -#include - namespace cudf { namespace detail { /** * @brief Convenience wrapper for creating a `thrust::transform_iterator` over a - * `thrust::counting_iterator`. + * `thrust::counting_iterator` within the range [0, INT_MAX]. + * * * Example: * @code{.cpp} @@ -62,14 +63,21 @@ namespace detail { * iter[n] == n * n * @endcode * - * @param start The starting value of the counting iterator + * @param start The starting value of the counting iterator (must be size_type or smaller type). * @param f The unary function to apply to the counting iterator. * @return A transform iterator that applies `f` to a counting iterator */ -template -CUDF_HOST_DEVICE inline auto make_counting_transform_iterator(cudf::size_type start, +template +CUDF_HOST_DEVICE inline auto make_counting_transform_iterator(CountingIterType start, UnaryFunction f) { + // Check if the `start` for counting_iterator is of size_type or a smaller integral type + static_assert( + cuda::std::is_integral_v and + cuda::std::numeric_limits::digits <= + cuda::std::numeric_limits::digits, + "The `start` for the counting_transform_iterator must be size_type or smaller type"); + return thrust::make_transform_iterator(thrust::make_counting_iterator(start), f); } diff --git a/cpp/include/cudf/detail/utilities/batched_memset.hpp b/cpp/include/cudf/detail/utilities/batched_memset.hpp index 75f738f7529..78be5b91248 100644 --- a/cpp/include/cudf/detail/utilities/batched_memset.hpp +++ b/cpp/include/cudf/detail/utilities/batched_memset.hpp @@ -53,8 +53,8 @@ void batched_memset(std::vector> const& bufs, cudf::detail::make_device_uvector_async(bufs, stream, cudf::get_current_device_resource_ref()); // get a vector with the sizes of all buffers - auto sizes = cudf::detail::make_counting_transform_iterator( - static_cast(0), + auto sizes = thrust::make_transform_iterator( + thrust::counting_iterator(0), cuda::proclaim_return_type( [gpu_bufs = gpu_bufs.data()] __device__(std::size_t i) { return gpu_bufs[i].size(); })); diff --git a/cpp/include/cudf/strings/detail/strings_children.cuh b/cpp/include/cudf/strings/detail/strings_children.cuh index fb0b25cf9f1..de2f1770e28 100644 --- a/cpp/include/cudf/strings/detail/strings_children.cuh +++ b/cpp/include/cudf/strings/detail/strings_children.cuh @@ -65,19 +65,20 @@ rmm::device_uvector make_chars_buffer(column_view const& offsets, auto chars_data = rmm::device_uvector(chars_size, stream, mr); auto const d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets); - auto const src_ptrs = cudf::detail::make_counting_transform_iterator( - 0u, cuda::proclaim_return_type([begin] __device__(uint32_t idx) { + auto const src_ptrs = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + cuda::proclaim_return_type([begin] __device__(uint32_t idx) { // Due to a bug in cub (https://github.com/NVIDIA/cccl/issues/586), // we have to use `const_cast` to remove `const` qualifier from the source pointer. // This should be fine as long as we only read but not write anything to the source. return reinterpret_cast(const_cast(begin[idx].first)); })); - auto const src_sizes = cudf::detail::make_counting_transform_iterator( - 0u, cuda::proclaim_return_type([begin] __device__(uint32_t idx) { - return begin[idx].second; - })); - auto const dst_ptrs = cudf::detail::make_counting_transform_iterator( - 0u, + auto const src_sizes = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + cuda::proclaim_return_type( + [begin] __device__(uint32_t idx) { return begin[idx].second; })); + auto const dst_ptrs = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), cuda::proclaim_return_type([offsets = d_offsets, output = chars_data.data()] __device__( uint32_t idx) { return output + offsets[idx]; })); diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index b3276c81c1f..0d24fa4236f 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -21,6 +21,7 @@ #include +#include #include namespace cudf::io::parquet::detail { @@ -476,9 +477,9 @@ void WriteFinalOffsets(host_span offsets, auto d_src_data = cudf::detail::make_device_uvector_async( offsets, stream, cudf::get_current_device_resource_ref()); // Iterator for the source (scalar) data - auto src_iter = cudf::detail::make_counting_transform_iterator( - static_cast(0), - cuda::proclaim_return_type( + auto src_iter = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + cuda::proclaim_return_type( [src = d_src_data.begin()] __device__(std::size_t i) { return src + i; })); // Copy buffer addresses to device and create an iterator diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 5138a92ac14..60e35465793 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -1629,10 +1629,10 @@ void reader::impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num get_page_nesting_size{ d_cols_info.data(), max_depth, subpass.pages.size(), subpass.pages.device_begin()}); - // Manually create a int64_t `key_start` compatible counting_transform_iterator to avoid - // implicit casting to size_type. - auto const reduction_keys = thrust::make_transform_iterator( - thrust::make_counting_iterator(key_start), get_reduction_key{subpass.pages.size()}); + // Manually create a size_t `key_start` compatible counting_transform_iterator. + auto const reduction_keys = + thrust::make_transform_iterator(thrust::make_counting_iterator(key_start), + get_reduction_key{subpass.pages.size()}); // Find the size of each column thrust::reduce_by_key(rmm::exec_policy_nosync(_stream), diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 8ad1fea649d..5f1aea71f73 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -1358,10 +1358,11 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) int64_t constexpr total_rows = num_rows * num_reps; static_assert(total_rows > std::numeric_limits::max()); - auto const it = cudf::detail::make_counting_transform_iterator(0l, [num_rows](int64_t i) { - return (i % num_rows) % static_cast(std::numeric_limits::max() / 2); - }); - auto const col = data_col(it, it + num_rows); + auto const it = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), [num_rows](int64_t i) { + return (i % num_rows) % static_cast(std::numeric_limits::max() / 2); + }); + auto const col = data_col(it, it + num_rows); auto const chunk_table = cudf::table_view{{col}}; std::vector data_buffer; From 637e3206a4656bd38636f3fadf3c4573c7bc906a Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Tue, 22 Oct 2024 10:28:07 +0100 Subject: [PATCH 14/76] Unify treatment of `Expr` and `IR` nodes in cudf-polars DSL (#17016) As part of in-progress multi-GPU work, we will likely want to: 1. Introduce additional nodes into the `IR` namespace; 2. Implement rewrite rules for `IR` trees to express needed communication patterns; 3. Write visitors that translate expressions into an appropriate description for whichever multi-GPU approach we end up taking. It was already straightforward to write generic visitors for `Expr` nodes, since those uniformly have a `.children` property for their dependents. In contrast, the `IR` nodes were more ad-hoc. To solve this, pull out the generic implementation from `Expr` into an abstract `Node` class. Now `Expr` nodes just inherit from this, and `IR` nodes do so similarly. Redoing the `IR` nodes is a little painful because we want to make them hashable, so we have to provide a bunch of custom `get_hashable` implementations (the schema dict, for example, is not hashable). With these generic facilities in place, we can now implement traversal and visitor infrastructure. Specifically, we provide: - a mechanism for pre-order traversal of an expression DAG, yielding each unique node exactly once. This is useful if one wants to know if an expression contains some particular node; - a mechanism for writing recursive visitors and then wrapping a caching scheme around the outside. This is useful for rewrites. Some example usages are shown in tests. Authors: - Lawrence Mitchell (https://github.com/wence-) - Richard (Rick) Zamora (https://github.com/rjzamora) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Matthew Murray (https://github.com/Matt711) - Richard (Rick) Zamora (https://github.com/rjzamora) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17016 --- .../dsl/expressions/aggregation.py | 5 +- .../cudf_polars/dsl/expressions/base.py | 97 +--- .../cudf_polars/dsl/expressions/binaryop.py | 5 +- .../cudf_polars/dsl/expressions/boolean.py | 5 +- .../cudf_polars/dsl/expressions/datetime.py | 5 +- .../cudf_polars/dsl/expressions/literal.py | 14 +- .../cudf_polars/dsl/expressions/rolling.py | 10 +- .../cudf_polars/dsl/expressions/selection.py | 10 +- .../cudf_polars/dsl/expressions/sorting.py | 10 +- .../cudf_polars/dsl/expressions/string.py | 5 +- .../cudf_polars/dsl/expressions/ternary.py | 5 +- .../cudf_polars/dsl/expressions/unary.py | 14 +- python/cudf_polars/cudf_polars/dsl/ir.py | 527 ++++++++++++------ .../cudf_polars/cudf_polars/dsl/nodebase.py | 152 +++++ .../cudf_polars/cudf_polars/dsl/translate.py | 40 +- .../cudf_polars/cudf_polars/dsl/traversal.py | 175 ++++++ .../cudf_polars/typing/__init__.py | 50 +- python/cudf_polars/docs/overview.md | 229 +++++++- python/cudf_polars/pyproject.toml | 2 +- python/cudf_polars/tests/dsl/test_expr.py | 21 + .../cudf_polars/tests/dsl/test_traversal.py | 229 ++++++++ python/cudf_polars/tests/test_config.py | 6 +- 22 files changed, 1253 insertions(+), 363 deletions(-) create mode 100644 python/cudf_polars/cudf_polars/dsl/nodebase.py create mode 100644 python/cudf_polars/cudf_polars/dsl/traversal.py create mode 100644 python/cudf_polars/tests/dsl/test_traversal.py diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py b/python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py index b8b18ec5039..41b1defab39 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py @@ -30,14 +30,13 @@ class Agg(Expr): - __slots__ = ("name", "options", "op", "request", "children") + __slots__ = ("name", "options", "op", "request") _non_child = ("dtype", "name", "options") - children: tuple[Expr, ...] def __init__( self, dtype: plc.DataType, name: str, options: Any, *children: Expr ) -> None: - super().__init__(dtype) + self.dtype = dtype self.name = name self.options = options self.children = children diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/base.py b/python/cudf_polars/cudf_polars/dsl/expressions/base.py index 8d021b0231d..effe8cb2378 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/base.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/base.py @@ -13,9 +13,10 @@ import pylibcudf as plc from cudf_polars.containers import Column +from cudf_polars.dsl.nodebase import Node if TYPE_CHECKING: - from collections.abc import Mapping, Sequence + from collections.abc import Mapping from cudf_polars.containers import Column, DataFrame @@ -32,100 +33,16 @@ class ExecutionContext(IntEnum): ROLLING = enum.auto() -class Expr: - """ - An abstract expression object. +class Expr(Node["Expr"]): + """An abstract expression object.""" - This contains a (potentially empty) tuple of child expressions, - along with non-child data. For uniform reconstruction and - implementation of hashing and equality schemes, child classes need - to provide a certain amount of metadata when they are defined. - Specifically, the ``_non_child`` attribute must list, in-order, - the names of the slots that are passed to the constructor. The - constructor must take arguments in the order ``(*_non_child, - *children).`` - """ - - __slots__ = ("dtype", "_hash_value", "_repr_value") + __slots__ = ("dtype",) dtype: plc.DataType """Data type of the expression.""" - _hash_value: int - """Caching slot for the hash of the expression.""" - _repr_value: str - """Caching slot for repr of the expression.""" - children: tuple[Expr, ...] = () - """Children of the expression.""" + # This annotation is needed because of https://github.com/python/mypy/issues/17981 _non_child: ClassVar[tuple[str, ...]] = ("dtype",) """Names of non-child data (not Exprs) for reconstruction.""" - # Constructor must take arguments in order (*_non_child, *children) - def __init__(self, dtype: plc.DataType) -> None: - self.dtype = dtype - - def _ctor_arguments(self, children: Sequence[Expr]) -> Sequence: - return (*(getattr(self, attr) for attr in self._non_child), *children) - - def get_hash(self) -> int: - """ - Return the hash of this expr. - - Override this in subclasses, rather than __hash__. - - Returns - ------- - The integer hash value. - """ - return hash((type(self), self._ctor_arguments(self.children))) - - def __hash__(self) -> int: - """Hash of an expression with caching.""" - try: - return self._hash_value - except AttributeError: - self._hash_value = self.get_hash() - return self._hash_value - - def is_equal(self, other: Any) -> bool: - """ - Equality of two expressions. - - Override this in subclasses, rather than __eq__. - - Parameter - --------- - other - object to compare to - - Returns - ------- - True if the two expressions are equal, false otherwise. - """ - if type(self) is not type(other): - return False # pragma: no cover; __eq__ trips first - return self._ctor_arguments(self.children) == other._ctor_arguments( - other.children - ) - - def __eq__(self, other: Any) -> bool: - """Equality of expressions.""" - if type(self) is not type(other) or hash(self) != hash(other): - return False - else: - return self.is_equal(other) - - def __ne__(self, other: Any) -> bool: - """Inequality of expressions.""" - return not self.__eq__(other) - - def __repr__(self) -> str: - """String representation of an expression with caching.""" - try: - return self._repr_value - except AttributeError: - args = ", ".join(f"{arg!r}" for arg in self._ctor_arguments(self.children)) - self._repr_value = f"{type(self).__name__}({args})" - return self._repr_value - def do_evaluate( self, df: DataFrame, @@ -311,11 +228,11 @@ class Col(Expr): __slots__ = ("name",) _non_child = ("dtype", "name") name: str - children: tuple[()] def __init__(self, dtype: plc.DataType, name: str) -> None: self.dtype = dtype self.name = name + self.children = () def do_evaluate( self, diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py b/python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py index 19baae3611d..11a47e7ea51 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py @@ -24,9 +24,8 @@ class BinOp(Expr): - __slots__ = ("op", "children") + __slots__ = ("op",) _non_child = ("dtype", "op") - children: tuple[Expr, Expr] def __init__( self, @@ -35,7 +34,7 @@ def __init__( left: Expr, right: Expr, ) -> None: - super().__init__(dtype) + self.dtype = dtype if plc.traits.is_boolean(self.dtype): # For boolean output types, bitand and bitor implement # boolean logic, so translate. bitxor also does, but the diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/boolean.py b/python/cudf_polars/cudf_polars/dsl/expressions/boolean.py index ff9973a47d5..9c14a8386f3 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/boolean.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/boolean.py @@ -31,9 +31,8 @@ class BooleanFunction(Expr): - __slots__ = ("name", "options", "children") + __slots__ = ("name", "options") _non_child = ("dtype", "name", "options") - children: tuple[Expr, ...] def __init__( self, @@ -42,7 +41,7 @@ def __init__( options: tuple[Any, ...], *children: Expr, ) -> None: - super().__init__(dtype) + self.dtype = dtype self.options = options self.name = name self.children = children diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py b/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py index f752a23b628..596e193d8fe 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py @@ -25,7 +25,7 @@ class TemporalFunction(Expr): - __slots__ = ("name", "options", "children") + __slots__ = ("name", "options") _COMPONENT_MAP: ClassVar[dict[pl_expr.TemporalFunction, str]] = { pl_expr.TemporalFunction.Year: plc.datetime.DatetimeComponent.YEAR, pl_expr.TemporalFunction.Month: plc.datetime.DatetimeComponent.MONTH, @@ -39,7 +39,6 @@ class TemporalFunction(Expr): pl_expr.TemporalFunction.Nanosecond: plc.datetime.DatetimeComponent.NANOSECOND, } _non_child = ("dtype", "name", "options") - children: tuple[Expr, ...] def __init__( self, @@ -48,7 +47,7 @@ def __init__( options: tuple[Any, ...], *children: Expr, ) -> None: - super().__init__(dtype) + self.dtype = dtype self.options = options self.name = name self.children = children diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/literal.py b/python/cudf_polars/cudf_polars/dsl/expressions/literal.py index 562a2255033..c8aa993b994 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/literal.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/literal.py @@ -16,7 +16,7 @@ from cudf_polars.utils import dtypes if TYPE_CHECKING: - from collections.abc import Mapping + from collections.abc import Hashable, Mapping import pyarrow as pa @@ -31,12 +31,12 @@ class Literal(Expr): __slots__ = ("value",) _non_child = ("dtype", "value") value: pa.Scalar[Any] - children: tuple[()] def __init__(self, dtype: plc.DataType, value: pa.Scalar[Any]) -> None: - super().__init__(dtype) + self.dtype = dtype assert value.type == plc.interop.to_arrow(dtype) self.value = value + self.children = () def do_evaluate( self, @@ -58,19 +58,19 @@ class LiteralColumn(Expr): __slots__ = ("value",) _non_child = ("dtype", "value") value: pa.Array[Any, Any] - children: tuple[()] def __init__(self, dtype: plc.DataType, value: pl.Series) -> None: - super().__init__(dtype) + self.dtype = dtype data = value.to_arrow() self.value = data.cast(dtypes.downcast_arrow_lists(data.type)) + self.children = () - def get_hash(self) -> int: + def get_hashable(self) -> Hashable: """Compute a hash of the column.""" # This is stricter than necessary, but we only need this hash # for identity in groupby replacements so it's OK. And this # way we avoid doing potentially expensive compute. - return hash((type(self), self.dtype, id(self.value))) + return (type(self), self.dtype, id(self.value)) def do_evaluate( self, diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/rolling.py b/python/cudf_polars/cudf_polars/dsl/expressions/rolling.py index f7dcc3c542c..fa68bcb9426 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/rolling.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/rolling.py @@ -17,24 +17,22 @@ class RollingWindow(Expr): - __slots__ = ("options", "children") + __slots__ = ("options",) _non_child = ("dtype", "options") - children: tuple[Expr] def __init__(self, dtype: plc.DataType, options: Any, agg: Expr) -> None: - super().__init__(dtype) + self.dtype = dtype self.options = options self.children = (agg,) raise NotImplementedError("Rolling window not implemented") class GroupedRollingWindow(Expr): - __slots__ = ("options", "children") + __slots__ = ("options",) _non_child = ("dtype", "options") - children: tuple[Expr, ...] def __init__(self, dtype: plc.DataType, options: Any, agg: Expr, *by: Expr) -> None: - super().__init__(dtype) + self.dtype = dtype self.options = options self.children = (agg, *by) raise NotImplementedError("Grouped rolling window not implemented") diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/selection.py b/python/cudf_polars/cudf_polars/dsl/expressions/selection.py index a7a3e68a28c..0247256e507 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/selection.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/selection.py @@ -23,12 +23,11 @@ class Gather(Expr): - __slots__ = ("children",) + __slots__ = () _non_child = ("dtype",) - children: tuple[Expr, Expr] def __init__(self, dtype: plc.DataType, values: Expr, indices: Expr) -> None: - super().__init__(dtype) + self.dtype = dtype self.children = (values, indices) def do_evaluate( @@ -65,12 +64,11 @@ def do_evaluate( class Filter(Expr): - __slots__ = ("children",) + __slots__ = () _non_child = ("dtype",) - children: tuple[Expr, Expr] def __init__(self, dtype: plc.DataType, values: Expr, indices: Expr): - super().__init__(dtype) + self.dtype = dtype self.children = (values, indices) def do_evaluate( diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/sorting.py b/python/cudf_polars/cudf_polars/dsl/expressions/sorting.py index 861b73ce6a0..99512e2ef52 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/sorting.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/sorting.py @@ -23,14 +23,13 @@ class Sort(Expr): - __slots__ = ("options", "children") + __slots__ = ("options",) _non_child = ("dtype", "options") - children: tuple[Expr] def __init__( self, dtype: plc.DataType, options: tuple[bool, bool, bool], column: Expr ) -> None: - super().__init__(dtype) + self.dtype = dtype self.options = options self.children = (column,) @@ -59,9 +58,8 @@ def do_evaluate( class SortBy(Expr): - __slots__ = ("options", "children") + __slots__ = ("options",) _non_child = ("dtype", "options") - children: tuple[Expr, ...] def __init__( self, @@ -70,7 +68,7 @@ def __init__( column: Expr, *by: Expr, ) -> None: - super().__init__(dtype) + self.dtype = dtype self.options = options self.children = (column, *by) diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/string.py b/python/cudf_polars/cudf_polars/dsl/expressions/string.py index 6669669aadc..62b54c63a8d 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/string.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/string.py @@ -28,9 +28,8 @@ class StringFunction(Expr): - __slots__ = ("name", "options", "children", "_regex_program") + __slots__ = ("name", "options", "_regex_program") _non_child = ("dtype", "name", "options") - children: tuple[Expr, ...] def __init__( self, @@ -39,7 +38,7 @@ def __init__( options: tuple[Any, ...], *children: Expr, ) -> None: - super().__init__(dtype) + self.dtype = dtype self.options = options self.name = name self.children = children diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/ternary.py b/python/cudf_polars/cudf_polars/dsl/expressions/ternary.py index c7d7a802ded..d2b5d6bae29 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/ternary.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/ternary.py @@ -26,14 +26,13 @@ class Ternary(Expr): - __slots__ = ("children",) + __slots__ = () _non_child = ("dtype",) - children: tuple[Expr, Expr, Expr] def __init__( self, dtype: plc.DataType, when: Expr, then: Expr, otherwise: Expr ) -> None: - super().__init__(dtype) + self.dtype = dtype self.children = (when, then, otherwise) def do_evaluate( diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py index 3d4d15be1ce..53f6ed29239 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py @@ -26,12 +26,11 @@ class Cast(Expr): """Class representing a cast of an expression.""" - __slots__ = ("children",) + __slots__ = () _non_child = ("dtype",) - children: tuple[Expr] def __init__(self, dtype: plc.DataType, value: Expr) -> None: - super().__init__(dtype) + self.dtype = dtype self.children = (value,) if not dtypes.can_cast(value.dtype, self.dtype): raise NotImplementedError( @@ -60,7 +59,9 @@ def collect_agg(self, *, depth: int) -> AggInfo: class Len(Expr): """Class representing the length of an expression.""" - children: tuple[()] + def __init__(self, dtype: plc.DataType) -> None: + self.dtype = dtype + self.children = () def do_evaluate( self, @@ -90,9 +91,8 @@ def collect_agg(self, *, depth: int) -> AggInfo: class UnaryFunction(Expr): """Class representing unary functions of an expression.""" - __slots__ = ("name", "options", "children") + __slots__ = ("name", "options") _non_child = ("dtype", "name", "options") - children: tuple[Expr, ...] # Note: log, and pow are handled via translation to binops _OP_MAPPING: ClassVar[dict[str, plc.unary.UnaryOperator]] = { @@ -142,7 +142,7 @@ class UnaryFunction(Expr): def __init__( self, dtype: plc.DataType, name: str, options: tuple[Any, ...], *children: Expr ) -> None: - super().__init__(dtype) + self.dtype = dtype self.name = name self.options = options self.children = children diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index e319c363a23..eb93929cf61 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -13,8 +13,8 @@ from __future__ import annotations -import dataclasses import itertools +import json from functools import cache from pathlib import Path from typing import TYPE_CHECKING, Any, ClassVar @@ -27,10 +27,11 @@ import cudf_polars.dsl.expr as expr from cudf_polars.containers import Column, DataFrame -from cudf_polars.utils import dtypes, sorting +from cudf_polars.dsl.nodebase import Node +from cudf_polars.utils import dtypes if TYPE_CHECKING: - from collections.abc import Callable, MutableMapping + from collections.abc import Callable, Hashable, MutableMapping, Sequence from typing import Literal from cudf_polars.typing import Schema @@ -121,16 +122,27 @@ def broadcast(*columns: Column, target_length: int | None = None) -> list[Column ] -@dataclasses.dataclass -class IR: +class IR(Node["IR"]): """Abstract plan node, representing an unevaluated dataframe.""" + __slots__ = ("schema",) + # This annotation is needed because of https://github.com/python/mypy/issues/17981 + _non_child: ClassVar[tuple[str, ...]] = ("schema",) schema: Schema """Mapping from column names to their data types.""" - def __post_init__(self): - """Validate preconditions.""" - pass # noqa: PIE790 + def get_hashable(self) -> Hashable: + """ + Hashable representation of node, treating schema dictionary. + + Since the schema is a dictionary, even though it is morally + immutable, it is not hashable. We therefore convert it to + tuples for hashing purposes. + """ + # Schema is the first constructor argument + args = self._ctor_arguments(self.children)[1:] + schema_hash = tuple(self.schema.items()) + return (type(self), schema_hash, args) def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """ @@ -159,24 +171,50 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: ) # pragma: no cover -@dataclasses.dataclass class PythonScan(IR): """Representation of input from a python function.""" + __slots__ = ("options", "predicate") + _non_child = ("schema", "options", "predicate") options: Any """Arbitrary options.""" predicate: expr.NamedExpr | None """Filter to apply to the constructed dataframe before returning it.""" - def __post_init__(self): - """Validate preconditions.""" + def __init__(self, schema: Schema, options: Any, predicate: expr.NamedExpr | None): + self.schema = schema + self.options = options + self.predicate = predicate + self.children = () raise NotImplementedError("PythonScan not implemented") -@dataclasses.dataclass class Scan(IR): """Input from files.""" + __slots__ = ( + "typ", + "reader_options", + "cloud_options", + "paths", + "with_columns", + "skip_rows", + "n_rows", + "row_index", + "predicate", + ) + _non_child = ( + "schema", + "typ", + "reader_options", + "cloud_options", + "paths", + "with_columns", + "skip_rows", + "n_rows", + "row_index", + "predicate", + ) typ: str """What type of file are we reading? Parquet, CSV, etc...""" reader_options: dict[str, Any] @@ -185,7 +223,7 @@ class Scan(IR): """Cloud-related authentication options, currently ignored.""" paths: list[str] """List of paths to read from.""" - with_columns: list[str] + with_columns: list[str] | None """Projected columns to return.""" skip_rows: int """Rows to skip at the start when reading.""" @@ -196,9 +234,30 @@ class Scan(IR): predicate: expr.NamedExpr | None """Mask to apply to the read dataframe.""" - def __post_init__(self) -> None: - """Validate preconditions.""" - super().__post_init__() + def __init__( + self, + schema: Schema, + typ: str, + reader_options: dict[str, Any], + cloud_options: dict[str, Any] | None, + paths: list[str], + with_columns: list[str] | None, + skip_rows: int, + n_rows: int, + row_index: tuple[str, int] | None, + predicate: expr.NamedExpr | None, + ): + self.schema = schema + self.typ = typ + self.reader_options = reader_options + self.cloud_options = cloud_options + self.paths = paths + self.with_columns = with_columns + self.skip_rows = skip_rows + self.n_rows = n_rows + self.row_index = row_index + self.predicate = predicate + self.children = () if self.typ not in ("csv", "parquet", "ndjson"): # pragma: no cover # This line is unhittable ATM since IPC/Anonymous scan raise # on the polars side @@ -258,6 +317,28 @@ def __post_init__(self) -> None: "Reading only parquet metadata to produce row index." ) + def get_hashable(self) -> Hashable: + """ + Hashable representation of the node. + + The options dictionaries are serialised for hashing purposes + as json strings. + """ + schema_hash = tuple(self.schema.items()) + return ( + type(self), + schema_hash, + self.typ, + json.dumps(self.reader_options), + json.dumps(self.cloud_options), + tuple(self.paths), + tuple(self.with_columns) if self.with_columns is not None else None, + self.skip_rows, + self.n_rows, + self.row_index, + self.predicate, + ) + def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" with_columns = self.with_columns @@ -401,7 +482,6 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return df.filter(mask) -@dataclasses.dataclass class Cache(IR): """ Return a cached plan node. @@ -409,20 +489,25 @@ class Cache(IR): Used for CSE at the plan level. """ + __slots__ = ("key",) + _non_child = ("schema", "key") key: int """The cache key.""" - value: IR - """The unevaluated node to cache.""" + + def __init__(self, schema: Schema, key: int, value: IR): + self.schema = schema + self.key = key + self.children = (value,) def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" try: return cache[self.key] except KeyError: - return cache.setdefault(self.key, self.value.evaluate(cache=cache)) + (value,) = self.children + return cache.setdefault(self.key, value.evaluate(cache=cache)) -@dataclasses.dataclass class DataFrameScan(IR): """ Input from an existing polars DataFrame. @@ -430,13 +515,38 @@ class DataFrameScan(IR): This typically arises from ``q.collect().lazy()`` """ + __slots__ = ("df", "projection", "predicate") + _non_child = ("schema", "df", "projection", "predicate") df: Any """Polars LazyFrame object.""" - projection: list[str] + projection: tuple[str, ...] | None """List of columns to project out.""" predicate: expr.NamedExpr | None """Mask to apply.""" + def __init__( + self, + schema: Schema, + df: Any, + projection: Sequence[str] | None, + predicate: expr.NamedExpr | None, + ): + self.schema = schema + self.df = df + self.projection = tuple(projection) if projection is not None else None + self.predicate = predicate + self.children = () + + def get_hashable(self) -> Hashable: + """ + Hashable representation of the node. + + The (heavy) dataframe object is hashed as its id, so this is + not stable across runs, or repeat instances of the same equal dataframes. + """ + schema_hash = tuple(self.schema.items()) + return (type(self), schema_hash, id(self.df), self.projection, self.predicate) + def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" pdf = pl.DataFrame._from_pydf(self.df) @@ -454,28 +564,39 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return df -@dataclasses.dataclass class Select(IR): """Produce a new dataframe selecting given expressions from an input.""" - df: IR - """Input dataframe.""" - expr: list[expr.NamedExpr] + __slots__ = ("exprs", "should_broadcast") + _non_child = ("schema", "exprs", "should_broadcast") + exprs: tuple[expr.NamedExpr, ...] """List of expressions to evaluate to form the new dataframe.""" should_broadcast: bool """Should columns be broadcast?""" + def __init__( + self, + schema: Schema, + exprs: Sequence[expr.NamedExpr], + should_broadcast: bool, # noqa: FBT001 + df: IR, + ): + self.schema = schema + self.exprs = tuple(exprs) + self.should_broadcast = should_broadcast + self.children = (df,) + def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" - df = self.df.evaluate(cache=cache) + (child,) = self.children + df = child.evaluate(cache=cache) # Handle any broadcasting - columns = [e.evaluate(df) for e in self.expr] + columns = [e.evaluate(df) for e in self.exprs] if self.should_broadcast: columns = broadcast(*columns) return DataFrame(columns) -@dataclasses.dataclass class Reduce(IR): """ Produce a new dataframe selecting given expressions from an input. @@ -483,36 +604,73 @@ class Reduce(IR): This is a special case of :class:`Select` where all outputs are a single row. """ - df: IR - """Input dataframe.""" - expr: list[expr.NamedExpr] + __slots__ = ("exprs",) + _non_child = ("schema", "exprs") + exprs: tuple[expr.NamedExpr, ...] """List of expressions to evaluate to form the new dataframe.""" + def __init__( + self, schema: Schema, exprs: Sequence[expr.NamedExpr], df: IR + ): # pragma: no cover; polars doesn't emit this node yet + self.schema = schema + self.exprs = tuple(exprs) + self.children = (df,) + def evaluate( self, *, cache: MutableMapping[int, DataFrame] ) -> DataFrame: # pragma: no cover; polars doesn't emit this node yet """Evaluate and return a dataframe.""" - df = self.df.evaluate(cache=cache) - columns = broadcast(*(e.evaluate(df) for e in self.expr)) + (child,) = self.children + df = child.evaluate(cache=cache) + columns = broadcast(*(e.evaluate(df) for e in self.exprs)) assert all(column.obj.size() == 1 for column in columns) return DataFrame(columns) -@dataclasses.dataclass class GroupBy(IR): """Perform a groupby.""" - df: IR - """Input dataframe.""" - agg_requests: list[expr.NamedExpr] - """List of expressions to evaluate groupwise.""" - keys: list[expr.NamedExpr] - """List of expressions forming the keys.""" + __slots__ = ( + "agg_requests", + "keys", + "maintain_order", + "options", + "agg_infos", + ) + _non_child = ("schema", "keys", "agg_requests", "maintain_order", "options") + keys: tuple[expr.NamedExpr, ...] + """Grouping keys.""" + agg_requests: tuple[expr.NamedExpr, ...] + """Aggregation expressions.""" maintain_order: bool - """Should the order of the input dataframe be maintained?""" + """Preserve order in groupby.""" options: Any - """Options controlling style of groupby.""" - agg_infos: list[expr.AggInfo] = dataclasses.field(init=False) + """Arbitrary options.""" + + def __init__( + self, + schema: Schema, + keys: Sequence[expr.NamedExpr], + agg_requests: Sequence[expr.NamedExpr], + maintain_order: bool, # noqa: FBT001 + options: Any, + df: IR, + ): + self.schema = schema + self.keys = tuple(keys) + self.agg_requests = tuple(agg_requests) + self.maintain_order = maintain_order + self.options = options + self.children = (df,) + if self.options.rolling: + raise NotImplementedError( + "rolling window/groupby" + ) # pragma: no cover; rollingwindow constructor has already raised + if any(GroupBy.check_agg(a.value) > 1 for a in self.agg_requests): + raise NotImplementedError("Nested aggregations in groupby") + self.agg_infos = [req.collect_agg(depth=0) for req in self.agg_requests] + if len(self.keys) == 0: + raise NotImplementedError("dynamic groupby") @staticmethod def check_agg(agg: expr.Expr) -> int: @@ -542,22 +700,10 @@ def check_agg(agg: expr.Expr) -> int: else: raise NotImplementedError(f"No handler for {agg=}") - def __post_init__(self) -> None: - """Check whether all the aggregations are implemented.""" - super().__post_init__() - if self.options.rolling: - raise NotImplementedError( - "rolling window/groupby" - ) # pragma: no cover; rollingwindow constructor has already raised - if any(GroupBy.check_agg(a.value) > 1 for a in self.agg_requests): - raise NotImplementedError("Nested aggregations in groupby") - self.agg_infos = [req.collect_agg(depth=0) for req in self.agg_requests] - if len(self.keys) == 0: - raise NotImplementedError("dynamic groupby") - def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" - df = self.df.evaluate(cache=cache) + (child,) = self.children + df = child.evaluate(cache=cache) keys = broadcast( *(k.evaluate(df) for k in self.keys), target_length=df.num_rows ) @@ -646,17 +792,14 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return DataFrame(broadcasted).slice(self.options.slice) -@dataclasses.dataclass class Join(IR): """A join of two dataframes.""" - left: IR - """Left frame.""" - right: IR - """Right frame.""" - left_on: list[expr.NamedExpr] + __slots__ = ("left_on", "right_on", "options") + _non_child = ("schema", "left_on", "right_on", "options") + left_on: tuple[expr.NamedExpr, ...] """List of expressions used as keys in the left frame.""" - right_on: list[expr.NamedExpr] + right_on: tuple[expr.NamedExpr, ...] """List of expressions used as keys in the right frame.""" options: tuple[ Literal["inner", "left", "right", "full", "leftsemi", "leftanti", "cross"], @@ -674,9 +817,20 @@ class Join(IR): - coalesce: should key columns be coalesced (only makes sense for outer joins) """ - def __post_init__(self) -> None: - """Validate preconditions.""" - super().__post_init__() + def __init__( + self, + schema: Schema, + left_on: Sequence[expr.NamedExpr], + right_on: Sequence[expr.NamedExpr], + options: Any, + left: IR, + right: IR, + ): + self.schema = schema + self.left_on = tuple(left_on) + self.right_on = tuple(right_on) + self.options = options + self.children = (left, right) if any( isinstance(e.value, expr.Literal) for e in itertools.chain(self.left_on, self.right_on) @@ -777,8 +931,7 @@ def _reorder_maps( def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" - left = self.left.evaluate(cache=cache) - right = self.right.evaluate(cache=cache) + left, right = (c.evaluate(cache=cache) for c in self.children) how, join_nulls, zlice, suffix, coalesce = self.options suffix = "_right" if suffix is None else suffix if how == "cross": @@ -866,20 +1019,30 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return result.slice(zlice) -@dataclasses.dataclass class HStack(IR): """Add new columns to a dataframe.""" - df: IR - """Input dataframe.""" - columns: list[expr.NamedExpr] - """List of expressions to produce new columns.""" + __slots__ = ("columns", "should_broadcast") + _non_child = ("schema", "columns", "should_broadcast") should_broadcast: bool - """Should columns be broadcast?""" + """Should the resulting evaluated columns be broadcast to the same length.""" + + def __init__( + self, + schema: Schema, + columns: Sequence[expr.NamedExpr], + should_broadcast: bool, # noqa: FBT001 + df: IR, + ): + self.schema = schema + self.columns = tuple(columns) + self.should_broadcast = should_broadcast + self.children = (df,) def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" - df = self.df.evaluate(cache=cache) + (child,) = self.children + df = child.evaluate(cache=cache) columns = [c.evaluate(df) for c in self.columns] if self.should_broadcast: columns = broadcast(*columns, target_length=df.num_rows) @@ -895,20 +1058,36 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return df.with_columns(columns) -@dataclasses.dataclass class Distinct(IR): """Produce a new dataframe with distinct rows.""" - df: IR - """Input dataframe.""" + __slots__ = ("keep", "subset", "zlice", "stable") + _non_child = ("schema", "keep", "subset", "zlice", "stable") keep: plc.stream_compaction.DuplicateKeepOption - """Which rows to keep.""" - subset: set[str] | None - """Which columns to inspect when computing distinct rows.""" + """Which distinct value to keep.""" + subset: frozenset[str] | None + """Which columns should be used to define distinctness. If None, + then all columns are used.""" zlice: tuple[int, int] | None - """Optional slice to perform after compaction.""" + """Optional slice to apply to the result.""" stable: bool - """Should order be preserved?""" + """Should the result maintain ordering.""" + + def __init__( + self, + schema: Schema, + keep: plc.stream_compaction.DuplicateKeepOption, + subset: frozenset[str] | None, + zlice: tuple[int, int] | None, + stable: bool, # noqa: FBT001 + df: IR, + ): + self.schema = schema + self.keep = keep + self.subset = subset + self.zlice = zlice + self.stable = stable + self.children = (df,) _KEEP_MAP: ClassVar[dict[str, plc.stream_compaction.DuplicateKeepOption]] = { "first": plc.stream_compaction.DuplicateKeepOption.KEEP_FIRST, @@ -917,18 +1096,10 @@ class Distinct(IR): "any": plc.stream_compaction.DuplicateKeepOption.KEEP_ANY, } - def __init__(self, schema: Schema, df: IR, options: Any) -> None: - self.schema = schema - self.df = df - (keep, subset, maintain_order, zlice) = options - self.keep = Distinct._KEEP_MAP[keep] - self.subset = set(subset) if subset is not None else None - self.stable = maintain_order - self.zlice = zlice - def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" - df = self.df.evaluate(cache=cache) + (child,) = self.children + df = child.evaluate(cache=cache) if self.subset is None: indices = list(range(df.num_columns)) keys_sorted = all(c.is_sorted for c in df.column_map.values()) @@ -967,46 +1138,44 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return result.slice(self.zlice) -@dataclasses.dataclass class Sort(IR): """Sort a dataframe.""" - df: IR - """Input.""" - by: list[expr.NamedExpr] - """List of expressions to produce sort keys.""" - do_sort: Callable[..., plc.Table] - """pylibcudf sorting function.""" + __slots__ = ("by", "order", "null_order", "stable", "zlice") + _non_child = ("schema", "by", "order", "null_order", "stable", "zlice") + by: tuple[expr.NamedExpr, ...] + """Sort keys.""" + order: tuple[plc.types.Order, ...] + """Sort order for each sort key.""" + null_order: tuple[plc.types.NullOrder, ...] + """Null sorting location for each sort key.""" + stable: bool + """Should the sort be stable?""" zlice: tuple[int, int] | None - """Optional slice to apply after sorting.""" - order: list[plc.types.Order] - """Order keys should be sorted in.""" - null_order: list[plc.types.NullOrder] - """Where nulls sort to.""" + """Optional slice to apply to the result.""" def __init__( self, schema: Schema, - df: IR, - by: list[expr.NamedExpr], - options: Any, + by: Sequence[expr.NamedExpr], + order: Sequence[plc.types.Order], + null_order: Sequence[plc.types.NullOrder], + stable: bool, # noqa: FBT001 zlice: tuple[int, int] | None, - ) -> None: + df: IR, + ): self.schema = schema - self.df = df - self.by = by + self.by = tuple(by) + self.order = tuple(order) + self.null_order = tuple(null_order) + self.stable = stable self.zlice = zlice - stable, nulls_last, descending = options - self.order, self.null_order = sorting.sort_order( - descending, nulls_last=nulls_last, num_keys=len(by) - ) - self.do_sort = ( - plc.sorting.stable_sort_by_key if stable else plc.sorting.sort_by_key - ) + self.children = (df,) def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" - df = self.df.evaluate(cache=cache) + (child,) = self.children + df = child.evaluate(cache=cache) sort_keys = broadcast( *(k.evaluate(df) for k in self.by), target_length=df.num_rows ) @@ -1016,11 +1185,14 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: for i, k in enumerate(sort_keys) if k.name in df.column_map and k.obj is df.column_map[k.name].obj } - table = self.do_sort( + do_sort = ( + plc.sorting.stable_sort_by_key if self.stable else plc.sorting.sort_by_key + ) + table = do_sort( df.table, plc.Table([k.obj for k in sort_keys]), - self.order, - self.null_order, + list(self.order), + list(self.null_order), ) columns: list[Column] = [] for name, c in zip(df.column_map, table.columns(), strict=True): @@ -1037,49 +1209,64 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return DataFrame(columns).slice(self.zlice) -@dataclasses.dataclass class Slice(IR): """Slice a dataframe.""" - df: IR - """Input.""" + __slots__ = ("offset", "length") + _non_child = ("schema", "offset", "length") offset: int """Start of the slice.""" length: int """Length of the slice.""" + def __init__(self, schema: Schema, offset: int, length: int, df: IR): + self.schema = schema + self.offset = offset + self.length = length + self.children = (df,) + def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" - df = self.df.evaluate(cache=cache) + (child,) = self.children + df = child.evaluate(cache=cache) return df.slice((self.offset, self.length)) -@dataclasses.dataclass class Filter(IR): """Filter a dataframe with a boolean mask.""" - df: IR - """Input.""" + __slots__ = ("mask",) + _non_child = ("schema", "mask") mask: expr.NamedExpr - """Expression evaluating to a mask.""" + """Expression to produce the filter mask.""" + + def __init__(self, schema: Schema, mask: expr.NamedExpr, df: IR): + self.schema = schema + self.mask = mask + self.children = (df,) def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" - df = self.df.evaluate(cache=cache) + (child,) = self.children + df = child.evaluate(cache=cache) (mask,) = broadcast(self.mask.evaluate(df), target_length=df.num_rows) return df.filter(mask) -@dataclasses.dataclass class Projection(IR): """Select a subset of columns from a dataframe.""" - df: IR - """Input.""" + __slots__ = () + _non_child = ("schema",) + + def __init__(self, schema: Schema, df: IR): + self.schema = schema + self.children = (df,) def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" - df = self.df.evaluate(cache=cache) + (child,) = self.children + df = child.evaluate(cache=cache) # This can reorder things. columns = broadcast( *(df.column_map[name] for name in self.schema), target_length=df.num_rows @@ -1087,16 +1274,15 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return DataFrame(columns) -@dataclasses.dataclass class MapFunction(IR): """Apply some function to a dataframe.""" - df: IR - """Input.""" + __slots__ = ("name", "options") + _non_child = ("schema", "name", "options") name: str - """Function name.""" + """Name of the function to apply""" options: Any - """Arbitrary options, interpreted per function.""" + """Arbitrary name-specific options""" _NAMES: ClassVar[frozenset[str]] = frozenset( [ @@ -1111,9 +1297,11 @@ class MapFunction(IR): ] ) - def __post_init__(self) -> None: - """Validate preconditions.""" - super().__post_init__() + def __init__(self, schema: Schema, name: str, options: Any, df: IR): + self.schema = schema + self.name = name + self.options = options + self.children = (df,) if self.name not in MapFunction._NAMES: raise NotImplementedError(f"Unhandled map function {self.name}") if self.name == "explode": @@ -1127,7 +1315,7 @@ def __post_init__(self) -> None: old, new, _ = self.options # TODO: perhaps polars should validate renaming in the IR? if len(new) != len(set(new)) or ( - set(new) & (set(self.df.schema.keys()) - set(old)) + set(new) & (set(df.schema.keys()) - set(old)) ): raise NotImplementedError("Duplicate new names in rename.") elif self.name == "unpivot": @@ -1136,31 +1324,31 @@ def __post_init__(self) -> None: variable_name = "variable" if variable_name is None else variable_name if len(pivotees) == 0: index = frozenset(indices) - pivotees = [name for name in self.df.schema if name not in index] + pivotees = [name for name in df.schema if name not in index] if not all( - dtypes.can_cast(self.df.schema[p], self.schema[value_name]) - for p in pivotees + dtypes.can_cast(df.schema[p], self.schema[value_name]) for p in pivotees ): raise NotImplementedError( "Unpivot cannot cast all input columns to " f"{self.schema[value_name].id()}" ) - self.options = (indices, pivotees, variable_name, value_name) + self.options = (tuple(indices), tuple(pivotees), variable_name, value_name) def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" + (child,) = self.children if self.name == "rechunk": # No-op in our data model # Don't think this appears in a plan tree from python - return self.df.evaluate(cache=cache) # pragma: no cover + return child.evaluate(cache=cache) # pragma: no cover elif self.name == "rename": - df = self.df.evaluate(cache=cache) + df = child.evaluate(cache=cache) # final tag is "swapping" which is useful for the # optimiser (it blocks some pushdown operations) old, new, _ = self.options return df.rename_columns(dict(zip(old, new, strict=True))) elif self.name == "explode": - df = self.df.evaluate(cache=cache) + df = child.evaluate(cache=cache) ((to_explode,),) = self.options index = df.column_names.index(to_explode) subset = df.column_names_set - {to_explode} @@ -1170,7 +1358,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: elif self.name == "unpivot": indices, pivotees, variable_name, value_name = self.options npiv = len(pivotees) - df = self.df.evaluate(cache=cache) + df = child.evaluate(cache=cache) index_columns = [ Column(col, name=name) for col, name in zip( @@ -1209,37 +1397,40 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: raise AssertionError("Should never be reached") # pragma: no cover -@dataclasses.dataclass class Union(IR): """Concatenate dataframes vertically.""" - dfs: list[IR] - """List of inputs.""" + __slots__ = ("zlice",) + _non_child = ("schema", "zlice") zlice: tuple[int, int] | None - """Optional slice to apply after concatenation.""" + """Optional slice to apply to the result.""" - def __post_init__(self) -> None: - """Validate preconditions.""" - super().__post_init__() - schema = self.dfs[0].schema - if not all(s.schema == schema for s in self.dfs[1:]): + def __init__(self, schema: Schema, zlice: tuple[int, int] | None, *children: IR): + self.schema = schema + self.zlice = zlice + self.children = children + schema = self.children[0].schema + if not all(s.schema == schema for s in self.children[1:]): raise NotImplementedError("Schema mismatch") def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" # TODO: only evaluate what we need if we have a slice - dfs = [df.evaluate(cache=cache) for df in self.dfs] + dfs = [df.evaluate(cache=cache) for df in self.children] return DataFrame.from_table( plc.concatenate.concatenate([df.table for df in dfs]), dfs[0].column_names ).slice(self.zlice) -@dataclasses.dataclass class HConcat(IR): """Concatenate dataframes horizontally.""" - dfs: list[IR] - """List of inputs.""" + __slots__ = () + _non_child = ("schema",) + + def __init__(self, schema: Schema, *children: IR): + self.schema = schema + self.children = children @staticmethod def _extend_with_nulls(table: plc.Table, *, nrows: int) -> plc.Table: @@ -1271,7 +1462,7 @@ def _extend_with_nulls(table: plc.Table, *, nrows: int) -> plc.Table: def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" - dfs = [df.evaluate(cache=cache) for df in self.dfs] + dfs = [df.evaluate(cache=cache) for df in self.children] max_rows = max(df.num_rows for df in dfs) # Horizontal concatenation extends shorter tables with nulls dfs = [ diff --git a/python/cudf_polars/cudf_polars/dsl/nodebase.py b/python/cudf_polars/cudf_polars/dsl/nodebase.py new file mode 100644 index 00000000000..228d300f467 --- /dev/null +++ b/python/cudf_polars/cudf_polars/dsl/nodebase.py @@ -0,0 +1,152 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-License-Identifier: Apache-2.0 + +"""Base class for IR nodes, and utilities.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, ClassVar, Generic, TypeVar + +if TYPE_CHECKING: + from collections.abc import Hashable, Sequence + + from typing_extensions import Self + + +__all__: list[str] = ["Node"] + +T = TypeVar("T", bound="Node[Any]") + + +class Node(Generic[T]): + """ + An abstract node type. + + Nodes are immutable! + + This contains a (potentially empty) tuple of child nodes, + along with non-child data. For uniform reconstruction and + implementation of hashing and equality schemes, child classes need + to provide a certain amount of metadata when they are defined. + Specifically, the ``_non_child`` attribute must list, in-order, + the names of the slots that are passed to the constructor. The + constructor must take arguments in the order ``(*_non_child, + *children).`` + """ + + __slots__ = ("_hash_value", "_repr_value", "children") + _hash_value: int + _repr_value: str + children: tuple[T, ...] + _non_child: ClassVar[tuple[str, ...]] = () + + def _ctor_arguments(self, children: Sequence[T]) -> Sequence[Any | T]: + return (*(getattr(self, attr) for attr in self._non_child), *children) + + def reconstruct( + self, children: Sequence[T] + ) -> Self: # pragma: no cover; not yet used + """ + Rebuild this node with new children. + + Parameters + ---------- + children + New children + + Returns + ------- + New node with new children. Non-child data is shared with the input. + """ + return type(self)(*self._ctor_arguments(children)) + + def get_hashable(self) -> Hashable: + """ + Return a hashable object for the node. + + Returns + ------- + Hashable object. + + Notes + ----- + This method is used by the :meth:`__hash__` implementation + (which does caching). If your node type needs special-case + handling for some of its attributes, override this method, not + :meth:`__hash__`. + """ + return (type(self), self._ctor_arguments(self.children)) + + def __hash__(self) -> int: + """ + Hash of an expression with caching. + + See Also + -------- + get_hashable + """ + try: + return self._hash_value + except AttributeError: + self._hash_value = hash(self.get_hashable()) + return self._hash_value + + def is_equal(self, other: Self) -> bool: + """ + Equality of two nodes of equal type. + + Override this in subclasses, rather than :meth:`__eq__`. + + Parameter + --------- + other + object of same type to compare to. + + Notes + ----- + Since nodes are immutable, this does common subexpression + elimination when two nodes are determined to be equal. + + :meth:`__eq__` handles the case where the objects being + compared are not of the same type, so in this method, we only + need to implement equality of equal types. + + Returns + ------- + True if the two nodes are equal, false otherwise. + """ + if self is other: + return True + result = self._ctor_arguments(self.children) == other._ctor_arguments( + other.children + ) + # Eager CSE for nodes that match. + if result: + self.children = other.children + return result + + def __eq__(self, other: Any) -> bool: + """ + Equality of expressions. + + See Also + -------- + is_equal + """ + if type(self) is not type(other) or hash(self) != hash(other): + return False + else: + return self.is_equal(other) + + def __ne__(self, other: Any) -> bool: + """Inequality of expressions.""" + return not self.__eq__(other) + + def __repr__(self) -> str: + """String representation of an expression with caching.""" + try: + return self._repr_value + except AttributeError: + args = ", ".join(f"{arg!r}" for arg in self._ctor_arguments(self.children)) + self._repr_value = f"{type(self).__name__}({args})" + return self._repr_value diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py index a0291037f01..522c4a6729c 100644 --- a/python/cudf_polars/cudf_polars/dsl/translate.py +++ b/python/cudf_polars/cudf_polars/dsl/translate.py @@ -20,7 +20,7 @@ from cudf_polars.dsl import expr, ir from cudf_polars.typing import NodeTraverser -from cudf_polars.utils import dtypes +from cudf_polars.utils import dtypes, sorting __all__ = ["translate_ir", "translate_named_expr"] @@ -148,7 +148,7 @@ def _( with set_node(visitor, node.input): inp = translate_ir(visitor, n=None) exprs = [translate_named_expr(visitor, n=e) for e in node.expr] - return ir.Select(schema, inp, exprs, node.should_broadcast) + return ir.Select(schema, exprs, node.should_broadcast, inp) @_translate_ir.register @@ -161,11 +161,11 @@ def _( keys = [translate_named_expr(visitor, n=e) for e in node.keys] return ir.GroupBy( schema, - inp, - aggs, keys, + aggs, node.maintain_order, node.options, + inp, ) @@ -182,7 +182,7 @@ def _( with set_node(visitor, node.input_right): inp_right = translate_ir(visitor, n=None) right_on = [translate_named_expr(visitor, n=e) for e in node.right_on] - return ir.Join(schema, inp_left, inp_right, left_on, right_on, node.options) + return ir.Join(schema, left_on, right_on, node.options, inp_left, inp_right) @_translate_ir.register @@ -192,7 +192,7 @@ def _( with set_node(visitor, node.input): inp = translate_ir(visitor, n=None) exprs = [translate_named_expr(visitor, n=e) for e in node.exprs] - return ir.HStack(schema, inp, exprs, node.should_broadcast) + return ir.HStack(schema, exprs, node.should_broadcast, inp) @_translate_ir.register @@ -202,17 +202,23 @@ def _( with set_node(visitor, node.input): inp = translate_ir(visitor, n=None) exprs = [translate_named_expr(visitor, n=e) for e in node.expr] - return ir.Reduce(schema, inp, exprs) + return ir.Reduce(schema, exprs, inp) @_translate_ir.register def _( node: pl_ir.Distinct, visitor: NodeTraverser, schema: dict[str, plc.DataType] ) -> ir.IR: + (keep, subset, maintain_order, zlice) = node.options + keep = ir.Distinct._KEEP_MAP[keep] + subset = frozenset(subset) if subset is not None else None return ir.Distinct( schema, + keep, + subset, + zlice, + maintain_order, translate_ir(visitor, n=node.input), - node.options, ) @@ -223,14 +229,18 @@ def _( with set_node(visitor, node.input): inp = translate_ir(visitor, n=None) by = [translate_named_expr(visitor, n=e) for e in node.by_column] - return ir.Sort(schema, inp, by, node.sort_options, node.slice) + stable, nulls_last, descending = node.sort_options + order, null_order = sorting.sort_order( + descending, nulls_last=nulls_last, num_keys=len(by) + ) + return ir.Sort(schema, by, order, null_order, stable, node.slice, inp) @_translate_ir.register def _( node: pl_ir.Slice, visitor: NodeTraverser, schema: dict[str, plc.DataType] ) -> ir.IR: - return ir.Slice(schema, translate_ir(visitor, n=node.input), node.offset, node.len) + return ir.Slice(schema, node.offset, node.len, translate_ir(visitor, n=node.input)) @_translate_ir.register @@ -240,7 +250,7 @@ def _( with set_node(visitor, node.input): inp = translate_ir(visitor, n=None) mask = translate_named_expr(visitor, n=node.predicate) - return ir.Filter(schema, inp, mask) + return ir.Filter(schema, mask, inp) @_translate_ir.register @@ -259,10 +269,10 @@ def _( name, *options = node.function return ir.MapFunction( schema, - # TODO: merge_sorted breaks this pattern - translate_ir(visitor, n=node.input), name, options, + # TODO: merge_sorted breaks this pattern + translate_ir(visitor, n=node.input), ) @@ -271,7 +281,7 @@ def _( node: pl_ir.Union, visitor: NodeTraverser, schema: dict[str, plc.DataType] ) -> ir.IR: return ir.Union( - schema, [translate_ir(visitor, n=n) for n in node.inputs], node.options + schema, node.options, *(translate_ir(visitor, n=n) for n in node.inputs) ) @@ -279,7 +289,7 @@ def _( def _( node: pl_ir.HConcat, visitor: NodeTraverser, schema: dict[str, plc.DataType] ) -> ir.IR: - return ir.HConcat(schema, [translate_ir(visitor, n=n) for n in node.inputs]) + return ir.HConcat(schema, *(translate_ir(visitor, n=n) for n in node.inputs)) def translate_ir(visitor: NodeTraverser, *, n: int | None = None) -> ir.IR: diff --git a/python/cudf_polars/cudf_polars/dsl/traversal.py b/python/cudf_polars/cudf_polars/dsl/traversal.py new file mode 100644 index 00000000000..be8338cb9a9 --- /dev/null +++ b/python/cudf_polars/cudf_polars/dsl/traversal.py @@ -0,0 +1,175 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-License-Identifier: Apache-2.0 + +"""Traversal and visitor utilities for nodes.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Generic + +from cudf_polars.typing import U_contra, V_co + +if TYPE_CHECKING: + from collections.abc import Callable, Generator, Mapping, MutableMapping + + from cudf_polars.typing import GenericTransformer, NodeT + + +__all__: list[str] = [ + "traversal", + "reuse_if_unchanged", + "make_recursive", + "CachingVisitor", +] + + +def traversal(node: NodeT) -> Generator[NodeT, None, None]: + """ + Pre-order traversal of nodes in an expression. + + Parameters + ---------- + node + Root of expression to traverse. + + Yields + ------ + Unique nodes in the expression, parent before child, children + in-order from left to right. + """ + seen = {node} + lifo = [node] + + while lifo: + node = lifo.pop() + yield node + for child in reversed(node.children): + if child not in seen: + seen.add(child) + lifo.append(child) + + +def reuse_if_unchanged(node: NodeT, fn: GenericTransformer[NodeT, NodeT]) -> NodeT: + """ + Recipe for transforming nodes that returns the old object if unchanged. + + Parameters + ---------- + node + Node to recurse on + fn + Function to transform children + + Notes + ----- + This can be used as a generic "base case" handler when + writing transforms that take nodes and produce new nodes. + + Returns + ------- + Existing node `e` if transformed children are unchanged, otherwise + reconstructed node with new children. + """ + new_children = [fn(c) for c in node.children] + if all(new == old for new, old in zip(new_children, node.children, strict=True)): + return node + return node.reconstruct(new_children) + + +def make_recursive( + fn: Callable[[U_contra, GenericTransformer[U_contra, V_co]], V_co], + *, + state: Mapping[str, Any] | None = None, +) -> GenericTransformer[U_contra, V_co]: + """ + No-op wrapper for recursive visitors. + + Facilitates using visitors that don't need caching but are written + in the same style. + + Parameters + ---------- + fn + Function to transform inputs to outputs. Should take as its + second argument a callable from input to output. + state + Arbitrary *immutable* state that should be accessible to the + visitor through the `state` property. + + Notes + ----- + All transformation functions *must* be free of side-effects. + + Usually, prefer a :class:`CachingVisitor`, but if we know that we + don't need caching in a transformation and then this no-op + approach is slightly cheaper. + + Returns + ------- + Recursive function without caching. + + See Also + -------- + CachingVisitor + """ + + def rec(node: U_contra) -> V_co: + return fn(node, rec) # type: ignore[arg-type] + + rec.state = state if state is not None else {} # type: ignore[attr-defined] + return rec # type: ignore[return-value] + + +class CachingVisitor(Generic[U_contra, V_co]): + """ + Caching wrapper for recursive visitors. + + Facilitates writing visitors where already computed results should + be cached and reused. The cache is managed automatically, and is + tied to the lifetime of the wrapper. + + Parameters + ---------- + fn + Function to transform inputs to outputs. Should take as its + second argument the recursive cache manager. + state + Arbitrary *immutable* state that should be accessible to the + visitor through the `state` property. + + Notes + ----- + All transformation functions *must* be free of side-effects. + + Returns + ------- + Recursive function with caching. + """ + + def __init__( + self, + fn: Callable[[U_contra, GenericTransformer[U_contra, V_co]], V_co], + *, + state: Mapping[str, Any] | None = None, + ) -> None: + self.fn = fn + self.cache: MutableMapping[U_contra, V_co] = {} + self.state = state if state is not None else {} + + def __call__(self, value: U_contra) -> V_co: + """ + Apply the function to a value. + + Parameters + ---------- + value + The value to transform. + + Returns + ------- + A transformed value. + """ + try: + return self.cache[value] + except KeyError: + return self.cache.setdefault(value, self.fn(value, self)) diff --git a/python/cudf_polars/cudf_polars/typing/__init__.py b/python/cudf_polars/cudf_polars/typing/__init__.py index 240b11bdf59..a27a3395c35 100644 --- a/python/cudf_polars/cudf_polars/typing/__init__.py +++ b/python/cudf_polars/cudf_polars/typing/__init__.py @@ -5,8 +5,8 @@ from __future__ import annotations -from collections.abc import Mapping -from typing import TYPE_CHECKING, Literal, Protocol, Union +from collections.abc import Hashable, Mapping +from typing import TYPE_CHECKING, Any, Literal, Protocol, TypeVar, Union import pylibcudf as plc @@ -18,7 +18,19 @@ import polars as pl -IR: TypeAlias = Union[ + from cudf_polars.dsl import expr, ir, nodebase + +__all__: list[str] = [ + "PolarsIR", + "PolarsExpr", + "NodeTraverser", + "OptimizationArgs", + "GenericTransformer", + "ExprTransformer", + "IRTransformer", +] + +PolarsIR: TypeAlias = Union[ pl_ir.PythonScan, pl_ir.Scan, pl_ir.Cache, @@ -38,7 +50,7 @@ pl_ir.ExtContext, ] -Expr: TypeAlias = Union[ +PolarsExpr: TypeAlias = Union[ pl_expr.Function, pl_expr.Window, pl_expr.Literal, @@ -68,7 +80,7 @@ def set_node(self, n: int) -> None: """Set the current plan node to n.""" ... - def view_current_node(self) -> IR: + def view_current_node(self) -> PolarsIR: """Convert current plan node to python rep.""" ... @@ -80,7 +92,7 @@ def get_dtype(self, n: int) -> pl.DataType: """Get the datatype of the given expression id.""" ... - def view_expression(self, n: int) -> Expr: + def view_expression(self, n: int) -> PolarsExpr: """Convert the given expression to python rep.""" ... @@ -107,3 +119,29 @@ def set_udf( "cluster_with_columns", "no_optimization", ] + + +U_contra = TypeVar("U_contra", bound=Hashable, contravariant=True) +V_co = TypeVar("V_co", covariant=True) +NodeT = TypeVar("NodeT", bound="nodebase.Node[Any]") + + +class GenericTransformer(Protocol[U_contra, V_co]): + """Abstract protocol for recursive visitors.""" + + def __call__(self, __value: U_contra) -> V_co: + """Apply the visitor to the node.""" + ... + + @property + def state(self) -> Mapping[str, Any]: + """Arbitrary immutable state.""" + ... + + +# Quotes to avoid circular import +ExprTransformer: TypeAlias = GenericTransformer["expr.Expr", "expr.Expr"] +"""Protocol for transformation of Expr nodes.""" + +IRTransformer: TypeAlias = GenericTransformer["ir.IR", "ir.IR"] +"""Protocol for transformation of IR nodes.""" diff --git a/python/cudf_polars/docs/overview.md b/python/cudf_polars/docs/overview.md index 7837a275f20..74b2cd4e5de 100644 --- a/python/cudf_polars/docs/overview.md +++ b/python/cudf_polars/docs/overview.md @@ -11,14 +11,17 @@ You will need: environment](https://github.com/rapidsai/cudf/blob/branch-24.12/CONTRIBUTING.md#setting-up-your-build-environment). The combined devcontainer works, or whatever your favourite approach is. -> ![NOTE] These instructions will get simpler as we merge code in. +:::{note} +These instructions will get simpler as we merge code in. +::: ## Installing polars -`cudf-polars` works with polars >= 1.3, as long as the internal IR -version doesn't get a major version bump. So `pip install polars>=1.3` -should work. For development, if we're adding things to the polars -side of things, we will need to build polars from source: +The `cudf-polars` `pyproject.toml` advertises which polars versions it +works with. So for pure `cudf-polars` development, installing as +normal and satisfying the dependencies in the repository is +sufficient. For development, if we're adding things to the polars side +of things, we will need to build polars from source: ```sh git clone https://github.com/pola-rs/polars @@ -36,7 +39,9 @@ pip install --upgrade uv uv pip install --upgrade -r py-polars/requirements-dev.txt ``` -> ![NOTE] plain `pip install` works fine, but `uv` is _much_ faster! +:::{note} +plain `pip install` works fine, but `uv` is _much_ faster! +::: Now we have the necessary machinery to build polars ```sh @@ -83,7 +88,7 @@ representation (IR). Second, an execution phase which executes using our IR. The translation phase receives the a low-level Rust `NodeTraverser` -object which delivers Python representations of the plan nodes (and +object that delivers Python representations of the plan nodes (and expressions) one at a time. During translation, we endeavour to raise `NotImplementedError` for any unsupported functionality. This way, if we can't execute something, we just don't modify the logical plan at @@ -126,7 +131,6 @@ arguments, at the moment, `raise_on_fail` is also supported, which raises, rather than falling back, during translation: ```python - result = q.collect(engine=pl.GPUEngine(raise_on_fail=True)) ``` @@ -144,13 +148,73 @@ changes. We can therefore attempt to detect the IR version appropriately. This should be done during IR translation in `translate.py`. -## Adding a handler for a new plan node +# IR design + +As noted, we translate the polars DSL into our own IR. This is both so +that we can smooth out minor version differences (advertised by +`NodeTraverser` version changes) within `cudf-polars`, and so that we +have the freedom to introduce new IR nodes and rewrite rules as might +be appropriate for GPU execution. + +To that end, we provide facilities for definition of nodes as well as +writing traversals and rewrite rules. The abstract base class `Node` +in `dsl/nodebase.py` defines the interface for implementing new nodes, +and provides many useful default methods. See also the docstrings of +the `Node` class. + +:::{note} +This generic implementation relies on nodes being treated as +*immutable*. Do not implement in-place modification of nodes, bad +things will happen. +::: + +## Defining nodes + +A concrete node type (`cudf-polars` has expression nodes, `Expr`; +and plan nodes, `IR`), should inherit from `Node`. Nodes have +two types of data: + +1. `children`: a tuple (possibly empty) of concrete nodes; +2. non-child: arbitrary data attached to the node that is _not_ a + concrete node. + +The base `Node` class requires that one advertise the names of the +non-child attributes in the `_non_child` class variable. The +constructor of the concrete node should take its arguments in the +order `*_non_child` (ordered as the class variable does) and then +`*children`. For example, the `Sort` node, which sorts a column +generated by an expression, has this definition: + +```python +class Expr(Node): + children: tuple[Expr, ...] + +class Sort(Expr): + _non_child = ("dtype", "options") + children: tuple[Expr] + def __init__(self, dtype, options, column: Expr): + self.dtype = dtype + self.options = options + self.children = (column,) +``` + +By following this pattern, we get an automatic (caching) +implementation of `__hash__` and `__eq__`, as well as a useful +`reconstruct` method that will rebuild the node with new children. + +If you want to control the behaviour of `__hash__` and `__eq__` for a +single node, override (respectively) the `get_hashable` and `is_equal` +methods. + +## Adding new translation rules from the polars IR + +### Plan nodes -Plan node definitions live in `cudf_polars/dsl/ir.py`, these are -`dataclasses` that inherit from the base `IR` node. The evaluation of -a plan node is done by implementing the `evaluate` method. +Plan node definitions live in `cudf_polars/dsl/ir.py`, these all +inherit from the base `IR` node. The evaluation of a plan node is done +by implementing the `evaluate` method. -To translate the plan node, add a case handler in `translate_ir` which +To translate the plan node, add a case handler in `translate_ir` that lives in `cudf_polars/dsl/translate.py`. As well as child nodes that are plans, most plan nodes contain child @@ -163,25 +227,12 @@ translating a `Join` node, the left keys (expressions) should be translated with the left input active (and right keys with right input). To facilitate this, use the `set_node` context manager. -## Adding a handler for a new expression node +### Expression nodes Adding a handle for an expression node is very similar to a plan node. -Expressions are all defined in `cudf_polars/dsl/expr.py` and inherit -from `Expr`. Unlike plan nodes, these are not `dataclasses`, since it -is simpler for us to implement efficient hashing, repr, and equality if we -can write that ourselves. - -Every expression consists of two types of data: -1. child data (other `Expr`s) -2. non-child data (anything other than an `Expr`) -The generic implementations of special methods in the base `Expr` base -class require that the subclasses advertise which arguments to the -constructor are non-child in a `_non_child` class slot. The -constructor should then take arguments: -```python -def __init__(self, *non_child_data: Any, *children: Expr): -``` -Read the docstrings in the `Expr` class for more details. +Expressions are defined in `cudf_polars/dsl/expressions/` and exported +into the `dsl` namespace via `expr.py`. They inherit +from `Expr`. Expressions are evaluated by implementing a `do_evaluate` method that takes a `DataFrame` as context (this provides columns) along with an @@ -198,6 +249,124 @@ To simplify state tracking, all columns should be considered immutable on construction. This matches the "functional" description coming from the logical plan in any case, so is reasonably natural. +## Traversing and transforming nodes + +In addition to representing and evaluating nodes. We also provide +facilities for traversing a tree of nodes and defining transformation +rules in `dsl/traversal.py`. The simplest is `traversal`, a +[pre-order](https://en.wikipedia.org/wiki/Tree_traversal) visit of all +unique nodes in an expression. Use this if you want to know some +specific thing about an expression. For example, to determine if an +expression contains a `Literal` node: + +```python +def has_literal(node: Expr) -> bool: + return any(isinstance(e, Literal) for e in traversal(node)) +``` + +It is often convenient to provide (immutable) state to a visitor, as +well as some facility to perform DAG-aware rewrites (reusing a +transformation for an expression if we have already seen it). We +therefore adopt the following pattern of writing DAG-aware visitors. +Suppose we want a rewrite rule (`rewrite`) between expressions +(`Expr`) and some new type `T`. We define our general transformation +function `rewrite` with type `Expr -> (Expr -> T) -> T`: + +```python +from cudf_polars.typing import GenericTransformer + +@singledispatch +def rewrite(e: Expr, rec: GenericTransformer[Expr, T]) -> T: + ... +``` + +Note in particular that the function to perform the recursion is +passed as the second argument. Rather than defining methods on each +node in turn for a particular rewrite rule, we prefer free functions +and use `functools.singledispatch` to provide dispatching. We now, in +the usual fashion, register handlers for different expression types. +To use this function, we need to be able to provide both the +expression to convert and the recursive function itself. To do this we +must convert our `rewrite` function into something that only takes a +single argument (the expression to rewrite), but carries around +information about how to perform the recursion. To this end, we have +two utilities in `traversal.py`: + +- `make_recursive` and +- `CachingVisitor`. + +These both implement the `GenericTransformer` protocol, and can be +wrapped around a transformation function like `rewrite` to provide a +function `Expr -> T`. They also allow us to attach arbitrary +*immutable* state to our visitor by passing a `state` dictionary. This +dictionary can then be inspected by the concrete transformation +function. `make_recursive` is very simple, and provides no caching of +intermediate results (so any DAGs that are visited will be viewed as +trees). `CachingVisitor` provides the same interface, but maintains a +cache of intermediate results, and reuses them if the same expression +is seen again. + +Finally, for writing transformations that take nodes and deliver new +nodes (e.g. rewrite rules), we have a final utility +`reuse_if_unchanged` that can be used as a base case transformation +for node to node rewrites. It is a depth-first visit that transforms +children but only returns a new node with new children if the rewrite +of children returned new nodes. + +To see how these pieces fit together, let us consider writing a +`rename` function that takes an expression (potentially with +references to columns) along with a mapping defining a renaming +between (some subset of) column names. The goal is to deliver a new +expression with appropriate columns renamed. + +To start, we define the dispatch function +```python +from collections.abc import Mapping +from functools import singledispatch +from cudf_polars.dsl.traversal import ( + CachingVisitor, make_recursive, reuse_if_unchanged +) +from cudf_polars.dsl.expr import Col, Expr +from cudf_polars.typing import ExprTransformer + + +@singledispatch +def _rename(e: Expr, rec: ExprTransformer) -> Expr: + raise NotImplementedError(f"No handler for {type(e)}") +``` +then we register specific handlers, first for columns: +```python +@_rename.register +def _(e: Col, rec: ExprTransformer) -> Expr: + mapping = rec.state["mapping"] # state set on rec + if e.name in mapping: + # If we have a rename, return a new Col reference + # with a new name + return type(e)(e.dtype, mapping[e.name]) + return e +``` +and then for the remaining expressions +```python +_rename.register(Expr)(reuse_if_unchanged) +``` + +:::{note} +In this case, we could have put the generic handler in the `_rename` +function, however, then we would not get a nice error message if we +accidentally sent in an object of the incorrect type. +::: + +Finally we tie everything together with a public function: + +```python +def rename(e: Expr, mapping: Mapping[str, str]) -> Expr: + """Rename column references in an expression.""" + mapper = CachingVisitor(_rename, state={"mapping": mapping}) + # or + # mapper = make_recursive(_rename, state={"mapping": mapping}) + return mapper(e) +``` + # Containers Containers should be constructed as relatively lightweight objects diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index 5345fad41a2..a8bb634732f 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -60,7 +60,7 @@ xfail_strict = true [tool.coverage.report] exclude_also = [ "if TYPE_CHECKING:", - "class .*\\bProtocol\\):", + "class .*\\bProtocol(?:\\[[^]]+\\])?\\):", "assert_never\\(" ] # The cudf_polars test suite doesn't exercise the plugin, so we omit diff --git a/python/cudf_polars/tests/dsl/test_expr.py b/python/cudf_polars/tests/dsl/test_expr.py index b7d4672daca..84e33262869 100644 --- a/python/cudf_polars/tests/dsl/test_expr.py +++ b/python/cudf_polars/tests/dsl/test_expr.py @@ -73,3 +73,24 @@ def test_namedexpr_repr_stable(): b2 = expr.NamedExpr("b1", expr.Col(plc.DataType(plc.TypeId.INT8), "a")) assert repr(b1) == repr(b2) + + +def test_equality_cse(): + dt = plc.DataType(plc.TypeId.INT8) + + def make_expr(n1, n2): + a = expr.Col(plc.DataType(plc.TypeId.INT8), n1) + b = expr.Col(plc.DataType(plc.TypeId.INT8), n2) + + return expr.BinOp(dt, plc.binaryop.BinaryOperator.ADD, a, b) + + e1 = make_expr("a", "b") + e2 = make_expr("a", "b") + e3 = make_expr("a", "c") + + assert e1.children is not e2.children + assert e1 == e2 + assert e1.children is e2.children + assert e1 == e2 + assert e1 != e3 + assert e2 != e3 diff --git a/python/cudf_polars/tests/dsl/test_traversal.py b/python/cudf_polars/tests/dsl/test_traversal.py new file mode 100644 index 00000000000..6505a786855 --- /dev/null +++ b/python/cudf_polars/tests/dsl/test_traversal.py @@ -0,0 +1,229 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from functools import singledispatch + +import pylibcudf as plc + +import polars as pl +from polars.testing import assert_frame_equal + +from cudf_polars import translate_ir +from cudf_polars.dsl import expr, ir +from cudf_polars.dsl.traversal import ( + CachingVisitor, + make_recursive, + reuse_if_unchanged, + traversal, +) +from cudf_polars.typing import ExprTransformer, IRTransformer + + +def make_expr(dt, n1, n2): + a1 = expr.Col(dt, n1) + a2 = expr.Col(dt, n2) + + return expr.BinOp(dt, plc.binaryop.BinaryOperator.MUL, a1, a2) + + +def test_traversal_unique(): + dt = plc.DataType(plc.TypeId.INT8) + + e1 = make_expr(dt, "a", "a") + unique_exprs = list(traversal(e1)) + + assert len(unique_exprs) == 2 + assert set(unique_exprs) == {expr.Col(dt, "a"), e1} + assert unique_exprs == [e1, expr.Col(dt, "a")] + + e2 = make_expr(dt, "a", "b") + unique_exprs = list(traversal(e2)) + + assert len(unique_exprs) == 3 + assert set(unique_exprs) == {expr.Col(dt, "a"), expr.Col(dt, "b"), e2} + assert unique_exprs == [e2, expr.Col(dt, "a"), expr.Col(dt, "b")] + + e3 = make_expr(dt, "b", "a") + unique_exprs = list(traversal(e3)) + + assert len(unique_exprs) == 3 + assert set(unique_exprs) == {expr.Col(dt, "a"), expr.Col(dt, "b"), e3} + assert unique_exprs == [e3, expr.Col(dt, "b"), expr.Col(dt, "a")] + + +def rename(e, rec): + mapping = rec.state["mapping"] + if isinstance(e, expr.Col) and e.name in mapping: + return type(e)(e.dtype, mapping[e.name]) + return reuse_if_unchanged(e, rec) + + +def test_caching_visitor(): + dt = plc.DataType(plc.TypeId.INT8) + + e1 = make_expr(dt, "a", "b") + + mapper = CachingVisitor(rename, state={"mapping": {"b": "c"}}) + + renamed = mapper(e1) + assert renamed == make_expr(dt, "a", "c") + assert len(mapper.cache) == 3 + + e2 = make_expr(dt, "a", "a") + mapper = CachingVisitor(rename, state={"mapping": {"b": "c"}}) + + renamed = mapper(e2) + assert renamed == make_expr(dt, "a", "a") + assert len(mapper.cache) == 2 + mapper = CachingVisitor(rename, state={"mapping": {"a": "c"}}) + + renamed = mapper(e2) + assert renamed == make_expr(dt, "c", "c") + assert len(mapper.cache) == 2 + + +def test_noop_visitor(): + dt = plc.DataType(plc.TypeId.INT8) + + e1 = make_expr(dt, "a", "b") + + mapper = make_recursive(rename, state={"mapping": {"b": "c"}}) + + renamed = mapper(e1) + assert renamed == make_expr(dt, "a", "c") + + e2 = make_expr(dt, "a", "a") + mapper = make_recursive(rename, state={"mapping": {"b": "c"}}) + + renamed = mapper(e2) + assert renamed == make_expr(dt, "a", "a") + mapper = make_recursive(rename, state={"mapping": {"a": "c"}}) + + renamed = mapper(e2) + assert renamed == make_expr(dt, "c", "c") + + +def test_rewrite_ir_node(): + df = pl.LazyFrame({"a": [1, 2, 1], "b": [1, 3, 4]}) + q = df.group_by("a").agg(pl.col("b").sum()).sort("b") + + orig = translate_ir(q._ldf.visit()) + + new_df = pl.DataFrame({"a": [1, 1, 2], "b": [-1, -2, -4]}) + + def replace_df(node, rec): + if isinstance(node, ir.DataFrameScan): + return ir.DataFrameScan( + node.schema, new_df._df, node.projection, node.predicate + ) + return reuse_if_unchanged(node, rec) + + mapper = CachingVisitor(replace_df) + + new = mapper(orig) + + result = new.evaluate(cache={}).to_polars() + + expect = pl.DataFrame({"a": [2, 1], "b": [-4, -3]}) + + assert_frame_equal(result, expect) + + +def test_rewrite_scan_node(tmp_path): + left = pl.LazyFrame({"a": [1, 2, 3], "b": [1, 3, 4]}) + right = pl.DataFrame({"a": [1, 4, 2], "c": [1, 2, 3]}) + + right.write_parquet(tmp_path / "right.pq") + + right_s = pl.scan_parquet(tmp_path / "right.pq") + + q = left.join(right_s, on="a", how="inner") + + def replace_scan(node, rec): + if isinstance(node, ir.Scan): + return ir.DataFrameScan( + node.schema, right._df, node.with_columns, node.predicate + ) + return reuse_if_unchanged(node, rec) + + mapper = CachingVisitor(replace_scan) + + orig = translate_ir(q._ldf.visit()) + new = mapper(orig) + + result = new.evaluate(cache={}).to_polars() + + expect = q.collect() + + assert_frame_equal(result, expect, check_row_order=False) + + +def test_rewrite_names_and_ops(): + df = pl.LazyFrame({"a": [1, 2, 3], "b": [3, 4, 5], "c": [5, 6, 7], "d": [7, 9, 8]}) + + q = df.select(pl.col("a") - (pl.col("b") + pl.col("c") * 2), pl.col("d")).sort("d") + + # We will replace a -> d, c -> d, and addition with multiplication + expect = ( + df.select( + (pl.col("d") - (pl.col("b") * pl.col("d") * 2)).alias("a"), pl.col("d") + ) + .sort("d") + .collect() + ) + + qir = translate_ir(q._ldf.visit()) + + @singledispatch + def _transform(e: expr.Expr, fn: ExprTransformer) -> expr.Expr: + raise NotImplementedError("Unhandled") + + @_transform.register + def _(e: expr.Col, fn: ExprTransformer): + mapping = fn.state["mapping"] + if e.name in mapping: + return type(e)(e.dtype, mapping[e.name]) + return e + + @_transform.register + def _(e: expr.BinOp, fn: ExprTransformer): + if e.op == plc.binaryop.BinaryOperator.ADD: + return type(e)( + e.dtype, plc.binaryop.BinaryOperator.MUL, *map(fn, e.children) + ) + return reuse_if_unchanged(e, fn) + + _transform.register(expr.Expr)(reuse_if_unchanged) + + @singledispatch + def _rewrite(node: ir.IR, fn: IRTransformer) -> ir.IR: + raise NotImplementedError("Unhandled") + + @_rewrite.register + def _(node: ir.Select, fn: IRTransformer): + expr_mapper = fn.state["expr_mapper"] + return type(node)( + node.schema, + [expr.NamedExpr(e.name, expr_mapper(e.value)) for e in node.exprs], + node.should_broadcast, + fn(node.children[0]), + ) + + _rewrite.register(ir.IR)(reuse_if_unchanged) + + rewriter = CachingVisitor( + _rewrite, + state={ + "expr_mapper": CachingVisitor( + _transform, state={"mapping": {"a": "d", "c": "d"}} + ) + }, + ) + + new_ir = rewriter(qir) + + got = new_ir.evaluate(cache={}).to_polars() + + assert_frame_equal(expect, got) diff --git a/python/cudf_polars/tests/test_config.py b/python/cudf_polars/tests/test_config.py index 3c3986be19b..9900f598e5f 100644 --- a/python/cudf_polars/tests/test_config.py +++ b/python/cudf_polars/tests/test_config.py @@ -10,7 +10,7 @@ import rmm -from cudf_polars.dsl.ir import IR +from cudf_polars.dsl.ir import DataFrameScan from cudf_polars.testing.asserts import ( assert_gpu_result_equal, assert_ir_translation_raises, @@ -18,10 +18,10 @@ def test_polars_verbose_warns(monkeypatch): - def raise_unimplemented(self): + def raise_unimplemented(self, *args): raise NotImplementedError("We don't support this") - monkeypatch.setattr(IR, "__post_init__", raise_unimplemented) + monkeypatch.setattr(DataFrameScan, "__init__", raise_unimplemented) q = pl.LazyFrame({}) # Ensure that things raise assert_ir_translation_raises(q, NotImplementedError) From 4fe338c0efe0fee2ee69c8207f9f4cbe9aa4d4a2 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 22 Oct 2024 04:39:09 -1000 Subject: [PATCH 15/76] Add string.replace_re APIs to pylibcudf (#17023) Contributes to https://github.com/rapidsai/cudf/issues/15162 Authors: - Matthew Roeschke (https://github.com/mroeschke) - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Murray (https://github.com/Matt711) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/17023 --- .../api_docs/pylibcudf/strings/index.rst | 1 + .../api_docs/pylibcudf/strings/replace_re.rst | 6 + python/cudf/cudf/_lib/strings/replace_re.pyx | 104 ++++---------- python/cudf/cudf/core/column/string.py | 2 +- .../pylibcudf/libcudf/strings/replace_re.pxd | 24 ++-- .../pylibcudf/strings/CMakeLists.txt | 1 + .../pylibcudf/pylibcudf/strings/__init__.pxd | 2 + .../pylibcudf/pylibcudf/strings/__init__.py | 2 + .../pylibcudf/strings/replace_re.pxd | 30 ++++ .../pylibcudf/strings/replace_re.pyx | 134 ++++++++++++++++++ .../pylibcudf/tests/test_string_replace_re.py | 71 ++++++++++ 11 files changed, 289 insertions(+), 88 deletions(-) create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/strings/replace_re.rst create mode 100644 python/pylibcudf/pylibcudf/strings/replace_re.pxd create mode 100644 python/pylibcudf/pylibcudf/strings/replace_re.pyx create mode 100644 python/pylibcudf/pylibcudf/tests/test_string_replace_re.py diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst index c8c0016126d..ae670b5bd8a 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst @@ -16,6 +16,7 @@ strings regex_flags regex_program repeat + replace_re replace side_type slice diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/replace_re.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/replace_re.rst new file mode 100644 index 00000000000..5bf715ef657 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/replace_re.rst @@ -0,0 +1,6 @@ +========== +replace_re +========== + +.. automodule:: pylibcudf.strings.replace_re + :members: diff --git a/python/cudf/cudf/_lib/strings/replace_re.pyx b/python/cudf/cudf/_lib/strings/replace_re.pyx index fffc8b7c3f6..462d5c903e8 100644 --- a/python/cudf/cudf/_lib/strings/replace_re.pyx +++ b/python/cudf/cudf/_lib/strings/replace_re.pyx @@ -1,26 +1,11 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. -from cython.operator cimport dereference -from libcpp.memory cimport unique_ptr -from libcpp.string cimport string -from libcpp.utility cimport move -from libcpp.vector cimport vector +from pylibcudf.libcudf.types cimport size_type +import pylibcudf as plc from cudf.core.buffer import acquire_spill_lock -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.scalar.scalar cimport string_scalar -from pylibcudf.libcudf.strings.regex_flags cimport regex_flags -from pylibcudf.libcudf.strings.regex_program cimport regex_program -from pylibcudf.libcudf.strings.replace_re cimport ( - replace_re as cpp_replace_re, - replace_with_backrefs as cpp_replace_with_backrefs, -) -from pylibcudf.libcudf.types cimport size_type - from cudf._lib.column cimport Column -from cudf._lib.scalar cimport DeviceScalar @acquire_spill_lock() @@ -34,28 +19,16 @@ def replace_re(Column source_strings, `n` indicates the number of resplacements to be made from start. (-1 indicates all) """ - - cdef DeviceScalar repl = py_repl.device_value - - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - - cdef string pattern_string = str(pattern).encode() - cdef const string_scalar* scalar_repl = \ - (repl.get_raw_ptr()) - cdef regex_flags c_flags = regex_flags.DEFAULT - cdef unique_ptr[regex_program] c_prog - - with nogil: - c_prog = move(regex_program.create(pattern_string, c_flags)) - c_result = move(cpp_replace_re( - source_view, - dereference(c_prog), - scalar_repl[0], - n - )) - - return Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.replace_re.replace_re( + source_strings.to_pylibcudf(mode="read"), + plc.strings.regex_program.RegexProgram.create( + str(pattern), + plc.strings.regex_flags.RegexFlags.DEFAULT + ), + py_repl.device_value.c_value, + n + ) + return Column.from_pylibcudf(plc_column) @acquire_spill_lock() @@ -68,50 +41,29 @@ def replace_with_backrefs( new string with the extracted elements found using `pattern` regular expression in `source_strings`. """ - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - - cdef string pattern_string = str(pattern).encode() - cdef string repl_string = str(repl).encode() - cdef regex_flags c_flags = regex_flags.DEFAULT - cdef unique_ptr[regex_program] c_prog - - with nogil: - c_prog = move(regex_program.create(pattern_string, c_flags)) - c_result = move(cpp_replace_with_backrefs( - source_view, - dereference(c_prog), - repl_string - )) - - return Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.replace_re.replace_with_backrefs( + source_strings.to_pylibcudf(mode="read"), + plc.strings.regex_program.RegexProgram.create( + str(pattern), + plc.strings.regex_flags.RegexFlags.DEFAULT + ), + repl + ) + return Column.from_pylibcudf(plc_column) @acquire_spill_lock() def replace_multi_re(Column source_strings, - object patterns, + list patterns, Column repl_strings): """ Returns a Column after replacing occurrences of multiple regular expressions `patterns` with their corresponding strings in `repl_strings` in `source_strings`. """ - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - cdef column_view repl_view = repl_strings.view() - - cdef int pattern_size = len(patterns) - cdef vector[string] patterns_vector - patterns_vector.reserve(pattern_size) - - for pattern in patterns: - patterns_vector.push_back(str.encode(pattern)) - - with nogil: - c_result = move(cpp_replace_re( - source_view, - patterns_vector, - repl_view - )) - - return Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.replace_re.replace_re( + source_strings.to_pylibcudf(mode="read"), + patterns, + repl_strings.to_pylibcudf(mode="read") + ) + return Column.from_pylibcudf(plc_column) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 45d1a8b087b..b25e486d855 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -998,7 +998,7 @@ def replace( return self._return_or_inplace( libstrings.replace_multi_re( self._column, - pat, + list(pat), column.as_column(repl, dtype="str"), ) if regex diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd index 40f0e2fa50c..6b0c90d0acc 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd @@ -6,6 +6,7 @@ from libcpp.vector cimport vector from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.strings.regex_flags cimport regex_flags from pylibcudf.libcudf.strings.regex_program cimport regex_program from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.types cimport size_type @@ -14,17 +15,18 @@ from pylibcudf.libcudf.types cimport size_type cdef extern from "cudf/strings/replace_re.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] replace_re( - column_view source_strings, - regex_program, - string_scalar repl, - size_type maxrepl) except + - - cdef unique_ptr[column] replace_with_backrefs( - column_view source_strings, - regex_program, - string repl) except + + column_view input, + regex_program prog, + string_scalar replacement, + size_type max_replace_count) except + cdef unique_ptr[column] replace_re( - column_view source_strings, + column_view input, vector[string] patterns, - column_view repls) except + + column_view replacements, + regex_flags flags) except + + + cdef unique_ptr[column] replace_with_backrefs( + column_view input, + regex_program prog, + string replacement) except + diff --git a/python/pylibcudf/pylibcudf/strings/CMakeLists.txt b/python/pylibcudf/pylibcudf/strings/CMakeLists.txt index 04dd131cd75..5d7fbd24b91 100644 --- a/python/pylibcudf/pylibcudf/strings/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/strings/CMakeLists.txt @@ -28,6 +28,7 @@ set(cython_sources regex_program.pyx repeat.pyx replace.pyx + replace_re.pyx side_type.pyx slice.pyx strip.pyx diff --git a/python/pylibcudf/pylibcudf/strings/__init__.pxd b/python/pylibcudf/pylibcudf/strings/__init__.pxd index 93c61f3f72c..da1c1c576c0 100644 --- a/python/pylibcudf/pylibcudf/strings/__init__.pxd +++ b/python/pylibcudf/pylibcudf/strings/__init__.pxd @@ -17,6 +17,7 @@ from . cimport ( regex_program, repeat, replace, + replace_re, side_type, slice, split, @@ -42,6 +43,7 @@ __all__ = [ "regex_program", "repeat", "replace", + "replace_re", "slice", "strip", "split", diff --git a/python/pylibcudf/pylibcudf/strings/__init__.py b/python/pylibcudf/pylibcudf/strings/__init__.py index d52b0405f1e..40fa8261905 100644 --- a/python/pylibcudf/pylibcudf/strings/__init__.py +++ b/python/pylibcudf/pylibcudf/strings/__init__.py @@ -17,6 +17,7 @@ regex_program, repeat, replace, + replace_re, side_type, slice, split, @@ -42,6 +43,7 @@ "regex_program", "repeat", "replace", + "replace_re", "slice", "strip", "split", diff --git a/python/pylibcudf/pylibcudf/strings/replace_re.pxd b/python/pylibcudf/pylibcudf/strings/replace_re.pxd new file mode 100644 index 00000000000..e27ccd55f7d --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/replace_re.pxd @@ -0,0 +1,30 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column cimport Column +from pylibcudf.libcudf.types cimport size_type +from pylibcudf.scalar cimport Scalar +from pylibcudf.strings.regex_flags cimport regex_flags +from pylibcudf.strings.regex_program cimport RegexProgram + +ctypedef fused Replacement: + Column + Scalar + +ctypedef fused Patterns: + RegexProgram + list + + +cpdef Column replace_re( + Column input, + Patterns patterns, + Replacement replacement=*, + size_type max_replace_count=*, + regex_flags flags=* +) + +cpdef Column replace_with_backrefs( + Column input, + RegexProgram prog, + str replacement +) diff --git a/python/pylibcudf/pylibcudf/strings/replace_re.pyx b/python/pylibcudf/pylibcudf/strings/replace_re.pyx new file mode 100644 index 00000000000..ccc33fd4425 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/replace_re.pyx @@ -0,0 +1,134 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +from cython.operator cimport dereference +from libcpp.memory cimport unique_ptr +from libcpp.string cimport string +from libcpp.utility cimport move +from libcpp.vector cimport vector +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.scalar.scalar_factories cimport ( + make_string_scalar as cpp_make_string_scalar, +) +from pylibcudf.libcudf.strings cimport replace_re as cpp_replace_re +from pylibcudf.libcudf.types cimport size_type +from pylibcudf.scalar cimport Scalar +from pylibcudf.strings.regex_flags cimport regex_flags +from pylibcudf.strings.regex_program cimport RegexProgram + + +cpdef Column replace_re( + Column input, + Patterns patterns, + Replacement replacement=None, + size_type max_replace_count=-1, + regex_flags flags=regex_flags.DEFAULT, +): + """ + For each string, replaces any character sequence matching the given patterns + with the provided replacement. + + For details, see :cpp:func:`cudf::strings::replace_re` + + Parameters + ---------- + input : Column + Strings instance for this operation. + patterns: RegexProgram or list[str] + If RegexProgram, the regex to match to each string. + If list[str], a list of regex strings to search within each string. + replacement : Scalar or Column + If Scalar, the string used to replace the matched sequence in each string. + ``patterns`` must be a RegexProgram. + If Column, the strings used for replacement. + ``patterns`` must be a list[str]. + max_replace_count : int + The maximum number of times to replace the matched pattern + within each string. ``patterns`` must be a RegexProgram. + Default replaces every substring that is matched. + flags : RegexFlags + Regex flags for interpreting special characters in the patterns. + ``patterns`` must be a list[str] + + Returns + ------- + Column + New strings column + """ + cdef unique_ptr[column] c_result + cdef vector[string] c_patterns + + if Patterns is RegexProgram and Replacement is Scalar: + if replacement is None: + replacement = Scalar.from_libcudf( + cpp_make_string_scalar("".encode()) + ) + with nogil: + c_result = move( + cpp_replace_re.replace_re( + input.view(), + patterns.c_obj.get()[0], + dereference((replacement.get())), + max_replace_count + ) + ) + + return Column.from_libcudf(move(c_result)) + elif Patterns is list and Replacement is Column: + c_patterns.reserve(len(patterns)) + for pattern in patterns: + c_patterns.push_back(pattern.encode()) + + with nogil: + c_result = move( + cpp_replace_re.replace_re( + input.view(), + c_patterns, + replacement.view(), + flags, + ) + ) + + return Column.from_libcudf(move(c_result)) + else: + raise TypeError("Must pass either a RegexProgram and a Scalar or a list") + + +cpdef Column replace_with_backrefs( + Column input, + RegexProgram prog, + str replacement +): + """ + For each string, replaces any character sequence matching the given regex + using the replacement template for back-references. + + For details, see :cpp:func:`cudf::strings::replace_with_backrefs` + + Parameters + ---------- + input : Column + Strings instance for this operation. + + prog: RegexProgram + Regex program instance. + + replacement : str + The replacement template for creating the output string. + + Returns + ------- + Column + New strings column. + """ + cdef unique_ptr[column] c_result + cdef string c_replacement = replacement.encode() + + with nogil: + c_result = cpp_replace_re.replace_with_backrefs( + input.view(), + prog.c_obj.get()[0], + c_replacement, + ) + + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_replace_re.py b/python/pylibcudf/pylibcudf/tests/test_string_replace_re.py new file mode 100644 index 00000000000..ff2ce348d3b --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_string_replace_re.py @@ -0,0 +1,71 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +import pyarrow as pa +import pyarrow.compute as pc +import pylibcudf as plc +import pytest +from utils import assert_column_eq + + +@pytest.mark.parametrize("max_replace_count", [-1, 1]) +def test_replace_re_regex_program_scalar(max_replace_count): + arr = pa.array(["foo", "fuz", None]) + pat = "f." + repl = "ba" + result = plc.strings.replace_re.replace_re( + plc.interop.from_arrow(arr), + plc.strings.regex_program.RegexProgram.create( + pat, plc.strings.regex_flags.RegexFlags.DEFAULT + ), + plc.interop.from_arrow(pa.scalar(repl)), + max_replace_count=max_replace_count, + ) + expected = pc.replace_substring_regex( + arr, + pat, + repl, + max_replacements=max_replace_count + if max_replace_count != -1 + else None, + ) + assert_column_eq(result, expected) + + +@pytest.mark.parametrize( + "flags", + [ + plc.strings.regex_flags.RegexFlags.DEFAULT, + plc.strings.regex_flags.RegexFlags.DOTALL, + ], +) +def test_replace_re_list_str_columns(flags): + arr = pa.array(["foo", "fuz", None]) + pats = ["oo", "uz"] + repls = ["a", "b"] + result = plc.strings.replace_re.replace_re( + plc.interop.from_arrow(arr), + pats, + plc.interop.from_arrow(pa.array(repls)), + flags=flags, + ) + expected = arr + for pat, repl in zip(pats, repls): + expected = pc.replace_substring_regex( + expected, + pat, + repl, + ) + assert_column_eq(result, expected) + + +def test_replace_with_backrefs(): + arr = pa.array(["Z756", None]) + result = plc.strings.replace_re.replace_with_backrefs( + plc.interop.from_arrow(arr), + plc.strings.regex_program.RegexProgram.create( + "(\\d)(\\d)", plc.strings.regex_flags.RegexFlags.DEFAULT + ), + "V\\2\\1", + ) + expected = pa.array(["ZV576", None]) + assert_column_eq(result, expected) From 14cdf53df3b8d9d7aa1bce1e8d9ff0af47998580 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Tue, 22 Oct 2024 19:13:44 -0400 Subject: [PATCH 16/76] Migrate NVText Replacing APIs to pylibcudf (#17084) Apart of #15162 Authors: - Matthew Murray (https://github.com/Matt711) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Mark Harris (https://github.com/harrism) URL: https://github.com/rapidsai/cudf/pull/17084 --- cpp/include/nvtext/replace.hpp | 4 +- .../api_docs/pylibcudf/nvtext/index.rst | 1 + .../api_docs/pylibcudf/nvtext/replace.rst | 6 + python/cudf/cudf/_lib/nvtext/normalize.pyx | 16 +-- python/cudf/cudf/_lib/nvtext/replace.pyx | 66 +++-------- .../pylibcudf/pylibcudf/nvtext/CMakeLists.txt | 2 +- .../pylibcudf/pylibcudf/nvtext/__init__.pxd | 2 + python/pylibcudf/pylibcudf/nvtext/__init__.py | 2 + python/pylibcudf/pylibcudf/nvtext/replace.pxd | 20 ++++ python/pylibcudf/pylibcudf/nvtext/replace.pyx | 109 ++++++++++++++++++ .../pylibcudf/tests/test_nvtext_replace.py | 63 ++++++++++ 11 files changed, 230 insertions(+), 61 deletions(-) create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/replace.rst create mode 100644 python/pylibcudf/pylibcudf/nvtext/replace.pxd create mode 100644 python/pylibcudf/pylibcudf/nvtext/replace.pyx create mode 100644 python/pylibcudf/pylibcudf/tests/test_nvtext_replace.py diff --git a/cpp/include/nvtext/replace.hpp b/cpp/include/nvtext/replace.hpp index bbd0503379b..822edcbdb43 100644 --- a/cpp/include/nvtext/replace.hpp +++ b/cpp/include/nvtext/replace.hpp @@ -82,7 +82,7 @@ namespace CUDF_EXPORT nvtext { * The default of empty string will identify tokens using whitespace. * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory - * @return New strings columns of with replaced strings + * @return New strings column with replaced strings */ std::unique_ptr replace_tokens( cudf::strings_column_view const& input, @@ -131,7 +131,7 @@ std::unique_ptr replace_tokens( * The default of empty string will identify tokens using whitespace. * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory - * @return New strings columns of with replaced strings + * @return New strings column of filtered strings */ std::unique_ptr filter_tokens( cudf::strings_column_view const& input, diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst index 3a79c869971..c5b9533597a 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst @@ -10,3 +10,4 @@ nvtext minhash ngrams_tokenize normalize + replace diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/replace.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/replace.rst new file mode 100644 index 00000000000..04cee972dc1 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/replace.rst @@ -0,0 +1,6 @@ +======= +replace +======= + +.. automodule:: pylibcudf.nvtext.replace + :members: diff --git a/python/cudf/cudf/_lib/nvtext/normalize.pyx b/python/cudf/cudf/_lib/nvtext/normalize.pyx index 633bc902db1..cc45123dd0a 100644 --- a/python/cudf/cudf/_lib/nvtext/normalize.pyx +++ b/python/cudf/cudf/_lib/nvtext/normalize.pyx @@ -11,16 +11,18 @@ from pylibcudf import nvtext @acquire_spill_lock() def normalize_spaces(Column input): - result = nvtext.normalize.normalize_spaces( - input.to_pylibcudf(mode="read") + return Column.from_pylibcudf( + nvtext.normalize.normalize_spaces( + input.to_pylibcudf(mode="read") + ) ) - return Column.from_pylibcudf(result) @acquire_spill_lock() def normalize_characters(Column input, bool do_lower=True): - result = nvtext.normalize.normalize_characters( - input.to_pylibcudf(mode="read"), - do_lower, + return Column.from_pylibcudf( + nvtext.normalize.normalize_characters( + input.to_pylibcudf(mode="read"), + do_lower, + ) ) - return Column.from_pylibcudf(result) diff --git a/python/cudf/cudf/_lib/nvtext/replace.pyx b/python/cudf/cudf/_lib/nvtext/replace.pyx index 61ae3da5782..bec56ade83c 100644 --- a/python/cudf/cudf/_lib/nvtext/replace.pyx +++ b/python/cudf/cudf/_lib/nvtext/replace.pyx @@ -2,20 +2,10 @@ from cudf.core.buffer import acquire_spill_lock -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.nvtext.replace cimport ( - filter_tokens as cpp_filter_tokens, - replace_tokens as cpp_replace_tokens, -) -from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.types cimport size_type from cudf._lib.column cimport Column -from cudf._lib.scalar cimport DeviceScalar +from pylibcudf import nvtext @acquire_spill_lock() @@ -30,27 +20,14 @@ def replace_tokens(Column strings, provided. """ - cdef DeviceScalar delimiter = py_delimiter.device_value - - cdef column_view c_strings = strings.view() - cdef column_view c_targets = targets.view() - cdef column_view c_replacements = replacements.view() - - cdef const string_scalar* c_delimiter = delimiter\ - .get_raw_ptr() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_replace_tokens( - c_strings, - c_targets, - c_replacements, - c_delimiter[0], - ) + return Column.from_pylibcudf( + nvtext.replace.replace_tokens( + strings.to_pylibcudf(mode="read"), + targets.to_pylibcudf(mode="read"), + replacements.to_pylibcudf(mode="read"), + py_delimiter.device_value.c_value, ) - - return Column.from_unique_ptr(move(c_result)) + ) @acquire_spill_lock() @@ -65,24 +42,11 @@ def filter_tokens(Column strings, character provided. """ - cdef DeviceScalar replacement = py_replacement.device_value - cdef DeviceScalar delimiter = py_delimiter.device_value - - cdef column_view c_strings = strings.view() - cdef const string_scalar* c_repl = replacement\ - .get_raw_ptr() - cdef const string_scalar* c_delimiter = delimiter\ - .get_raw_ptr() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_filter_tokens( - c_strings, - min_token_length, - c_repl[0], - c_delimiter[0], - ) + return Column.from_pylibcudf( + nvtext.replace.filter_tokens( + strings.to_pylibcudf(mode="read"), + min_token_length, + py_replacement.device_value.c_value, + py_delimiter.device_value.c_value, ) - - return Column.from_unique_ptr(move(c_result)) + ) diff --git a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt index e01ca3fbdd3..7a94490998a 100644 --- a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt @@ -13,7 +13,7 @@ # ============================================================================= set(cython_sources edit_distance.pyx generate_ngrams.pyx jaccard.pyx minhash.pyx - ngrams_tokenize.pyx normalize.pyx + ngrams_tokenize.pyx normalize.pyx replace.pyx ) set(linked_libraries cudf::cudf) diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd index 08dbec84090..5a5e665d309 100644 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd @@ -7,6 +7,7 @@ from . cimport ( minhash, ngrams_tokenize, normalize, + replace, ) __all__ = [ @@ -16,4 +17,5 @@ __all__ = [ "minhash", "ngrams_tokenize", "normalize", + "replace", ] diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.py b/python/pylibcudf/pylibcudf/nvtext/__init__.py index 6dccf3dd9cf..77187f0845d 100644 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.py +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.py @@ -7,6 +7,7 @@ minhash, ngrams_tokenize, normalize, + replace, ) __all__ = [ @@ -16,4 +17,5 @@ "minhash", "ngrams_tokenize", "normalize", + "replace", ] diff --git a/python/pylibcudf/pylibcudf/nvtext/replace.pxd b/python/pylibcudf/pylibcudf/nvtext/replace.pxd new file mode 100644 index 00000000000..624f90e7486 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/replace.pxd @@ -0,0 +1,20 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column cimport Column +from pylibcudf.libcudf.types cimport size_type +from pylibcudf.scalar cimport Scalar + + +cpdef Column replace_tokens( + Column input, + Column targets, + Column replacements, + Scalar delimiter=*, +) + +cpdef Column filter_tokens( + Column input, + size_type min_token_length, + Scalar replacement=*, + Scalar delimiter=* +) diff --git a/python/pylibcudf/pylibcudf/nvtext/replace.pyx b/python/pylibcudf/pylibcudf/nvtext/replace.pyx new file mode 100644 index 00000000000..b65348ce14d --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/replace.pyx @@ -0,0 +1,109 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from cython.operator cimport dereference +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.nvtext.replace cimport ( + filter_tokens as cpp_filter_tokens, + replace_tokens as cpp_replace_tokens, +) +from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.scalar.scalar_factories cimport ( + make_string_scalar as cpp_make_string_scalar, +) +from pylibcudf.libcudf.types cimport size_type +from pylibcudf.scalar cimport Scalar + + +cpdef Column replace_tokens( + Column input, + Column targets, + Column replacements, + Scalar delimiter=None, +): + """ + Replaces specified tokens with corresponding replacement strings. + + For details, see :cpp:func:`replace_tokens` + + Parameters + ---------- + input : Column + Strings column to replace + targets : Column + Strings to compare against tokens found in ``input`` + replacements : Column + Replacement strings for each string in ``targets`` + delimiter : Scalar, optional + Characters used to separate each string into tokens. + The default of empty string will identify tokens using whitespace. + + Returns + ------- + Column + New strings column with replaced strings + """ + cdef unique_ptr[column] c_result + if delimiter is None: + delimiter = Scalar.from_libcudf( + cpp_make_string_scalar("".encode()) + ) + with nogil: + c_result = cpp_replace_tokens( + input.view(), + targets.view(), + replacements.view(), + dereference(delimiter.get()), + ) + return Column.from_libcudf(move(c_result)) + + +cpdef Column filter_tokens( + Column input, + size_type min_token_length, + Scalar replacement=None, + Scalar delimiter=None +): + """ + Removes tokens whose lengths are less than a specified number of characters. + + For details, see :cpp:func:`filter_tokens` + + Parameters + ---------- + input : Column + Strings column to replace + min_token_length : size_type + The minimum number of characters to retain a + token in the output string + replacement : Scalar, optional + Optional replacement string to be used in place of removed tokens + delimiter : Scalar, optional + Characters used to separate each string into tokens. + The default of empty string will identify tokens using whitespace. + Returns + ------- + Column + New strings column of filtered strings + """ + cdef unique_ptr[column] c_result + if delimiter is None: + delimiter = Scalar.from_libcudf( + cpp_make_string_scalar("".encode()) + ) + if replacement is None: + replacement = Scalar.from_libcudf( + cpp_make_string_scalar("".encode()) + ) + + with nogil: + c_result = cpp_filter_tokens( + input.view(), + min_token_length, + dereference(replacement.get()), + dereference(delimiter.get()), + ) + + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_replace.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_replace.py new file mode 100644 index 00000000000..0fb54bb4ee1 --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_replace.py @@ -0,0 +1,63 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +import pyarrow as pa +import pylibcudf as plc +import pytest +from utils import assert_column_eq + + +@pytest.fixture(scope="module") +def input_col(): + arr = ["the quick", "brown fox", "jumps*over the", "lazy dog"] + return pa.array(arr) + + +@pytest.fixture(scope="module") +def targets(): + arr = ["the quick", "brown fox", "jumps*over the", "lazy dog"] + return pa.array(arr) + + +@pytest.mark.parametrize("delim", ["*", None]) +def test_replace_tokens(input_col, targets, delim): + replacements = pa.array(["slow", "cat", "looked", "rat"]) + result = plc.nvtext.replace.replace_tokens( + plc.interop.from_arrow(input_col), + plc.interop.from_arrow(targets), + plc.interop.from_arrow(replacements), + plc.interop.from_arrow(pa.scalar(delim)) if delim else None, + ) + expected = pa.array(["slow", "cat", "jumps*over the", "rat"]) + if not delim: + expected = pa.array( + ["the quick", "brown fox", "jumps*over the", "lazy dog"] + ) + assert_column_eq(result, expected) + + +@pytest.mark.parametrize("min_token_length", [4, 5]) +@pytest.mark.parametrize("replace", ["---", None]) +@pytest.mark.parametrize("delim", ["*", None]) +def test_filter_tokens(input_col, min_token_length, replace, delim): + result = plc.nvtext.replace.filter_tokens( + plc.interop.from_arrow(input_col), + min_token_length, + plc.interop.from_arrow(pa.scalar(replace)) if replace else None, + plc.interop.from_arrow(pa.scalar(delim)) if delim else None, + ) + expected = pa.array( + ["the quick", "brown fox", "jumps*over the", "lazy dog"] + ) + if not delim and not replace and min_token_length == 4: + expected = pa.array([" quick", "brown ", "jumps*over ", "lazy "]) + if not delim and not replace and min_token_length == 5: + expected = pa.array([" quick", "brown ", "jumps*over ", " "]) + if not delim and replace == "---" and min_token_length == 4: + expected = pa.array( + ["--- quick", "brown ---", "jumps*over ---", "lazy ---"] + ) + if not delim and replace == "---" and min_token_length == 5: + expected = pa.array( + ["--- quick", "brown ---", "jumps*over ---", "--- ---"] + ) + assert_column_eq(result, expected) From 27c0c9d594c9cc94261ab9a1db968bfc50aa38e0 Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Tue, 22 Oct 2024 19:16:06 -0400 Subject: [PATCH 17/76] Set the default number of threads in KvikIO thread pool to 8 (#17126) Recent benchmarks have shown that setting the environment variable `KVIKIO_NTHREADS=8` in cuDF usually leads to optimal I/O performance. This PR internally sets the default KvikIO thread pool size to 8. The env `KVIKIO_NTHREADS` will still be honored if users explicitly set it. Fixes #16718 Authors: - Tianyu Liu (https://github.com/kingcrimsontianyu) Approvers: - Mads R. B. Kristensen (https://github.com/madsbk) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/17126 --- cpp/include/cudf/io/config_utils.hpp | 16 ++++++++++++---- cpp/src/io/utilities/config_utils.cpp | 12 +++++++++++- cpp/src/io/utilities/data_sink.cpp | 1 + cpp/src/io/utilities/datasource.cpp | 1 + 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/cpp/include/cudf/io/config_utils.hpp b/cpp/include/cudf/io/config_utils.hpp index 1827ba0e3e6..13a76d50346 100644 --- a/cpp/include/cudf/io/config_utils.hpp +++ b/cpp/include/cudf/io/config_utils.hpp @@ -18,7 +18,8 @@ #include namespace CUDF_EXPORT cudf { -namespace io::cufile_integration { +namespace io { +namespace cufile_integration { /** * @brief Returns true if cuFile and its compatibility mode are enabled. @@ -35,9 +36,15 @@ bool is_gds_enabled(); */ bool is_kvikio_enabled(); -} // namespace io::cufile_integration +/** + * @brief Set kvikIO thread pool size according to the environment variable KVIKIO_NTHREADS. If + * KVIKIO_NTHREADS is not set, use 8 threads by default. + */ +void set_thread_pool_nthreads_from_env(); + +} // namespace cufile_integration -namespace io::nvcomp_integration { +namespace nvcomp_integration { /** * @brief Returns true if all nvCOMP uses are enabled. @@ -49,5 +56,6 @@ bool is_all_enabled(); */ bool is_stable_enabled(); -} // namespace io::nvcomp_integration +} // namespace nvcomp_integration +} // namespace io } // namespace CUDF_EXPORT cudf diff --git a/cpp/src/io/utilities/config_utils.cpp b/cpp/src/io/utilities/config_utils.cpp index a3afbd52896..813743fa7b4 100644 --- a/cpp/src/io/utilities/config_utils.cpp +++ b/cpp/src/io/utilities/config_utils.cpp @@ -19,7 +19,10 @@ #include #include +#include + #include +#include #include #include @@ -53,6 +56,14 @@ bool is_gds_enabled() { return is_always_enabled() or get_env_policy() == usage_ bool is_kvikio_enabled() { return get_env_policy() == usage_policy::KVIKIO; } +void set_thread_pool_nthreads_from_env() +{ + static std::once_flag flag{}; + std::call_once(flag, [] { + auto nthreads = getenv_or("KVIKIO_NTHREADS", 8U); + kvikio::defaults::thread_pool_nthreads_reset(nthreads); + }); +} } // namespace cufile_integration namespace nvcomp_integration { @@ -81,5 +92,4 @@ bool is_all_enabled() { return get_env_policy() == usage_policy::ALWAYS; } bool is_stable_enabled() { return is_all_enabled() or get_env_policy() == usage_policy::STABLE; } } // namespace nvcomp_integration - } // namespace cudf::io diff --git a/cpp/src/io/utilities/data_sink.cpp b/cpp/src/io/utilities/data_sink.cpp index 0b76f3d3e8f..a8a275919d8 100644 --- a/cpp/src/io/utilities/data_sink.cpp +++ b/cpp/src/io/utilities/data_sink.cpp @@ -42,6 +42,7 @@ class file_sink : public data_sink { if (!_output_stream.is_open()) { detail::throw_on_file_open_failure(filepath, true); } if (cufile_integration::is_kvikio_enabled()) { + cufile_integration::set_thread_pool_nthreads_from_env(); _kvikio_file = kvikio::FileHandle(filepath, "w"); CUDF_LOG_INFO("Writing a file using kvikIO, with compatibility mode {}.", _kvikio_file.is_compat_mode_on() ? "on" : "off"); diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index 2daaecadca6..19f2103071e 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -48,6 +48,7 @@ class file_source : public datasource { { detail::force_init_cuda_context(); if (cufile_integration::is_kvikio_enabled()) { + cufile_integration::set_thread_pool_nthreads_from_env(); _kvikio_file = kvikio::FileHandle(filepath); CUDF_LOG_INFO("Reading a file using kvikIO, with compatibility mode {}.", _kvikio_file.is_compat_mode_on() ? "on" : "off"); From cff1296845aa9a4078dd0d95dd30b7e7c004f2d9 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Tue, 22 Oct 2024 20:28:51 -0400 Subject: [PATCH 18/76] JSON tokenizer memory optimizations (#16978) The full push-down automata that tokenizes the input JSON string, as well as the bracket-brace FST over-estimates the total buffer size required for the translated output and indices. This PR splits the `transduce` calls for both FSTs into two invocations. The first invocation estimates the size of the translated buffer and the translated indices, and the second call performs the DFA run. Authors: - Shruti Shivakumar (https://github.com/shrshi) - Karthikeyan (https://github.com/karthikeyann) Approvers: - Karthikeyan (https://github.com/karthikeyann) - Basit Ayantunde (https://github.com/lamarrr) URL: https://github.com/rapidsai/cudf/pull/16978 --- cpp/src/io/json/nested_json_gpu.cu | 43 +++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu index 534b30a6089..60e78f4763d 100644 --- a/cpp/src/io/json/nested_json_gpu.cu +++ b/cpp/src/io/json/nested_json_gpu.cu @@ -1448,10 +1448,6 @@ void get_stack_context(device_span json_in, // Number of stack operations in the input (i.e., number of '{', '}', '[', ']' outside of quotes) cudf::detail::device_scalar d_num_stack_ops(stream); - // Sequence of stack symbols and their position in the original input (sparse representation) - rmm::device_uvector stack_ops{json_in.size(), stream}; - rmm::device_uvector stack_op_indices{json_in.size(), stream}; - // Prepare finite-state transducer that only selects '{', '}', '[', ']' outside of quotes constexpr auto max_translation_table_size = to_stack_op::NUM_SYMBOL_GROUPS * to_stack_op::TT_NUM_STATES; @@ -1468,11 +1464,26 @@ void get_stack_context(device_span json_in, // "Search" for relevant occurrence of brackets and braces that indicate the beginning/end // of structs/lists + // Run FST to estimate the sizes of translated buffers + json_to_stack_ops_fst.Transduce(json_in.begin(), + static_cast(json_in.size()), + thrust::make_discard_iterator(), + thrust::make_discard_iterator(), + d_num_stack_ops.data(), + to_stack_op::start_state, + stream); + + auto stack_ops_bufsize = d_num_stack_ops.value(stream); + // Sequence of stack symbols and their position in the original input (sparse representation) + rmm::device_uvector stack_ops{stack_ops_bufsize, stream}; + rmm::device_uvector stack_op_indices{stack_ops_bufsize, stream}; + + // Run bracket-brace FST to retrieve starting positions of structs and lists json_to_stack_ops_fst.Transduce(json_in.begin(), static_cast(json_in.size()), stack_ops.data(), stack_op_indices.data(), - d_num_stack_ops.data(), + thrust::make_discard_iterator(), to_stack_op::start_state, stream); @@ -1508,6 +1519,7 @@ std::pair, rmm::device_uvector> pr device_span token_indices, rmm::cuda_stream_view stream) { + CUDF_FUNC_RANGE(); // Instantiate FST for post-processing the token stream to remove all tokens that belong to an // invalid JSON line token_filter::UnwrapTokenFromSymbolOp sgid_op{}; @@ -1643,21 +1655,28 @@ std::pair, rmm::device_uvector> ge // see a JSON-line delimiter as the very first item SymbolOffsetT const delimiter_offset = (format == tokenizer_pda::json_format_cfg_t::JSON_LINES_RECOVER ? 1 : 0); - rmm::device_uvector tokens{max_token_out_count + delimiter_offset, stream, mr}; - rmm::device_uvector tokens_indices{ - max_token_out_count + delimiter_offset, stream, mr}; + // Run FST to estimate the size of output buffers json_to_tokens_fst.Transduce(zip_in, static_cast(json_in.size()), - tokens.data() + delimiter_offset, - tokens_indices.data() + delimiter_offset, + thrust::make_discard_iterator(), + thrust::make_discard_iterator(), num_written_tokens.data(), tokenizer_pda::start_state, stream); auto const num_total_tokens = num_written_tokens.value(stream) + delimiter_offset; - tokens.resize(num_total_tokens, stream); - tokens_indices.resize(num_total_tokens, stream); + rmm::device_uvector tokens{num_total_tokens, stream, mr}; + rmm::device_uvector tokens_indices{num_total_tokens, stream, mr}; + + // Run FST to translate the input JSON string into tokens and indices at which they occur + json_to_tokens_fst.Transduce(zip_in, + static_cast(json_in.size()), + tokens.data() + delimiter_offset, + tokens_indices.data() + delimiter_offset, + thrust::make_discard_iterator(), + tokenizer_pda::start_state, + stream); if (delimiter_offset == 1) { tokens.set_element(0, token_t::LineEnd, stream); From 3126f775c527a8df65df2e2cbc8c2b73da2219bf Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Tue, 22 Oct 2024 22:34:30 -0500 Subject: [PATCH 19/76] [Bug] Fix Arrow-FS parquet reader for larger files (#17099) Follow-up to https://github.com/rapidsai/cudf/pull/16684 There is currently a bug in `dask_cudf.read_parquet(..., filesystem="arrow")` when the files are larger than the `"dataframe.parquet.minimum-partition-size"` config. More specifically, when the files are not aggregated together, the output will be `pd.DataFrame` instead of `cudf.DataFrame`. Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) Approvers: - Mads R. B. Kristensen (https://github.com/madsbk) URL: https://github.com/rapidsai/cudf/pull/17099 --- python/dask_cudf/dask_cudf/expr/_expr.py | 30 ++++++++++++++---- .../dask_cudf/io/tests/test_parquet.py | 31 ++++++++++++++++++- .../dask_cudf/dask_cudf/io/tests/test_s3.py | 4 +++ 3 files changed, 58 insertions(+), 7 deletions(-) diff --git a/python/dask_cudf/dask_cudf/expr/_expr.py b/python/dask_cudf/dask_cudf/expr/_expr.py index 4a9f4de8b9c..c7cf66fbffd 100644 --- a/python/dask_cudf/dask_cudf/expr/_expr.py +++ b/python/dask_cudf/dask_cudf/expr/_expr.py @@ -12,7 +12,7 @@ ) from dask_expr._reductions import Reduction, Var from dask_expr.io.io import FusedParquetIO -from dask_expr.io.parquet import ReadParquetPyarrowFS +from dask_expr.io.parquet import FragmentWrapper, ReadParquetPyarrowFS from dask.dataframe.core import ( _concat, @@ -302,16 +302,34 @@ def _dataset_info(self): return dataset_info @staticmethod - def _table_to_pandas( - table, - index_name, - *args, - ): + def _table_to_pandas(table, index_name): df = cudf.DataFrame.from_arrow(table) if index_name is not None: df = df.set_index(index_name) return df + def _filtered_task(self, index: int): + columns = self.columns.copy() + index_name = self.index.name + if self.index is not None: + index_name = self.index.name + schema = self._dataset_info["schema"].remove_metadata() + if index_name: + if columns is None: + columns = list(schema.names) + columns.append(index_name) + return ( + self._table_to_pandas, + ( + self._fragment_to_table, + FragmentWrapper(self.fragments[index], filesystem=self.fs), + self.filters, + columns, + schema, + ), + index_name, + ) + def _tune_up(self, parent): if self._fusion_compression_factor >= 1: return diff --git a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py index 896c4169f5b..ae5ca480e31 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py @@ -15,7 +15,11 @@ import cudf import dask_cudf -from dask_cudf.tests.utils import skip_dask_expr, xfail_dask_expr +from dask_cudf.tests.utils import ( + require_dask_expr, + skip_dask_expr, + xfail_dask_expr, +) # Check if create_metadata_file is supported by # the current dask.dataframe version @@ -615,3 +619,28 @@ def test_timezone_column(tmpdir): got = dask_cudf.read_parquet(path) expect = cudf.read_parquet(path) dd.assert_eq(got, expect) + + +@require_dask_expr() +@pytest.mark.skipif( + not dask_cudf.backends.PYARROW_GE_15, + reason="Requires pyarrow 15", +) +@pytest.mark.parametrize("min_part_size", ["1B", "1GB"]) +def test_read_parquet_arrow_filesystem(tmpdir, min_part_size): + tmp_path = str(tmpdir) + with dask.config.set( + { + "dataframe.backend": "cudf", + "dataframe.parquet.minimum-partition-size": min_part_size, + } + ): + dd.from_dict( + {"x": range(1000), "y": ["a", "b", "c", "d"] * 250}, + npartitions=10, + ).to_parquet(tmp_path, write_index=False) + df = cudf.read_parquet(tmp_path) + ddf = dask_cudf.read_parquet(tmp_path, filesystem="arrow") + dd.assert_eq(df, ddf, check_index=False) + assert isinstance(ddf._meta, cudf.DataFrame) + assert isinstance(ddf.compute(), cudf.DataFrame) diff --git a/python/dask_cudf/dask_cudf/io/tests/test_s3.py b/python/dask_cudf/dask_cudf/io/tests/test_s3.py index cf8af82e112..90907f6fb99 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_s3.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_s3.py @@ -11,6 +11,8 @@ from dask.dataframe import assert_eq +import cudf + import dask_cudf from dask_cudf.tests.utils import QUERY_PLANNING_ON @@ -168,6 +170,8 @@ def test_read_parquet_filesystem(s3_base, s3so, pdf, filesystem): filesystem=filesystem, ) assert df.b.sum().compute() == 9 + assert isinstance(df._meta, cudf.DataFrame) + assert isinstance(df.compute(), cudf.DataFrame) def test_read_parquet_filesystem_explicit(s3_base, s3so, pdf): From f0c6a04c6b4bd8f09b59adfaeb97435a90898fb0 Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Wed, 23 Oct 2024 07:41:03 -0700 Subject: [PATCH 20/76] Add JNI Support for Multi-line Delimiters and Include Test (#17139) This PR introduces the necessary changes to the cuDF jni to support the issue described in [NVIDIA/spark-rapids#11554](https://github.com/NVIDIA/spark-rapids/issues/11554). For further information, refer to the details in the [comment](https://github.com/NVIDIA/spark-rapids/issues/11554#issuecomment-2425327183). Issue #15961 adds support for handling multiple line delimiters. This PR extends that functionality to JNI, which was previously missing, and also includes a test to validate the changes. Authors: - Suraj Aralihalli (https://github.com/SurajAralihalli) Approvers: - MithunR (https://github.com/mythrocks) - Robert (Bobby) Evans (https://github.com/revans2) URL: https://github.com/rapidsai/cudf/pull/17139 --- .../main/java/ai/rapids/cudf/RegexFlag.java | 11 +++++- .../java/ai/rapids/cudf/ColumnVectorTest.java | 38 +++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/java/src/main/java/ai/rapids/cudf/RegexFlag.java b/java/src/main/java/ai/rapids/cudf/RegexFlag.java index 7ed8e0354c9..68a3856f37d 100644 --- a/java/src/main/java/ai/rapids/cudf/RegexFlag.java +++ b/java/src/main/java/ai/rapids/cudf/RegexFlag.java @@ -28,7 +28,16 @@ public enum RegexFlag { DEFAULT(0), // default MULTILINE(8), // the '^' and '$' honor new-line characters DOTALL(16), // the '.' matching includes new-line characters - ASCII(256); // use only ASCII when matching built-in character classes + ASCII(256), // use only ASCII when matching built-in character classes + /** + * EXT_NEWLINE(512): Extends line delimiters to include the following Unicode characters + * - NEXT_LINE ('\u0085') + * - LINE_SEPARATOR ('\u2028') + * - PARAGRAPH_SEPARATOR ('\u2029') + * - CARRIAGE_RETURN ('\r') + * - NEW_LINE ('\n') + */ + EXT_NEWLINE(512); final int nativeId; // Native id, for use with libcudf. private RegexFlag(int nativeId) { // Only constant values should be used diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 708744569df..14c290b300a 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -31,6 +31,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.EnumSet; import java.util.List; import java.util.Optional; import java.util.concurrent.atomic.AtomicInteger; @@ -3877,6 +3878,43 @@ void testExtractRe() { } } + @Test +void testExtractReWithMultiLineDelimiters() { + String NEXT_LINE = "\u0085"; + String LINE_SEPARATOR = "\u2028"; + String PARAGRAPH_SEPARATOR = "\u2029"; + String CARRIAGE_RETURN = "\r"; + String NEW_LINE = "\n"; + + try (ColumnVector input = ColumnVector.fromStrings( + "boo:" + NEXT_LINE + "boo::" + LINE_SEPARATOR + "boo:::", + "boo:::" + LINE_SEPARATOR + "zzé" + CARRIAGE_RETURN + "lll", + "boo::", + "", + "boo::" + NEW_LINE, + "boo::" + CARRIAGE_RETURN, + "boo:" + NEXT_LINE + "boo::" + PARAGRAPH_SEPARATOR, + "boo:" + NEW_LINE + "boo::" + LINE_SEPARATOR, + "boo:" + NEXT_LINE + "boo::" + NEXT_LINE); + Table expected_ext_newline = new Table.TestBuilder() + .column("boo:::", null, "boo::", null, "boo::", "boo::", "boo::", "boo::", "boo::") + .build(); + Table expected_default = new Table.TestBuilder() + .column("boo:::", null, "boo::", null, "boo::", null, null, null, null) + .build()) { + + // Regex pattern to match 'boo:' followed by one or more colons at the end of the string + try (Table found = input.extractRe(new RegexProgram("(boo:+)$", EnumSet.of(RegexFlag.EXT_NEWLINE)))) { + assertColumnsAreEqual(expected_ext_newline.getColumns()[0], found.getColumns()[0]); + } + + try (Table found = input.extractRe(new RegexProgram("(boo:+)$", EnumSet.of(RegexFlag.DEFAULT)))) { + assertColumnsAreEqual(expected_default.getColumns()[0], found.getColumns()[0]); + } + } + } + + @Test void testExtractAllRecord() { String pattern = "([ab])(\\d)"; From 02ee8199a7703eafe8aa061213423eecf27757ec Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 23 Oct 2024 09:28:20 -0700 Subject: [PATCH 21/76] Use async execution policy for true_if (#17146) Closes #17117 Related to #12086 This PR replaces the synchronous execution policy with an asynchronous one to eliminate unnecessary synchronization. Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - David Wendt (https://github.com/davidwendt) - Shruti Shivakumar (https://github.com/shrshi) - Jason Lowe (https://github.com/jlowe) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/17146 --- cpp/include/cudf/detail/unary.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/detail/unary.hpp b/cpp/include/cudf/detail/unary.hpp index 18b1e9b2d2e..0f852db0c54 100644 --- a/cpp/include/cudf/detail/unary.hpp +++ b/cpp/include/cudf/detail/unary.hpp @@ -59,7 +59,7 @@ std::unique_ptr true_if(InputIterator begin, auto output_mutable_view = output->mutable_view(); auto output_data = output_mutable_view.data(); - thrust::transform(rmm::exec_policy(stream), begin, end, output_data, p); + thrust::transform(rmm::exec_policy_nosync(stream), begin, end, output_data, p); return output; } From deb9af4df7c9c44a43a1d146d4ba4eb8bad31cbb Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Wed, 23 Oct 2024 11:21:24 -0700 Subject: [PATCH 22/76] Replace direct `cudaMemcpyAsync` calls with utility functions (limited to `cudf::io`) (#17132) Issue https://github.com/rapidsai/cudf/issues/15620 Replaced the calls to `cudaMemcpyAsync` with the new `cuda_memcpy`/`cuda_memcpy_async` utility, which optionally avoids using the copy engine. Changes are limited to cuIO to make the PR easier to review (repetitive enough as-is!). Also took the opportunity to use `cudf::detail::host_vector` and its factories to enable wider pinned memory use. Skipped a few instances of `cudaMemcpyAsync`; few are under `io::comp`, which we don't want to invest in further (if possible). The other `cudaMemcpyAsync` instances are D2D copies, which `cuda_memcpy`/`cuda_memcpy_async` don't support. Perhaps they should, just to make the use ubiquitous. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Paul Mattione (https://github.com/pmattione-nvidia) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/17132 --- cpp/src/io/comp/uncomp.cpp | 6 +- cpp/src/io/csv/reader_impl.cu | 57 ++++++++----------- cpp/src/io/csv/writer_impl.cu | 10 +--- cpp/src/io/json/host_tree_algorithms.cu | 24 +++----- cpp/src/io/json/json_column.cu | 16 +++--- cpp/src/io/json/json_tree.cu | 15 ++--- cpp/src/io/json/read_json.cu | 13 ++--- cpp/src/io/orc/reader_impl_chunking.cu | 8 +-- cpp/src/io/orc/writer_impl.cu | 26 ++++----- cpp/src/io/parquet/predicate_pushdown.cpp | 21 ++++--- cpp/src/io/parquet/reader_impl.cpp | 2 +- cpp/src/io/parquet/reader_impl.hpp | 2 +- cpp/src/io/parquet/reader_impl_chunking.hpp | 2 +- cpp/src/io/parquet/reader_impl_preprocess.cu | 37 +++++------- cpp/src/io/parquet/writer_impl.cu | 29 +++------- cpp/src/io/text/bgzip_data_chunk_source.cu | 4 +- .../io/text/data_chunk_source_factories.cpp | 22 +++---- cpp/src/io/utilities/datasource.cpp | 12 ++-- 18 files changed, 134 insertions(+), 172 deletions(-) diff --git a/cpp/src/io/comp/uncomp.cpp b/cpp/src/io/comp/uncomp.cpp index 1af45b41d8e..d4d6f46b99a 100644 --- a/cpp/src/io/comp/uncomp.cpp +++ b/cpp/src/io/comp/uncomp.cpp @@ -538,8 +538,10 @@ size_t decompress_zstd(host_span src, CUDF_EXPECTS(hd_stats[0].status == compression_status::SUCCESS, "ZSTD decompression failed"); // Copy temporary output to `dst` - CUDF_CUDA_TRY(cudaMemcpyAsync( - dst.data(), d_dst.data(), hd_stats[0].bytes_written, cudaMemcpyDefault, stream.value())); + cudf::detail::cuda_memcpy_async( + dst.subspan(0, hd_stats[0].bytes_written), + device_span{d_dst.data(), hd_stats[0].bytes_written}, + stream); return hd_stats[0].bytes_written; } diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu index 8c32fc85f78..72fca75c56b 100644 --- a/cpp/src/io/csv/reader_impl.cu +++ b/cpp/src/io/csv/reader_impl.cu @@ -21,6 +21,7 @@ #include "csv_common.hpp" #include "csv_gpu.hpp" +#include "cudf/detail/utilities/cuda_memcpy.hpp" #include "io/comp/io_uncomp.hpp" #include "io/utilities/column_buffer.hpp" #include "io/utilities/hostdevice_vector.hpp" @@ -275,11 +276,10 @@ std::pair, selected_rows_offsets> load_data_and_gather auto const read_offset = byte_range_offset + input_pos + previous_data_size; auto const read_size = target_pos - input_pos - previous_data_size; if (data.has_value()) { - CUDF_CUDA_TRY(cudaMemcpyAsync(d_data.data() + previous_data_size, - data->data() + read_offset, - target_pos - input_pos - previous_data_size, - cudaMemcpyDefault, - stream.value())); + cudf::detail::cuda_memcpy_async( + device_span{d_data.data() + previous_data_size, read_size}, + data->subspan(read_offset, read_size), + stream); } else { if (source->is_device_read_preferred(read_size)) { source->device_read(read_offset, @@ -288,12 +288,11 @@ std::pair, selected_rows_offsets> load_data_and_gather stream); } else { auto const buffer = source->host_read(read_offset, read_size); - CUDF_CUDA_TRY(cudaMemcpyAsync(d_data.data() + previous_data_size, - buffer->data(), - buffer->size(), - cudaMemcpyDefault, - stream.value())); - stream.synchronize(); // To prevent buffer going out of scope before we copy the data. + // Use sync version to prevent buffer going out of scope before we copy the data. + cudf::detail::cuda_memcpy( + device_span{d_data.data() + previous_data_size, read_size}, + host_span{reinterpret_cast(buffer->data()), buffer->size()}, + stream); } } @@ -311,12 +310,10 @@ std::pair, selected_rows_offsets> load_data_and_gather range_end, skip_rows, stream); - CUDF_CUDA_TRY(cudaMemcpyAsync(row_ctx.host_ptr(), - row_ctx.device_ptr(), - num_blocks * sizeof(uint64_t), - cudaMemcpyDefault, - stream.value())); - stream.synchronize(); + + cudf::detail::cuda_memcpy(host_span{row_ctx}.subspan(0, num_blocks), + device_span{row_ctx}.subspan(0, num_blocks), + stream); // Sum up the rows in each character block, selecting the row count that // corresponds to the current input context. Also stores the now known input @@ -331,11 +328,9 @@ std::pair, selected_rows_offsets> load_data_and_gather // At least one row in range in this batch all_row_offsets.resize(total_rows - skip_rows, stream); - CUDF_CUDA_TRY(cudaMemcpyAsync(row_ctx.device_ptr(), - row_ctx.host_ptr(), - num_blocks * sizeof(uint64_t), - cudaMemcpyDefault, - stream.value())); + cudf::detail::cuda_memcpy_async(device_span{row_ctx}.subspan(0, num_blocks), + host_span{row_ctx}.subspan(0, num_blocks), + stream); // Pass 2: Output row offsets cudf::io::csv::gpu::gather_row_offsets(parse_opts.view(), @@ -352,12 +347,9 @@ std::pair, selected_rows_offsets> load_data_and_gather stream); // With byte range, we want to keep only one row out of the specified range if (range_end < data_size) { - CUDF_CUDA_TRY(cudaMemcpyAsync(row_ctx.host_ptr(), - row_ctx.device_ptr(), - num_blocks * sizeof(uint64_t), - cudaMemcpyDefault, - stream.value())); - stream.synchronize(); + cudf::detail::cuda_memcpy(host_span{row_ctx}.subspan(0, num_blocks), + device_span{row_ctx}.subspan(0, num_blocks), + stream); size_t rows_out_of_range = 0; for (uint32_t i = 0; i < num_blocks; i++) { @@ -401,12 +393,9 @@ std::pair, selected_rows_offsets> load_data_and_gather // Remove header rows and extract header auto const header_row_index = std::max(header_rows, 1) - 1; if (header_row_index + 1 < row_offsets.size()) { - CUDF_CUDA_TRY(cudaMemcpyAsync(row_ctx.host_ptr(), - row_offsets.data() + header_row_index, - 2 * sizeof(uint64_t), - cudaMemcpyDefault, - stream.value())); - stream.synchronize(); + cudf::detail::cuda_memcpy(host_span{row_ctx}.subspan(0, 2), + device_span{row_offsets.data() + header_row_index, 2}, + stream); auto const header_start = input_pos + row_ctx[0]; auto const header_end = input_pos + row_ctx[1]; diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index b84446b5f3e..2bbe05ced84 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -405,13 +406,8 @@ void write_chunked(data_sink* out_sink, out_sink->device_write(ptr_all_bytes, total_num_bytes, stream); } else { // copy the bytes to host to write them out - thrust::host_vector h_bytes(total_num_bytes); - CUDF_CUDA_TRY(cudaMemcpyAsync(h_bytes.data(), - ptr_all_bytes, - total_num_bytes * sizeof(char), - cudaMemcpyDefault, - stream.value())); - stream.synchronize(); + auto const h_bytes = cudf::detail::make_host_vector_sync( + device_span{ptr_all_bytes, total_num_bytes}, stream); out_sink->host_write(h_bytes.data(), total_num_bytes); } diff --git a/cpp/src/io/json/host_tree_algorithms.cu b/cpp/src/io/json/host_tree_algorithms.cu index 7ee652e0239..d06338c6f69 100644 --- a/cpp/src/io/json/host_tree_algorithms.cu +++ b/cpp/src/io/json/host_tree_algorithms.cu @@ -198,17 +198,13 @@ NodeIndexT get_row_array_parent_col_id(device_span col_ids, bool is_enabled_lines, rmm::cuda_stream_view stream) { - NodeIndexT value = parent_node_sentinel; - if (!col_ids.empty()) { - auto const list_node_index = is_enabled_lines ? 0 : 1; - CUDF_CUDA_TRY(cudaMemcpyAsync(&value, - col_ids.data() + list_node_index, - sizeof(NodeIndexT), - cudaMemcpyDefault, - stream.value())); - stream.synchronize(); - } - return value; + if (col_ids.empty()) { return parent_node_sentinel; } + + auto const list_node_index = is_enabled_lines ? 0 : 1; + auto const value = cudf::detail::make_host_vector_sync( + device_span{col_ids.data() + list_node_index, 1}, stream); + + return value[0]; } /** * @brief Holds member data pointers of `d_json_column` @@ -818,11 +814,7 @@ std::pair, hashmap_of_device_columns> build_tree column_categories.cbegin(), expected_types.begin(), [](auto exp, auto cat) { return exp == NUM_NODE_CLASSES ? cat : exp; }); - cudaMemcpyAsync(d_column_tree.node_categories.begin(), - expected_types.data(), - expected_types.size() * sizeof(column_categories[0]), - cudaMemcpyDefault, - stream.value()); + cudf::detail::cuda_memcpy_async(d_column_tree.node_categories, expected_types, stream); return {is_pruned, columns}; } diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 4584f71775f..7e4d975e431 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -513,16 +513,14 @@ table_with_metadata device_parse_nested_json(device_span d_input, #endif bool const is_array_of_arrays = [&]() { - std::array h_node_categories = {NC_ERR, NC_ERR}; - auto const size_to_copy = std::min(size_t{2}, gpu_tree.node_categories.size()); - CUDF_CUDA_TRY(cudaMemcpyAsync(h_node_categories.data(), - gpu_tree.node_categories.data(), - sizeof(node_t) * size_to_copy, - cudaMemcpyDefault, - stream.value())); - stream.synchronize(); + auto const size_to_copy = std::min(size_t{2}, gpu_tree.node_categories.size()); + if (size_to_copy == 0) return false; + auto const h_node_categories = cudf::detail::make_host_vector_sync( + device_span{gpu_tree.node_categories.data(), size_to_copy}, stream); + if (options.is_enabled_lines()) return h_node_categories[0] == NC_LIST; - return h_node_categories[0] == NC_LIST and h_node_categories[1] == NC_LIST; + return h_node_categories.size() >= 2 and h_node_categories[0] == NC_LIST and + h_node_categories[1] == NC_LIST; }(); auto [gpu_col_id, gpu_row_offsets] = diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu index d949635c1cc..e2fe926ea19 100644 --- a/cpp/src/io/json/json_tree.cu +++ b/cpp/src/io/json/json_tree.cu @@ -264,16 +264,13 @@ tree_meta_t get_tree_representation(device_span tokens, error_count > 0) { auto const error_location = thrust::find(rmm::exec_policy(stream), tokens.begin(), tokens.end(), token_t::ErrorBegin); - SymbolOffsetT error_index; - CUDF_CUDA_TRY( - cudaMemcpyAsync(&error_index, - token_indices.data() + thrust::distance(tokens.begin(), error_location), - sizeof(SymbolOffsetT), - cudaMemcpyDefault, - stream.value())); - stream.synchronize(); + auto error_index = cudf::detail::make_host_vector_sync( + device_span{ + token_indices.data() + thrust::distance(tokens.begin(), error_location), 1}, + stream); + CUDF_FAIL("JSON Parser encountered an invalid format at location " + - std::to_string(error_index)); + std::to_string(error_index[0])); } auto const num_tokens = tokens.size(); diff --git a/cpp/src/io/json/read_json.cu b/cpp/src/io/json/read_json.cu index c424d2b3b62..8a740ae17ef 100644 --- a/cpp/src/io/json/read_json.cu +++ b/cpp/src/io/json/read_json.cu @@ -315,13 +315,12 @@ device_span ingest_raw_input(device_span buffer, // Reading to host because decompression of a single block is much faster on the CPU sources[0]->host_read(range_offset, remaining_bytes_to_read, hbuffer.data()); auto uncomp_data = decompress(compression, hbuffer); - CUDF_CUDA_TRY(cudaMemcpyAsync(buffer.data(), - reinterpret_cast(uncomp_data.data()), - uncomp_data.size() * sizeof(char), - cudaMemcpyHostToDevice, - stream.value())); - stream.synchronize(); - return buffer.first(uncomp_data.size()); + auto ret_buffer = buffer.first(uncomp_data.size()); + cudf::detail::cuda_memcpy( + ret_buffer, + host_span{reinterpret_cast(uncomp_data.data()), uncomp_data.size()}, + stream); + return ret_buffer; } table_with_metadata read_json(host_span> sources, diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 9cc77e8e738..fcaee9c548e 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -516,10 +516,10 @@ void reader_impl::load_next_stripe_data(read_mode mode) _stream.synchronize(); stream_synchronized = true; } - device_read_tasks.push_back( - std::pair(source_ptr->device_read_async( - read_info.offset, read_info.length, dst_base + read_info.dst_pos, _stream), - read_info.length)); + device_read_tasks.emplace_back( + source_ptr->device_read_async( + read_info.offset, read_info.length, dst_base + read_info.dst_pos, _stream), + read_info.length); } else { auto buffer = source_ptr->host_read(read_info.offset, read_info.length); diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 03020eb649f..d432deb8e79 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -19,6 +19,7 @@ * @brief cuDF-IO ORC writer class implementation */ +#include "cudf/detail/utilities/cuda_memcpy.hpp" #include "io/comp/nvcomp_adapter.hpp" #include "io/orc/orc_gpu.hpp" #include "io/statistics/column_statistics.cuh" @@ -1408,7 +1409,8 @@ encoded_footer_statistics finish_statistic_blobs(Footer const& footer, num_entries_seen += stripes_per_col; } - std::vector file_stats_merge(num_file_blobs); + auto file_stats_merge = + cudf::detail::make_host_vector(num_file_blobs, stream); for (auto i = 0u; i < num_file_blobs; ++i) { auto col_stats = &file_stats_merge[i]; col_stats->col_dtype = per_chunk_stats.col_types[i]; @@ -1418,11 +1420,10 @@ encoded_footer_statistics finish_statistic_blobs(Footer const& footer, } auto d_file_stats_merge = stats_merge.device_ptr(num_stripe_blobs); - CUDF_CUDA_TRY(cudaMemcpyAsync(d_file_stats_merge, - file_stats_merge.data(), - num_file_blobs * sizeof(statistics_merge_group), - cudaMemcpyDefault, - stream.value())); + cudf::detail::cuda_memcpy_async( + device_span{stats_merge.device_ptr(num_stripe_blobs), num_file_blobs}, + file_stats_merge, + stream); auto file_stat_chunks = stat_chunks.data() + num_stripe_blobs; detail::merge_group_statistics( @@ -1573,7 +1574,7 @@ void write_index_stream(int32_t stripe_id, * @param[in] strm_desc Stream's descriptor * @param[in] enc_stream Chunk's streams * @param[in] compressed_data Compressed stream data - * @param[in,out] stream_out Temporary host output buffer + * @param[in,out] bounce_buffer Pinned memory bounce buffer for D2H data transfer * @param[in,out] stripe Stream's parent stripe * @param[in,out] streams List of all streams * @param[in] compression_kind The compression kind @@ -1584,7 +1585,7 @@ void write_index_stream(int32_t stripe_id, std::future write_data_stream(gpu::StripeStream const& strm_desc, gpu::encoder_chunk_streams const& enc_stream, uint8_t const* compressed_data, - uint8_t* stream_out, + host_span bounce_buffer, StripeInformation* stripe, orc_streams* streams, CompressionKind compression_kind, @@ -1604,11 +1605,10 @@ std::future write_data_stream(gpu::StripeStream const& strm_desc, if (out_sink->is_device_write_preferred(length)) { return out_sink->device_write_async(stream_in, length, stream); } else { - CUDF_CUDA_TRY( - cudaMemcpyAsync(stream_out, stream_in, length, cudaMemcpyDefault, stream.value())); - stream.synchronize(); + cudf::detail::cuda_memcpy( + bounce_buffer.subspan(0, length), device_span{stream_in, length}, stream); - out_sink->host_write(stream_out, length); + out_sink->host_write(bounce_buffer.data(), length); return std::async(std::launch::deferred, [] {}); } }(); @@ -2616,7 +2616,7 @@ void writer::impl::write_orc_data_to_sink(encoded_data const& enc_data, strm_desc, enc_data.streams[strm_desc.column_id][segmentation.stripes[stripe_id].first], compressed_data.data(), - bounce_buffer.data(), + bounce_buffer, &stripe, &streams, _compression_kind, diff --git a/cpp/src/io/parquet/predicate_pushdown.cpp b/cpp/src/io/parquet/predicate_pushdown.cpp index f0a0bc0b51b..32e922b04bb 100644 --- a/cpp/src/io/parquet/predicate_pushdown.cpp +++ b/cpp/src/io/parquet/predicate_pushdown.cpp @@ -454,15 +454,18 @@ std::optional>> aggregate_reader_metadata::fi CUDF_EXPECTS(predicate.type().id() == cudf::type_id::BOOL8, "Filter expression must return a boolean column"); - auto num_bitmasks = num_bitmask_words(predicate.size()); - std::vector host_bitmask(num_bitmasks, ~bitmask_type{0}); - if (predicate.nullable()) { - CUDF_CUDA_TRY(cudaMemcpyAsync(host_bitmask.data(), - predicate.null_mask(), - num_bitmasks * sizeof(bitmask_type), - cudaMemcpyDefault, - stream.value())); - } + auto const host_bitmask = [&] { + auto const num_bitmasks = num_bitmask_words(predicate.size()); + if (predicate.nullable()) { + return cudf::detail::make_host_vector_sync( + device_span(predicate.null_mask(), num_bitmasks), stream); + } else { + auto bitmask = cudf::detail::make_host_vector(num_bitmasks, stream); + std::fill(bitmask.begin(), bitmask.end(), ~bitmask_type{0}); + return bitmask; + } + }(); + auto validity_it = cudf::detail::make_counting_transform_iterator( 0, [bitmask = host_bitmask.data()](auto bit_index) { return bit_is_set(bitmask, bit_index); }); diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index f0865c715bc..0705ff6f5cc 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -78,7 +78,7 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num // TODO: This step is somewhat redundant if size info has already been calculated (nested schema, // chunked reader). auto const has_strings = (kernel_mask & STRINGS_MASK) != 0; - std::vector col_string_sizes(_input_columns.size(), 0L); + auto col_string_sizes = cudf::detail::make_host_vector(_input_columns.size(), _stream); if (has_strings) { // need to compute pages bounds/sizes if we lack page indexes or are using custom bounds // TODO: we could probably dummy up size stats for FLBA data since we know the width diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 62ffc4d3077..3aa9b94ed6b 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -284,7 +284,7 @@ class reader::impl { * * @return Vector of total string data sizes for each column */ - std::vector calculate_page_string_offsets(); + cudf::detail::host_vector calculate_page_string_offsets(); /** * @brief Converts the page data and outputs to columns. diff --git a/cpp/src/io/parquet/reader_impl_chunking.hpp b/cpp/src/io/parquet/reader_impl_chunking.hpp index 3a3cdd34a58..a0c2dbd3e44 100644 --- a/cpp/src/io/parquet/reader_impl_chunking.hpp +++ b/cpp/src/io/parquet/reader_impl_chunking.hpp @@ -107,7 +107,7 @@ struct subpass_intermediate_data { * rowgroups may represent less than all of the rowgroups to be read for the file. */ struct pass_intermediate_data { - std::vector> raw_page_data; + std::vector raw_page_data; // rowgroup, chunk and page information for the current pass. bool has_compressed_data{false}; diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 60e35465793..f03f1214b9a 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -218,7 +218,7 @@ void generate_depth_remappings( */ [[nodiscard]] std::future read_column_chunks_async( std::vector> const& sources, - std::vector>& page_data, + cudf::host_span page_data, cudf::detail::hostdevice_vector& chunks, size_t begin_chunk, size_t end_chunk, @@ -251,23 +251,24 @@ void generate_depth_remappings( if (source->is_device_read_preferred(io_size)) { // Buffer needs to be padded. // Required by `gpuDecodePageData`. - auto buffer = + page_data[chunk] = rmm::device_buffer(cudf::util::round_up_safe(io_size, BUFFER_PADDING_MULTIPLE), stream); auto fut_read_size = source->device_read_async( - io_offset, io_size, static_cast(buffer.data()), stream); + io_offset, io_size, static_cast(page_data[chunk].data()), stream); read_tasks.emplace_back(std::move(fut_read_size)); - page_data[chunk] = datasource::buffer::create(std::move(buffer)); } else { auto const read_buffer = source->host_read(io_offset, io_size); // Buffer needs to be padded. // Required by `gpuDecodePageData`. - auto tmp_buffer = rmm::device_buffer( + page_data[chunk] = rmm::device_buffer( cudf::util::round_up_safe(read_buffer->size(), BUFFER_PADDING_MULTIPLE), stream); - CUDF_CUDA_TRY(cudaMemcpyAsync( - tmp_buffer.data(), read_buffer->data(), read_buffer->size(), cudaMemcpyDefault, stream)); - page_data[chunk] = datasource::buffer::create(std::move(tmp_buffer)); + CUDF_CUDA_TRY(cudaMemcpyAsync(page_data[chunk].data(), + read_buffer->data(), + read_buffer->size(), + cudaMemcpyDefault, + stream)); } - auto d_compdata = page_data[chunk]->data(); + auto d_compdata = static_cast(page_data[chunk].data()); do { chunks[chunk].compressed_data = d_compdata; d_compdata += chunks[chunk].compressed_size; @@ -980,7 +981,7 @@ std::pair> reader::impl::read_column_chunks() std::vector chunk_source_map(num_chunks); // Tracker for eventually deallocating compressed and uncompressed data - raw_page_data = std::vector>(num_chunks); + raw_page_data = std::vector(num_chunks); // Keep track of column chunk file offsets std::vector column_chunk_offsets(num_chunks); @@ -1695,15 +1696,14 @@ void reader::impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num nullmask_bufs, std::numeric_limits::max(), _stream); } -std::vector reader::impl::calculate_page_string_offsets() +cudf::detail::host_vector reader::impl::calculate_page_string_offsets() { auto& pass = *_pass_itm_data; auto& subpass = *pass.subpass; auto page_keys = make_page_key_iterator(subpass.pages); - std::vector col_sizes(_input_columns.size(), 0L); - rmm::device_uvector d_col_sizes(col_sizes.size(), _stream); + rmm::device_uvector d_col_sizes(_input_columns.size(), _stream); // use page_index to fetch page string sizes in the proper order auto val_iter = thrust::make_transform_iterator(subpass.pages.device_begin(), @@ -1717,7 +1717,7 @@ std::vector reader::impl::calculate_page_string_offsets() page_offset_output_iter{subpass.pages.device_ptr()}); // now sum up page sizes - rmm::device_uvector reduce_keys(col_sizes.size(), _stream); + rmm::device_uvector reduce_keys(d_col_sizes.size(), _stream); thrust::reduce_by_key(rmm::exec_policy_nosync(_stream), page_keys, page_keys + subpass.pages.size(), @@ -1725,14 +1725,7 @@ std::vector reader::impl::calculate_page_string_offsets() reduce_keys.begin(), d_col_sizes.begin()); - cudaMemcpyAsync(col_sizes.data(), - d_col_sizes.data(), - sizeof(size_t) * col_sizes.size(), - cudaMemcpyDeviceToHost, - _stream); - _stream.synchronize(); - - return col_sizes; + return cudf::detail::make_host_vector_sync(d_col_sizes, _stream); } } // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 190f13eb688..f865c9a7643 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -183,7 +183,7 @@ struct aggregate_writer_metadata { std::vector row_groups; std::vector key_value_metadata; std::vector offset_indexes; - std::vector> column_indexes; + std::vector> column_indexes; }; std::vector files; std::optional> column_orders = std::nullopt; @@ -1543,12 +1543,7 @@ void encode_pages(hostdevice_2dvector& chunks, d_chunks.flat_view(), {column_stats, pages.size()}, column_index_truncate_length, stream); } - auto h_chunks = chunks.host_view(); - CUDF_CUDA_TRY(cudaMemcpyAsync(h_chunks.data(), - d_chunks.data(), - d_chunks.flat_view().size_bytes(), - cudaMemcpyDefault, - stream.value())); + chunks.device_to_host_async(stream); if (comp_stats.has_value()) { comp_stats.value() += collect_compression_statistics(comp_in, comp_res, stream); @@ -2559,12 +2554,11 @@ void writer::impl::write_parquet_data_to_sink( } else { CUDF_EXPECTS(bounce_buffer.size() >= ck.compressed_size, "Bounce buffer was not properly initialized."); - CUDF_CUDA_TRY(cudaMemcpyAsync(bounce_buffer.data(), - dev_bfr + ck.ck_stat_size, - ck.compressed_size, - cudaMemcpyDefault, - _stream.value())); - _stream.synchronize(); + cudf::detail::cuda_memcpy( + host_span{bounce_buffer}.subspan(0, ck.compressed_size), + device_span{dev_bfr + ck.ck_stat_size, ck.compressed_size}, + _stream); + _out_sink[p]->host_write(bounce_buffer.data(), ck.compressed_size); } @@ -2600,13 +2594,8 @@ void writer::impl::write_parquet_data_to_sink( auto const& column_chunk_meta = row_group.columns[i].meta_data; // start transfer of the column index - std::vector column_idx; - column_idx.resize(ck.column_index_size); - CUDF_CUDA_TRY(cudaMemcpyAsync(column_idx.data(), - ck.column_index_blob, - ck.column_index_size, - cudaMemcpyDefault, - _stream.value())); + auto column_idx = cudf::detail::make_host_vector_async( + device_span{ck.column_index_blob, ck.column_index_size}, _stream); // calculate offsets while the column index is transferring int64_t curr_pg_offset = column_chunk_meta.data_page_offset; diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index badcd3f58f9..06069630685 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -74,8 +74,8 @@ class bgzip_data_chunk_reader : public data_chunk_reader { // Buffer needs to be padded. // Required by `inflate_kernel`. device.resize(cudf::util::round_up_safe(host.size(), BUFFER_PADDING_MULTIPLE), stream); - CUDF_CUDA_TRY(cudaMemcpyAsync( - device.data(), host.data(), host.size() * sizeof(T), cudaMemcpyDefault, stream.value())); + cudf::detail::cuda_memcpy_async( + device_span{device}.subspan(0, host.size()), host, stream); } struct decompression_blocks { diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp index 58faa0ebfe4..4baea8655e0 100644 --- a/cpp/src/io/text/data_chunk_source_factories.cpp +++ b/cpp/src/io/text/data_chunk_source_factories.cpp @@ -87,8 +87,10 @@ class datasource_chunk_reader : public data_chunk_reader { _source->host_read(_offset, read_size, reinterpret_cast(h_ticket.buffer.data())); // copy the host-pinned data on to device - CUDF_CUDA_TRY(cudaMemcpyAsync( - chunk.data(), h_ticket.buffer.data(), read_size, cudaMemcpyDefault, stream.value())); + cudf::detail::cuda_memcpy_async( + device_span{chunk}.subspan(0, read_size), + host_span{h_ticket.buffer}.subspan(0, read_size), + stream); // record the host-to-device copy. CUDF_CUDA_TRY(cudaEventRecord(h_ticket.event, stream.value())); @@ -153,8 +155,10 @@ class istream_data_chunk_reader : public data_chunk_reader { auto chunk = rmm::device_uvector(read_size, stream); // copy the host-pinned data on to device - CUDF_CUDA_TRY(cudaMemcpyAsync( - chunk.data(), h_ticket.buffer.data(), read_size, cudaMemcpyDefault, stream.value())); + cudf::detail::cuda_memcpy_async( + device_span{chunk}.subspan(0, read_size), + host_span{h_ticket.buffer}.subspan(0, read_size), + stream); // record the host-to-device copy. CUDF_CUDA_TRY(cudaEventRecord(h_ticket.event, stream.value())); @@ -193,12 +197,10 @@ class host_span_data_chunk_reader : public data_chunk_reader { auto chunk = rmm::device_uvector(read_size, stream); // copy the host data to device - CUDF_CUDA_TRY(cudaMemcpyAsync( // - chunk.data(), - _data.data() + _position, - read_size, - cudaMemcpyDefault, - stream.value())); + cudf::detail::cuda_memcpy_async( + cudf::device_span{chunk}.subspan(0, read_size), + cudf::host_span{_data}.subspan(_position, read_size), + stream); _position += read_size; diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index 19f2103071e..4e8908a8942 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -18,6 +18,7 @@ #include "getenv_or.hpp" #include +#include #include #include #include @@ -247,17 +248,18 @@ class device_buffer_source final : public datasource { size_t host_read(size_t offset, size_t size, uint8_t* dst) override { auto const count = std::min(size, this->size() - offset); - auto const stream = cudf::get_default_stream(); - CUDF_CUDA_TRY( - cudaMemcpyAsync(dst, _d_buffer.data() + offset, count, cudaMemcpyDefault, stream.value())); - stream.synchronize(); + auto const stream = cudf::detail::global_cuda_stream_pool().get_stream(); + cudf::detail::cuda_memcpy(host_span{dst, count}, + device_span{ + reinterpret_cast(_d_buffer.data() + offset), count}, + stream); return count; } std::unique_ptr host_read(size_t offset, size_t size) override { auto const count = std::min(size, this->size() - offset); - auto const stream = cudf::get_default_stream(); + auto const stream = cudf::detail::global_cuda_stream_pool().get_stream(); auto h_data = cudf::detail::make_host_vector_async( cudf::device_span{_d_buffer.data() + offset, count}, stream); stream.synchronize(); From e7653a70743a76ad3c8ca4b377aa0ec4303e5556 Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Wed, 23 Oct 2024 13:23:04 -0500 Subject: [PATCH 23/76] Use managed memory for NDSH benchmarks (#17039) Fixes https://github.com/rapidsai/cudf/issues/16987 Use managed memory to generate the parquet data, and write parquet data to host buffer. Replace use of parquet_device_buffer with cuio_source_sink_pair Authors: - Karthikeyan (https://github.com/karthikeyann) Approvers: - David Wendt (https://github.com/davidwendt) - Tianyu Liu (https://github.com/kingcrimsontianyu) - Muhammad Haseeb (https://github.com/mhaseeb123) URL: https://github.com/rapidsai/cudf/pull/17039 --- cpp/benchmarks/CMakeLists.txt | 2 +- cpp/benchmarks/ndsh/q01.cpp | 8 +- cpp/benchmarks/ndsh/q05.cpp | 16 +-- cpp/benchmarks/ndsh/q06.cpp | 8 +- cpp/benchmarks/ndsh/q09.cpp | 17 ++-- cpp/benchmarks/ndsh/q10.cpp | 13 +-- cpp/benchmarks/ndsh/utilities.cpp | 157 +++++++++++++++++++++++------- cpp/benchmarks/ndsh/utilities.hpp | 19 ++-- 8 files changed, 161 insertions(+), 79 deletions(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index e61a8e6e1e6..f013b31b3de 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -49,7 +49,7 @@ target_compile_options( target_link_libraries( ndsh_data_generator - PUBLIC cudf GTest::gmock GTest::gtest cudf::cudftestutil nvtx3::nvtx3-cpp + PUBLIC cudf cudf::cudftestutil nvtx3::nvtx3-cpp PRIVATE $ ) diff --git a/cpp/benchmarks/ndsh/q01.cpp b/cpp/benchmarks/ndsh/q01.cpp index ef709926ae9..485e8e5497c 100644 --- a/cpp/benchmarks/ndsh/q01.cpp +++ b/cpp/benchmarks/ndsh/q01.cpp @@ -104,7 +104,7 @@ } void run_ndsh_q1(nvbench::state& state, - std::unordered_map& sources) + std::unordered_map& sources) { // Define the column projections and filter predicate for `lineitem` table std::vector const lineitem_cols = {"l_returnflag", @@ -124,8 +124,8 @@ void run_ndsh_q1(nvbench::state& state, cudf::ast::ast_operator::LESS_EQUAL, shipdate_ref, shipdate_upper_literal); // Read out the `lineitem` table from parquet file - auto lineitem = - read_parquet(sources["lineitem"].make_source_info(), lineitem_cols, std::move(lineitem_pred)); + auto lineitem = read_parquet( + sources.at("lineitem").make_source_info(), lineitem_cols, std::move(lineitem_pred)); // Calculate the discount price and charge columns and append to lineitem table auto disc_price = @@ -170,7 +170,7 @@ void ndsh_q1(nvbench::state& state) { // Generate the required parquet files in device buffers double const scale_factor = state.get_float64("scale_factor"); - std::unordered_map sources; + std::unordered_map sources; generate_parquet_data_sources(scale_factor, {"lineitem"}, sources); auto stream = cudf::get_default_stream(); diff --git a/cpp/benchmarks/ndsh/q05.cpp b/cpp/benchmarks/ndsh/q05.cpp index 522bc4789c2..1c2d657913e 100644 --- a/cpp/benchmarks/ndsh/q05.cpp +++ b/cpp/benchmarks/ndsh/q05.cpp @@ -89,7 +89,7 @@ } void run_ndsh_q5(nvbench::state& state, - std::unordered_map& sources) + std::unordered_map& sources) { // Define the column projection and filter predicate for the `orders` table std::vector const orders_cols = {"o_custkey", "o_orderkey", "o_orderdate"}; @@ -120,17 +120,17 @@ void run_ndsh_q5(nvbench::state& state, // Read out the tables from parquet files // while pushing down the column projections and filter predicates auto const customer = - read_parquet(sources["customer"].make_source_info(), {"c_custkey", "c_nationkey"}); + read_parquet(sources.at("customer").make_source_info(), {"c_custkey", "c_nationkey"}); auto const orders = - read_parquet(sources["orders"].make_source_info(), orders_cols, std::move(orders_pred)); - auto const lineitem = read_parquet(sources["lineitem"].make_source_info(), + read_parquet(sources.at("orders").make_source_info(), orders_cols, std::move(orders_pred)); + auto const lineitem = read_parquet(sources.at("lineitem").make_source_info(), {"l_orderkey", "l_suppkey", "l_extendedprice", "l_discount"}); auto const supplier = - read_parquet(sources["supplier"].make_source_info(), {"s_suppkey", "s_nationkey"}); + read_parquet(sources.at("supplier").make_source_info(), {"s_suppkey", "s_nationkey"}); auto const nation = - read_parquet(sources["nation"].make_source_info(), {"n_nationkey", "n_regionkey", "n_name"}); + read_parquet(sources.at("nation").make_source_info(), {"n_nationkey", "n_regionkey", "n_name"}); auto const region = - read_parquet(sources["region"].make_source_info(), region_cols, std::move(region_pred)); + read_parquet(sources.at("region").make_source_info(), region_cols, std::move(region_pred)); // Perform the joins auto const join_a = apply_inner_join(region, nation, {"r_regionkey"}, {"n_regionkey"}); @@ -165,7 +165,7 @@ void ndsh_q5(nvbench::state& state) { // Generate the required parquet files in device buffers double const scale_factor = state.get_float64("scale_factor"); - std::unordered_map sources; + std::unordered_map sources; generate_parquet_data_sources( scale_factor, {"customer", "orders", "lineitem", "supplier", "nation", "region"}, sources); diff --git a/cpp/benchmarks/ndsh/q06.cpp b/cpp/benchmarks/ndsh/q06.cpp index 04078547973..e1e56c3622e 100644 --- a/cpp/benchmarks/ndsh/q06.cpp +++ b/cpp/benchmarks/ndsh/q06.cpp @@ -64,7 +64,7 @@ } void run_ndsh_q6(nvbench::state& state, - std::unordered_map& sources) + std::unordered_map& sources) { // Read out the `lineitem` table from parquet file std::vector const lineitem_cols = { @@ -83,8 +83,8 @@ void run_ndsh_q6(nvbench::state& state, cudf::ast::operation(cudf::ast::ast_operator::LESS, shipdate_ref, shipdate_upper_literal); auto const lineitem_pred = std::make_unique( cudf::ast::ast_operator::LOGICAL_AND, shipdate_pred_a, shipdate_pred_b); - auto lineitem = - read_parquet(sources["lineitem"].make_source_info(), lineitem_cols, std::move(lineitem_pred)); + auto lineitem = read_parquet( + sources.at("lineitem").make_source_info(), lineitem_cols, std::move(lineitem_pred)); // Cast the discount and quantity columns to float32 and append to lineitem table auto discout_float = @@ -134,7 +134,7 @@ void ndsh_q6(nvbench::state& state) { // Generate the required parquet files in device buffers double const scale_factor = state.get_float64("scale_factor"); - std::unordered_map sources; + std::unordered_map sources; generate_parquet_data_sources(scale_factor, {"lineitem"}, sources); auto stream = cudf::get_default_stream(); diff --git a/cpp/benchmarks/ndsh/q09.cpp b/cpp/benchmarks/ndsh/q09.cpp index 59218ab8912..2e9a69d9ee2 100644 --- a/cpp/benchmarks/ndsh/q09.cpp +++ b/cpp/benchmarks/ndsh/q09.cpp @@ -112,20 +112,21 @@ } void run_ndsh_q9(nvbench::state& state, - std::unordered_map& sources) + std::unordered_map& sources) { // Read out the table from parquet files auto const lineitem = read_parquet( - sources["lineitem"].make_source_info(), + sources.at("lineitem").make_source_info(), {"l_suppkey", "l_partkey", "l_orderkey", "l_extendedprice", "l_discount", "l_quantity"}); - auto const nation = read_parquet(sources["nation"].make_source_info(), {"n_nationkey", "n_name"}); + auto const nation = + read_parquet(sources.at("nation").make_source_info(), {"n_nationkey", "n_name"}); auto const orders = - read_parquet(sources["orders"].make_source_info(), {"o_orderkey", "o_orderdate"}); - auto const part = read_parquet(sources["part"].make_source_info(), {"p_partkey", "p_name"}); - auto const partsupp = read_parquet(sources["partsupp"].make_source_info(), + read_parquet(sources.at("orders").make_source_info(), {"o_orderkey", "o_orderdate"}); + auto const part = read_parquet(sources.at("part").make_source_info(), {"p_partkey", "p_name"}); + auto const partsupp = read_parquet(sources.at("partsupp").make_source_info(), {"ps_suppkey", "ps_partkey", "ps_supplycost"}); auto const supplier = - read_parquet(sources["supplier"].make_source_info(), {"s_suppkey", "s_nationkey"}); + read_parquet(sources.at("supplier").make_source_info(), {"s_suppkey", "s_nationkey"}); // Generating the `profit` table // Filter the part table using `p_name like '%green%'` @@ -178,7 +179,7 @@ void ndsh_q9(nvbench::state& state) { // Generate the required parquet files in device buffers double const scale_factor = state.get_float64("scale_factor"); - std::unordered_map sources; + std::unordered_map sources; generate_parquet_data_sources( scale_factor, {"part", "supplier", "lineitem", "partsupp", "orders", "nation"}, sources); diff --git a/cpp/benchmarks/ndsh/q10.cpp b/cpp/benchmarks/ndsh/q10.cpp index a520480020a..72edd15083d 100644 --- a/cpp/benchmarks/ndsh/q10.cpp +++ b/cpp/benchmarks/ndsh/q10.cpp @@ -94,7 +94,7 @@ } void run_ndsh_q10(nvbench::state& state, - std::unordered_map& sources) + std::unordered_map& sources) { // Define the column projection and filter predicate for the `orders` table std::vector const orders_cols = {"o_custkey", "o_orderkey", "o_orderdate"}; @@ -122,15 +122,16 @@ void run_ndsh_q10(nvbench::state& state, // Read out the tables from parquet files // while pushing down the column projections and filter predicates auto const customer = read_parquet( - sources["customer"].make_source_info(), + sources.at("customer").make_source_info(), {"c_custkey", "c_name", "c_nationkey", "c_acctbal", "c_address", "c_phone", "c_comment"}); auto const orders = - read_parquet(sources["orders"].make_source_info(), orders_cols, std::move(orders_pred)); + read_parquet(sources.at("orders").make_source_info(), orders_cols, std::move(orders_pred)); auto const lineitem = - read_parquet(sources["lineitem"].make_source_info(), + read_parquet(sources.at("lineitem").make_source_info(), {"l_extendedprice", "l_discount", "l_orderkey", "l_returnflag"}, std::move(lineitem_pred)); - auto const nation = read_parquet(sources["nation"].make_source_info(), {"n_name", "n_nationkey"}); + auto const nation = + read_parquet(sources.at("nation").make_source_info(), {"n_name", "n_nationkey"}); // Perform the joins auto const join_a = apply_inner_join(customer, nation, {"c_nationkey"}, {"n_nationkey"}); @@ -163,7 +164,7 @@ void ndsh_q10(nvbench::state& state) { // Generate the required parquet files in device buffers double const scale_factor = state.get_float64("scale_factor"); - std::unordered_map sources; + std::unordered_map sources; generate_parquet_data_sources( scale_factor, {"customer", "orders", "lineitem", "nation"}, sources); diff --git a/cpp/benchmarks/ndsh/utilities.cpp b/cpp/benchmarks/ndsh/utilities.cpp index 62116ddf661..9f9849860c9 100644 --- a/cpp/benchmarks/ndsh/utilities.cpp +++ b/cpp/benchmarks/ndsh/utilities.cpp @@ -17,6 +17,8 @@ #include "utilities.hpp" #include "common/ndsh_data_generator/ndsh_data_generator.hpp" +#include "common/table_utilities.hpp" +#include "cudf/detail/utilities/integer_utils.hpp" #include #include @@ -30,8 +32,15 @@ #include #include +#include +#include +#include + +#include #include #include +#include +#include namespace { @@ -85,6 +94,15 @@ std::vector const NATION_SCHEMA = { "n_nationkey", "n_name", "n_regionkey", "n_comment"}; std::vector const REGION_SCHEMA = {"r_regionkey", "r_name", "r_comment"}; +std::unordered_map const> const SCHEMAS = { + {"orders", ORDERS_SCHEMA}, + {"lineitem", LINEITEM_SCHEMA}, + {"part", PART_SCHEMA}, + {"partsupp", PARTSUPP_SCHEMA}, + {"supplier", SUPPLIER_SCHEMA}, + {"customer", CUSTOMER_SCHEMA}, + {"nation", NATION_SCHEMA}, + {"region", REGION_SCHEMA}}; } // namespace cudf::table_view table_with_names::table() const { return tbl->view(); } @@ -337,7 +355,7 @@ int32_t days_since_epoch(int year, int month, int day) void write_to_parquet_device_buffer(std::unique_ptr const& table, std::vector const& col_names, - parquet_device_buffer& source) + cuio_source_sink_pair& source) { CUDF_FUNC_RANGE(); auto const stream = cudf::get_default_stream(); @@ -351,55 +369,124 @@ void write_to_parquet_device_buffer(std::unique_ptr const& table, metadata.schema_info = col_name_infos; auto const table_input_metadata = cudf::io::table_input_metadata{metadata}; - // Declare a host and device buffer - std::vector h_buffer; - + auto est_size = static_cast(estimate_size(table->view())); + constexpr auto PQ_MAX_TABLE_BYTES = 8ul << 30; // 8GB + // TODO: best to get this limit from percent_of_free_device_memory(50) of device memory resource. + if (est_size > PQ_MAX_TABLE_BYTES) { + auto builder = cudf::io::chunked_parquet_writer_options::builder(source.make_sink_info()); + builder.metadata(table_input_metadata); + auto const options = builder.build(); + auto num_splits = static_cast( + std::ceil(static_cast(est_size) / (PQ_MAX_TABLE_BYTES))); + std::vector splits(num_splits - 1); + auto num_rows = table->num_rows(); + auto num_row_per_chunk = cudf::util::div_rounding_up_safe(num_rows, num_splits); + std::generate_n(splits.begin(), splits.size(), [num_row_per_chunk, i = 0]() mutable { + return (i += num_row_per_chunk); + }); + std::vector split_tables = cudf::split(table->view(), splits, stream); + auto writer = cudf::io::parquet_chunked_writer(options, stream); + for (auto const& chunk_table : split_tables) { + writer.write(chunk_table); + } + writer.close(); + return; + } // Write parquet data to host buffer - auto builder = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&h_buffer), table->view()); + auto builder = cudf::io::parquet_writer_options::builder(source.make_sink_info(), table->view()); builder.metadata(table_input_metadata); auto const options = builder.build(); - cudf::io::write_parquet(options); + cudf::io::write_parquet(options, stream); +} - // Copy host buffer to device buffer - source.d_buffer.resize(h_buffer.size(), stream); - CUDF_CUDA_TRY(cudaMemcpyAsync( - source.d_buffer.data(), h_buffer.data(), h_buffer.size(), cudaMemcpyDefault, stream.value())); +inline auto make_managed_pool() +{ + return rmm::mr::make_owning_wrapper( + std::make_shared(), rmm::percent_of_free_device_memory(50)); } void generate_parquet_data_sources(double scale_factor, std::vector const& table_names, - std::unordered_map& sources) + std::unordered_map& sources) { CUDF_FUNC_RANGE(); - std::for_each(table_names.begin(), table_names.end(), [&](auto const& table_name) { - sources[table_name] = parquet_device_buffer(); - }); - auto [orders, lineitem, part] = cudf::datagen::generate_orders_lineitem_part( - scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + // Set the memory resource to the managed pool + auto old_mr = cudf::get_current_device_resource(); + // if already managed pool or managed, don't create new one. + using managed_pool_mr_t = decltype(make_managed_pool()); + managed_pool_mr_t managed_pool_mr; + bool const is_managed = + dynamic_cast*>(old_mr) or + dynamic_cast(old_mr); + if (!is_managed) { + std::cout << "Creating managed pool just for data generation\n"; + managed_pool_mr = make_managed_pool(); + cudf::set_current_device_resource(managed_pool_mr.get()); + // drawback: if already pool takes 50% of free memory, we are left with 50% of 50% of free + // memory. + } - auto partsupp = cudf::datagen::generate_partsupp( - scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + std::unordered_set const requested_table_names = [&table_names]() { + if (table_names.empty()) { + return std::unordered_set{ + "orders", "lineitem", "part", "partsupp", "supplier", "customer", "nation", "region"}; + } + return std::unordered_set(table_names.begin(), table_names.end()); + }(); + std::for_each( + requested_table_names.begin(), requested_table_names.end(), [&](auto const& table_name) { + sources.emplace(table_name, cuio_source_sink_pair(io_type::HOST_BUFFER)); + }); + std::unordered_map> tables; + + if (sources.count("orders") or sources.count("lineitem") or sources.count("part")) { + auto [orders, lineitem, part] = cudf::datagen::generate_orders_lineitem_part( + scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + if (sources.count("orders")) { + write_to_parquet_device_buffer(orders, SCHEMAS.at("orders"), sources.at("orders")); + orders = {}; + } + if (sources.count("part")) { + write_to_parquet_device_buffer(part, SCHEMAS.at("part"), sources.at("part")); + part = {}; + } + if (sources.count("lineitem")) { + write_to_parquet_device_buffer(lineitem, SCHEMAS.at("lineitem"), sources.at("lineitem")); + lineitem = {}; + } + } + + if (sources.count("partsupp")) { + auto partsupp = cudf::datagen::generate_partsupp( + scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + write_to_parquet_device_buffer(partsupp, SCHEMAS.at("partsupp"), sources.at("partsupp")); + } - auto supplier = cudf::datagen::generate_supplier( - scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + if (sources.count("supplier")) { + auto supplier = cudf::datagen::generate_supplier( + scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + write_to_parquet_device_buffer(supplier, SCHEMAS.at("supplier"), sources.at("supplier")); + } - auto customer = cudf::datagen::generate_customer( - scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + if (sources.count("customer")) { + auto customer = cudf::datagen::generate_customer( + scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + write_to_parquet_device_buffer(customer, SCHEMAS.at("customer"), sources.at("customer")); + } - auto nation = cudf::datagen::generate_nation(cudf::get_default_stream(), - cudf::get_current_device_resource_ref()); + if (sources.count("nation")) { + auto nation = cudf::datagen::generate_nation(cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + write_to_parquet_device_buffer(nation, SCHEMAS.at("nation"), sources.at("nation")); + } - auto region = cudf::datagen::generate_region(cudf::get_default_stream(), - cudf::get_current_device_resource_ref()); + if (sources.count("region")) { + auto region = cudf::datagen::generate_region(cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + write_to_parquet_device_buffer(region, SCHEMAS.at("region"), sources.at("region")); + } - write_to_parquet_device_buffer(std::move(orders), ORDERS_SCHEMA, sources["orders"]); - write_to_parquet_device_buffer(std::move(lineitem), LINEITEM_SCHEMA, sources["lineitem"]); - write_to_parquet_device_buffer(std::move(part), PART_SCHEMA, sources["part"]); - write_to_parquet_device_buffer(std::move(partsupp), PARTSUPP_SCHEMA, sources["partsupp"]); - write_to_parquet_device_buffer(std::move(customer), CUSTOMER_SCHEMA, sources["customer"]); - write_to_parquet_device_buffer(std::move(supplier), SUPPLIER_SCHEMA, sources["supplier"]); - write_to_parquet_device_buffer(std::move(nation), NATION_SCHEMA, sources["nation"]); - write_to_parquet_device_buffer(std::move(region), REGION_SCHEMA, sources["region"]); + // Restore the original memory resource + if (!is_managed) { cudf::set_current_device_resource(old_mr); } } diff --git a/cpp/benchmarks/ndsh/utilities.hpp b/cpp/benchmarks/ndsh/utilities.hpp index 762e43deccf..cae07f86a98 100644 --- a/cpp/benchmarks/ndsh/utilities.hpp +++ b/cpp/benchmarks/ndsh/utilities.hpp @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "io/cuio_common.hpp" + #include #include #include @@ -196,24 +198,15 @@ std::tm make_tm(int year, int month, int day); int32_t days_since_epoch(int year, int month, int day); /** - * @brief Struct representing a parquet device buffer - */ -struct parquet_device_buffer { - parquet_device_buffer() : d_buffer{0, cudf::get_default_stream()} {}; - cudf::io::source_info make_source_info() { return cudf::io::source_info(d_buffer); } - rmm::device_uvector d_buffer; -}; - -/** - * @brief Write a `cudf::table` to a parquet device buffer + * @brief Write a `cudf::table` to a parquet cuio sink * * @param table The `cudf::table` to write * @param col_names The column names of the table - * @param parquet_device_buffer The parquet device buffer to write the table to + * @param source The source sink pair to write the table to */ void write_to_parquet_device_buffer(std::unique_ptr const& table, std::vector const& col_names, - parquet_device_buffer& source); + cuio_source_sink_pair& source); /** * @brief Generate NDS-H tables and write to parquet device buffers @@ -224,4 +217,4 @@ void write_to_parquet_device_buffer(std::unique_ptr const& table, */ void generate_parquet_data_sources(double scale_factor, std::vector const& table_names, - std::unordered_map& sources); + std::unordered_map& sources); From 02879724b3073675ff3384071ef9227427f57b41 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 23 Oct 2024 19:16:59 -0400 Subject: [PATCH 24/76] Use the full ref name of `rmm.DeviceBuffer` in the sphinx config file (#17150) This is an improvement PR that uses the full name of `rmm.DeviceBuffer` in the sphinx config file. Its a follow-up to this [comment](https://github.com/rapidsai/cudf/pull/16913#discussion_r1792283249). Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17150 --- docs/cudf/source/conf.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index ecf619ddc44..5942cc16850 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -342,10 +342,7 @@ def clean_all_xml_files(path): "cudf.Series": ("cudf.core.series.Series", "cudf.Series"), "cudf.Index": ("cudf.core.index.Index", "cudf.Index"), "cupy.core.core.ndarray": ("cupy.ndarray", "cupy.ndarray"), - # TODO: Replace the first entry in a follow-up with rmm.pylibrmm.device_buffer.DeviceBuffer - # when the RMM objects inventory is generated from branch-24.12. The RMM objects inventory - # can be accessed here : https://docs.rapids.ai/api/rmm/nightly/objects.inv - "DeviceBuffer": ("rmm.DeviceBuffer", "rmm.DeviceBuffer"), + "DeviceBuffer": ("rmm.pylibrmm.device_buffer.DeviceBuffer", "rmm.DeviceBuffer"), } From d7cdf44da2ba921c6fa63feff8749d141643f76e Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 23 Oct 2024 20:19:57 -0400 Subject: [PATCH 25/76] Migrate NVText Stemming APIs to pylibcudf (#17085) Apart of #15162 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17085 --- cpp/include/nvtext/stemmer.hpp | 8 +- .../api_docs/pylibcudf/nvtext/index.rst | 1 + .../api_docs/pylibcudf/nvtext/stemmer.rst | 6 ++ python/cudf/cudf/_lib/nvtext/stemmer.pyx | 56 +++++--------- .../pylibcudf/libcudf/nvtext/stemmer.pxd | 7 +- .../pylibcudf/pylibcudf/nvtext/CMakeLists.txt | 2 +- .../pylibcudf/pylibcudf/nvtext/__init__.pxd | 2 + python/pylibcudf/pylibcudf/nvtext/__init__.py | 2 + python/pylibcudf/pylibcudf/nvtext/stemmer.pxd | 14 ++++ python/pylibcudf/pylibcudf/nvtext/stemmer.pyx | 76 +++++++++++++++++++ .../pylibcudf/tests/test_nvtext_stemmer.py | 47 ++++++++++++ 11 files changed, 178 insertions(+), 43 deletions(-) create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/stemmer.rst create mode 100644 python/pylibcudf/pylibcudf/nvtext/stemmer.pxd create mode 100644 python/pylibcudf/pylibcudf/nvtext/stemmer.pyx create mode 100644 python/pylibcudf/pylibcudf/tests/test_nvtext_stemmer.py diff --git a/cpp/include/nvtext/stemmer.hpp b/cpp/include/nvtext/stemmer.hpp index 55a4124bfd0..e5b2a4cc21b 100644 --- a/cpp/include/nvtext/stemmer.hpp +++ b/cpp/include/nvtext/stemmer.hpp @@ -51,7 +51,7 @@ enum class letter_type { * * @code{.pseudo} * Example: - * st = ["trouble", "toy", "sygyzy"] + * st = ["trouble", "toy", "syzygy"] * b1 = is_letter(st, VOWEL, 1) * b1 is now [false, true, true] * @endcode @@ -62,7 +62,7 @@ enum class letter_type { * * @code{.pseudo} * Example: - * st = ["trouble", "toy", "sygyzy"] + * st = ["trouble", "toy", "syzygy"] * b2 = is_letter(st, CONSONANT, -1) // last letter checked in each string * b2 is now [false, true, false] * @endcode @@ -99,7 +99,7 @@ std::unique_ptr is_letter( * * @code{.pseudo} * Example: - * st = ["trouble", "toy", "sygyzy"] + * st = ["trouble", "toy", "syzygy"] * ix = [3, 1, 4] * b1 = is_letter(st, VOWEL, ix) * b1 is now [true, true, false] @@ -111,7 +111,7 @@ std::unique_ptr is_letter( * * @code{.pseudo} * Example: - * st = ["trouble", "toy", "sygyzy"] + * st = ["trouble", "toy", "syzygy"] * ix = [3, -2, 4] // 2nd to last character in st[1] is checked * b2 = is_letter(st, CONSONANT, ix) * b2 is now [false, false, true] diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst index c5b9533597a..e0735a197fd 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst @@ -11,3 +11,4 @@ nvtext ngrams_tokenize normalize replace + stemmer diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/stemmer.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/stemmer.rst new file mode 100644 index 00000000000..b407ff8451a --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/stemmer.rst @@ -0,0 +1,6 @@ +======= +stemmer +======= + +.. automodule:: pylibcudf.nvtext.stemmer + :members: diff --git a/python/cudf/cudf/_lib/nvtext/stemmer.pyx b/python/cudf/cudf/_lib/nvtext/stemmer.pyx index 5bf25562fed..63a389b64d5 100644 --- a/python/cudf/cudf/_lib/nvtext/stemmer.pyx +++ b/python/cudf/cudf/_lib/nvtext/stemmer.pyx @@ -1,24 +1,19 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. -from cudf.core.buffer import acquire_spill_lock - -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - from enum import IntEnum -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view +from cudf.core.buffer import acquire_spill_lock + from pylibcudf.libcudf.nvtext.stemmer cimport ( - is_letter as cpp_is_letter, letter_type, - porter_stemmer_measure as cpp_porter_stemmer_measure, underlying_type_t_letter_type, ) from pylibcudf.libcudf.types cimport size_type from cudf._lib.column cimport Column +from pylibcudf import nvtext + class LetterType(IntEnum): CONSONANT = letter_type.CONSONANT @@ -27,43 +22,34 @@ class LetterType(IntEnum): @acquire_spill_lock() def porter_stemmer_measure(Column strings): - cdef column_view c_strings = strings.view() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_porter_stemmer_measure(c_strings)) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf( + nvtext.stemmer.porter_stemmer_measure( + strings.to_pylibcudf(mode="read"), + ) + ) @acquire_spill_lock() def is_letter(Column strings, object ltype, size_type index): - cdef column_view c_strings = strings.view() - cdef letter_type c_ltype = ( - ltype + return Column.from_pylibcudf( + nvtext.stemmer.is_letter( + strings.to_pylibcudf(mode="read"), + ltype==LetterType.VOWEL, + index, + ) ) - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_is_letter(c_strings, c_ltype, index)) - - return Column.from_unique_ptr(move(c_result)) @acquire_spill_lock() def is_letter_multi(Column strings, object ltype, Column indices): - cdef column_view c_strings = strings.view() - cdef column_view c_indices = indices.view() - cdef letter_type c_ltype = ( - ltype + return Column.from_pylibcudf( + nvtext.stemmer.is_letter( + strings.to_pylibcudf(mode="read"), + ltype==LetterType.VOWEL, + indices.to_pylibcudf(mode="read"), + ) ) - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_is_letter(c_strings, c_ltype, c_indices)) - - return Column.from_unique_ptr(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd index 673bffa28ae..be3a2d75718 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd @@ -1,6 +1,7 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. from libc.stdint cimport int32_t +from libcpp cimport bool from libcpp.memory cimport unique_ptr from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -8,9 +9,9 @@ from pylibcudf.libcudf.types cimport size_type cdef extern from "nvtext/stemmer.hpp" namespace "nvtext" nogil: - ctypedef enum letter_type: - CONSONANT 'nvtext::letter_type::CONSONANT' - VOWEL 'nvtext::letter_type::VOWEL' + cpdef enum class letter_type: + CONSONANT + VOWEL cdef unique_ptr[column] porter_stemmer_measure( const column_view & strings diff --git a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt index 7a94490998a..d97c0a73267 100644 --- a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt @@ -13,7 +13,7 @@ # ============================================================================= set(cython_sources edit_distance.pyx generate_ngrams.pyx jaccard.pyx minhash.pyx - ngrams_tokenize.pyx normalize.pyx replace.pyx + ngrams_tokenize.pyx normalize.pyx replace.pyx stemmer.pyx ) set(linked_libraries cudf::cudf) diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd index 5a5e665d309..a658e57018e 100644 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd @@ -8,6 +8,7 @@ from . cimport ( ngrams_tokenize, normalize, replace, + stemmer, ) __all__ = [ @@ -18,4 +19,5 @@ __all__ = [ "ngrams_tokenize", "normalize", "replace", + "stemmer", ] diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.py b/python/pylibcudf/pylibcudf/nvtext/__init__.py index 77187f0845d..2c1feb089a2 100644 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.py +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.py @@ -8,6 +8,7 @@ ngrams_tokenize, normalize, replace, + stemmer, ) __all__ = [ @@ -18,4 +19,5 @@ "ngrams_tokenize", "normalize", "replace", + "stemmer", ] diff --git a/python/pylibcudf/pylibcudf/nvtext/stemmer.pxd b/python/pylibcudf/pylibcudf/nvtext/stemmer.pxd new file mode 100644 index 00000000000..48762efc01f --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/stemmer.pxd @@ -0,0 +1,14 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp cimport bool +from pylibcudf.column cimport Column +from pylibcudf.libcudf.nvtext.stemmer cimport letter_type +from pylibcudf.libcudf.types cimport size_type + +ctypedef fused ColumnOrSize: + Column + size_type + +cpdef Column is_letter(Column input, bool check_vowels, ColumnOrSize indices) + +cpdef Column porter_stemmer_measure(Column input) diff --git a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx new file mode 100644 index 00000000000..854d1053624 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx @@ -0,0 +1,76 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp cimport bool +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.nvtext.stemmer cimport ( + is_letter as cpp_is_letter, + letter_type, + porter_stemmer_measure as cpp_porter_stemmer_measure, +) +from pylibcudf.libcudf.types cimport size_type + + +cpdef Column is_letter( + Column input, + bool check_vowels, + ColumnOrSize indices +): + """ + Returns boolean column indicating if the character + or characters at the provided character index or + indices (respectively) are consonants or vowels + + For details, see :cpp:func:`is_letter` + + Parameters + ---------- + input : Column + Input strings + check_vowels : bool + If true, the check is for vowels. Otherwise the check is + for consonants. + indices : Union[Column, size_type] + The character position(s) to check in each string + + Returns + ------- + Column + New boolean column. + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = cpp_is_letter( + input.view(), + letter_type.VOWEL if check_vowels else letter_type.CONSONANT, + indices if ColumnOrSize is size_type else indices.view() + ) + + return Column.from_libcudf(move(c_result)) + + +cpdef Column porter_stemmer_measure(Column input): + """ + Returns the Porter Stemmer measurements of a strings column. + + For details, see :cpp:func:`porter_stemmer_measure` + + Parameters + ---------- + input : Column + Strings column of words to measure + + Returns + ------- + Column + New column of measure values + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = cpp_porter_stemmer_measure(input.view()) + + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_stemmer.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_stemmer.py new file mode 100644 index 00000000000..75d56f587a4 --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_stemmer.py @@ -0,0 +1,47 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +import pyarrow as pa +import pylibcudf as plc +import pytest +from utils import assert_column_eq + + +@pytest.fixture(scope="module") +def input_col(): + arr = ["trouble", "toy", "syzygy"] + return pa.array(arr) + + +@pytest.mark.parametrize("check_vowels", [True, False]) +@pytest.mark.parametrize("indices", [[3, 1, 4], 1]) +def test_is_letter(input_col, check_vowels, indices): + def is_letter(s, i, check): + vowels = "aeiouy" + return (s[i] in vowels) == check + + result = plc.nvtext.stemmer.is_letter( + plc.interop.from_arrow(input_col), + check_vowels, + plc.interop.from_arrow(pa.array(indices)) + if isinstance(indices, list) + else indices, + ) + expected = pa.array( + [ + is_letter( + s, + indices[i] if isinstance(indices, list) else indices, + check_vowels, + ) + for i, s in enumerate(input_col.to_pylist()) + ] + ) + assert_column_eq(result, expected) + + +def test_porter_stemmer_measure(input_col): + result = plc.nvtext.stemmer.porter_stemmer_measure( + plc.interop.from_arrow(input_col), + ) + expected = pa.array([1, 1, 2], type=pa.int32()) + assert_column_eq(result, expected) From 3a623149827ec347e721dd1a18072f18b0b4bcc1 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Thu, 24 Oct 2024 14:10:33 +0100 Subject: [PATCH 26/76] Upgrade to polars 1.11 in cudf-polars (#17154) Polars 1.11 is out, with slight updates to the IR, so we can correctly raise for dynamic groupbys and see inequality joins. These changes adapt to that and do a first pass at supporting inequality joins (by translating to cross + filter). A followup (#17000) will use libcudf's conditional joins. Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - Bradley Dice (https://github.com/bdice) - Mike Sarahan (https://github.com/msarahan) URL: https://github.com/rapidsai/cudf/pull/17154 --- .../all_cuda-118_arch-x86_64.yaml | 2 +- .../all_cuda-125_arch-x86_64.yaml | 2 +- conda/recipes/cudf-polars/meta.yaml | 2 +- dependencies.yaml | 2 +- python/cudf_polars/cudf_polars/dsl/ir.py | 17 ++--- .../cudf_polars/cudf_polars/dsl/translate.py | 76 ++++++++++++++++++- .../cudf_polars/cudf_polars/testing/plugin.py | 38 ++++++++-- python/cudf_polars/pyproject.toml | 2 +- python/cudf_polars/tests/test_join.py | 60 ++++++++++++++- 9 files changed, 172 insertions(+), 29 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index bd5e6c3d569..c3716c4759a 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -65,7 +65,7 @@ dependencies: - pandas - pandas>=2.0,<2.2.4dev0 - pandoc -- polars>=1.8,<1.9 +- polars>=1.11,<1.12 - pre-commit - ptxcompiler - pyarrow>=14.0.0,<18.0.0a0 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 565a3ebfa3c..38e131e79cb 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -63,7 +63,7 @@ dependencies: - pandas - pandas>=2.0,<2.2.4dev0 - pandoc -- polars>=1.8,<1.9 +- polars>=1.11,<1.12 - pre-commit - pyarrow>=14.0.0,<18.0.0a0 - pydata-sphinx-theme!=0.14.2 diff --git a/conda/recipes/cudf-polars/meta.yaml b/conda/recipes/cudf-polars/meta.yaml index e8fef715c60..edf92b930d9 100644 --- a/conda/recipes/cudf-polars/meta.yaml +++ b/conda/recipes/cudf-polars/meta.yaml @@ -43,7 +43,7 @@ requirements: run: - python - pylibcudf ={{ version }} - - polars >=1.8,<1.9 + - polars >=1.11,<1.12 - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} test: diff --git a/dependencies.yaml b/dependencies.yaml index ff97b67f0ce..4804f7b00b0 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -727,7 +727,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - polars>=1.8,<1.9 + - polars>=1.11,<1.12 run_dask_cudf: common: - output_types: [conda, requirements, pyproject] diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index eb93929cf61..f79e229d3f3 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -666,11 +666,11 @@ def __init__( raise NotImplementedError( "rolling window/groupby" ) # pragma: no cover; rollingwindow constructor has already raised + if self.options.dynamic: + raise NotImplementedError("dynamic group by") if any(GroupBy.check_agg(a.value) > 1 for a in self.agg_requests): raise NotImplementedError("Nested aggregations in groupby") self.agg_infos = [req.collect_agg(depth=0) for req in self.agg_requests] - if len(self.keys) == 0: - raise NotImplementedError("dynamic groupby") @staticmethod def check_agg(agg: expr.Expr) -> int: @@ -802,10 +802,10 @@ class Join(IR): right_on: tuple[expr.NamedExpr, ...] """List of expressions used as keys in the right frame.""" options: tuple[ - Literal["inner", "left", "right", "full", "leftsemi", "leftanti", "cross"], + Literal["inner", "left", "right", "full", "semi", "anti", "cross"], bool, tuple[int, int] | None, - str | None, + str, bool, ] """ @@ -840,7 +840,7 @@ def __init__( @staticmethod @cache def _joiners( - how: Literal["inner", "left", "right", "full", "leftsemi", "leftanti"], + how: Literal["inner", "left", "right", "full", "semi", "anti"], ) -> tuple[ Callable, plc.copying.OutOfBoundsPolicy, plc.copying.OutOfBoundsPolicy | None ]: @@ -862,13 +862,13 @@ def _joiners( plc.copying.OutOfBoundsPolicy.NULLIFY, plc.copying.OutOfBoundsPolicy.NULLIFY, ) - elif how == "leftsemi": + elif how == "semi": return ( plc.join.left_semi_join, plc.copying.OutOfBoundsPolicy.DONT_CHECK, None, ) - elif how == "leftanti": + elif how == "anti": return ( plc.join.left_anti_join, plc.copying.OutOfBoundsPolicy.DONT_CHECK, @@ -933,7 +933,6 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" left, right = (c.evaluate(cache=cache) for c in self.children) how, join_nulls, zlice, suffix, coalesce = self.options - suffix = "_right" if suffix is None else suffix if how == "cross": # Separate implementation, since cross_join returns the # result, not the gather maps @@ -955,7 +954,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: columns[left.num_columns :], right.column_names, strict=True ) ] - return DataFrame([*left_cols, *right_cols]) + return DataFrame([*left_cols, *right_cols]).slice(zlice) # TODO: Waiting on clarity based on https://github.com/pola-rs/polars/issues/17184 left_on = DataFrame(broadcast(*(e.evaluate(left) for e in self.left_on))) right_on = DataFrame(broadcast(*(e.evaluate(right) for e in self.right_on))) diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py index 522c4a6729c..c28f2c2651a 100644 --- a/python/cudf_polars/cudf_polars/dsl/translate.py +++ b/python/cudf_polars/cudf_polars/dsl/translate.py @@ -5,10 +5,11 @@ from __future__ import annotations +import functools import json from contextlib import AbstractContextManager, nullcontext from functools import singledispatch -from typing import Any +from typing import TYPE_CHECKING, Any import pyarrow as pa import pylibcudf as plc @@ -19,9 +20,13 @@ from polars.polars import _expr_nodes as pl_expr, _ir_nodes as pl_ir from cudf_polars.dsl import expr, ir +from cudf_polars.dsl.traversal import make_recursive, reuse_if_unchanged from cudf_polars.typing import NodeTraverser from cudf_polars.utils import dtypes, sorting +if TYPE_CHECKING: + from cudf_polars.typing import ExprTransformer + __all__ = ["translate_ir", "translate_named_expr"] @@ -182,7 +187,71 @@ def _( with set_node(visitor, node.input_right): inp_right = translate_ir(visitor, n=None) right_on = [translate_named_expr(visitor, n=e) for e in node.right_on] - return ir.Join(schema, left_on, right_on, node.options, inp_left, inp_right) + if (how := node.options[0]) in { + "inner", + "left", + "right", + "full", + "cross", + "semi", + "anti", + }: + return ir.Join(schema, left_on, right_on, node.options, inp_left, inp_right) + else: + how, op1, op2 = how + if how != "ie_join": + raise NotImplementedError( + f"Unsupported join type {how}" + ) # pragma: no cover; asof joins not yet exposed + # No exposure of mixed/conditional joins in pylibcudf yet, so in + # the first instance, implement by doing a cross join followed by + # a filter. + _, join_nulls, zlice, suffix, coalesce = node.options + cross = ir.Join( + schema, + [], + [], + ("cross", join_nulls, None, suffix, coalesce), + inp_left, + inp_right, + ) + dtype = plc.DataType(plc.TypeId.BOOL8) + if op2 is None: + ops = [op1] + else: + ops = [op1, op2] + suffix = cross.options[3] + + # Column references in the right table refer to the post-join + # names, so with suffixes. + def _rename(e: expr.Expr, rec: ExprTransformer) -> expr.Expr: + if isinstance(e, expr.Col) and e.name in inp_left.schema: + return type(e)(e.dtype, f"{e.name}{suffix}") + return reuse_if_unchanged(e, rec) + + mapper = make_recursive(_rename) + right_on = [ + expr.NamedExpr( + f"{old.name}{suffix}" if old.name in inp_left.schema else old.name, new + ) + for new, old in zip( + (mapper(e.value) for e in right_on), right_on, strict=True + ) + ] + mask = functools.reduce( + functools.partial( + expr.BinOp, dtype, plc.binaryop.BinaryOperator.LOGICAL_AND + ), + ( + expr.BinOp(dtype, expr.BinOp._MAPPING[op], left.value, right.value) + for op, left, right in zip(ops, left_on, right_on, strict=True) + ), + ) + filtered = ir.Filter(schema, expr.NamedExpr("mask", mask), cross) + if zlice is not None: + offset, length = zlice + return ir.Slice(schema, offset, length, filtered) + return filtered @_translate_ir.register @@ -319,8 +388,7 @@ def translate_ir(visitor: NodeTraverser, *, n: int | None = None) -> ir.IR: # IR is versioned with major.minor, minor is bumped for backwards # compatible changes (e.g. adding new nodes), major is bumped for # incompatible changes (e.g. renaming nodes). - # Polars 1.7 changes definition of the CSV reader options schema name. - if (version := visitor.version()) >= (3, 0): + if (version := visitor.version()) >= (4, 0): raise NotImplementedError( f"No support for polars IR {version=}" ) # pragma: no cover; no such version for now. diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index 05b76d76808..a3607159e01 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -53,12 +53,34 @@ def pytest_configure(config: pytest.Config): "tests/unit/io/test_lazy_parquet.py::test_parquet_is_in_statistics": "Debug output on stderr doesn't match", "tests/unit/io/test_lazy_parquet.py::test_parquet_statistics": "Debug output on stderr doesn't match", "tests/unit/io/test_lazy_parquet.py::test_parquet_different_schema[False]": "Needs cudf#16394", + "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-columns]": "Correctly raises but different error", + "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-row_groups]": "Correctly raises but different error", + "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-prefiltered]": "Correctly raises but different error", + "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-none]": "Correctly raises but different error", "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_mismatch_panic_17067[False]": "Needs cudf#16394", + "tests/unit/io/test_lazy_parquet.py::test_scan_parquet_ignores_dtype_mismatch_for_non_projected_columns_19249[False-False]": "Needs some variant of cudf#16394", + "tests/unit/io/test_lazy_parquet.py::test_scan_parquet_ignores_dtype_mismatch_for_non_projected_columns_19249[True-False]": "Needs some variant of cudf#16394", "tests/unit/io/test_lazy_parquet.py::test_parquet_slice_pushdown_non_zero_offset[False]": "Thrift data not handled correctly/slice pushdown wrong?", "tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read[False]": "Incomplete handling of projected reads with mismatching schemas, cudf#16394", "tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read_dtype_mismatch[False]": "Different exception raised, but correctly raises an exception", "tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read_missing_cols_from_first[False]": "Different exception raised, but correctly raises an exception", "tests/unit/io/test_parquet.py::test_read_parquet_only_loads_selected_columns_15098": "Memory usage won't be correct due to GPU", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection0-False-none]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection1-False-none]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection0-False-prefiltered]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection1-False-prefiltered]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection0-False-row_groups]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection1-False-row_groups]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection0-False-columns]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection1-False-columns]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection0-True-none]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection1-True-none]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection0-True-prefiltered]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection1-True-prefiltered]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection0-True-row_groups]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection1-True-row_groups]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection0-True-columns]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection1-True-columns]": "Mismatching column read cudf#16394", "tests/unit/io/test_scan.py::test_scan[single-csv-async]": "Debug output on stderr doesn't match", "tests/unit/io/test_scan.py::test_scan_with_limit[single-csv-async]": "Debug output on stderr doesn't match", "tests/unit/io/test_scan.py::test_scan_with_filter[single-csv-async]": "Debug output on stderr doesn't match", @@ -107,6 +129,14 @@ def pytest_configure(config: pytest.Config): "tests/unit/operations/aggregation/test_aggregations.py::test_sum_empty_and_null_set": "libcudf sums column of all nulls to null, not zero", "tests/unit/operations/aggregation/test_aggregations.py::test_binary_op_agg_context_no_simplify_expr_12423": "groupby-agg of just literals should not produce collect_list", "tests/unit/operations/aggregation/test_aggregations.py::test_nan_inf_aggregation": "treatment of nans and nulls together is different in libcudf and polars in groupby-agg context", + "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func0-func0-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func0-func1-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func0-func2-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func0-func3-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func0-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func1-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func2-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func3-none]": "cudf-polars doesn't nullify division by zero", "tests/unit/operations/test_abs.py::test_abs_duration": "Need to raise for unsupported uops on timelike values", "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input7-expected7-Float32-Float32]": "Mismatching dtypes, needs cudf#15852", "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input10-expected10-Date-output_dtype10]": "Unsupported groupby-agg for a particular dtype", @@ -124,13 +154,6 @@ def pytest_configure(config: pytest.Config): "tests/unit/operations/test_group_by.py::test_group_by_binary_agg_with_literal": "Incorrect broadcasting of literals in groupby-agg", "tests/unit/operations/test_group_by.py::test_aggregated_scalar_elementwise_15602": "Unsupported boolean function/dtype combination in groupby-agg", "tests/unit/operations/test_group_by.py::test_schemas[data1-expr1-expected_select1-expected_gb1]": "Mismatching dtypes, needs cudf#15852", - "tests/unit/operations/test_group_by_dynamic.py::test_group_by_dynamic_by_monday_and_offset_5444": "IR needs to expose groupby-dynamic information", - "tests/unit/operations/test_group_by_dynamic.py::test_group_by_dynamic_label[left-expected0]": "IR needs to expose groupby-dynamic information", - "tests/unit/operations/test_group_by_dynamic.py::test_group_by_dynamic_label[right-expected1]": "IR needs to expose groupby-dynamic information", - "tests/unit/operations/test_group_by_dynamic.py::test_group_by_dynamic_label[datapoint-expected2]": "IR needs to expose groupby-dynamic information", - "tests/unit/operations/test_group_by_dynamic.py::test_rolling_dynamic_sortedness_check": "IR needs to expose groupby-dynamic information", - "tests/unit/operations/test_group_by_dynamic.py::test_group_by_dynamic_validation": "IR needs to expose groupby-dynamic information", - "tests/unit/operations/test_group_by_dynamic.py::test_group_by_dynamic_15225": "IR needs to expose groupby-dynamic information", "tests/unit/operations/test_join.py::test_cross_join_slice_pushdown": "Need to implement slice pushdown for cross joins", "tests/unit/sql/test_cast.py::test_cast_errors[values0-values::uint8-conversion from `f64` to `u64` failed]": "Casting that raises not supported on GPU", "tests/unit/sql/test_cast.py::test_cast_errors[values1-values::uint4-conversion from `i64` to `u32` failed]": "Casting that raises not supported on GPU", @@ -140,6 +163,7 @@ def pytest_configure(config: pytest.Config): "tests/unit/streaming/test_streaming_io.py::test_parquet_eq_statistics": "Debug output on stderr doesn't match", "tests/unit/test_cse.py::test_cse_predicate_self_join": "Debug output on stderr doesn't match", "tests/unit/test_empty.py::test_empty_9137": "Mismatching dtypes, needs cudf#15852", + "tests/unit/test_errors.py::test_error_on_empty_group_by": "Incorrect exception raised", # Maybe flaky, order-dependent? "tests/unit/test_projections.py::test_schema_full_outer_join_projection_pd_13287": "Order-specific result check, query is correct but in different order", "tests/unit/test_queries.py::test_group_by_agg_equals_zero_3535": "libcudf sums all nulls to null, not zero", diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index a8bb634732f..2afdab1be4b 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -19,7 +19,7 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ - "polars>=1.8,<1.9", + "polars>=1.11,<1.12", "pylibcudf==24.12.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ diff --git a/python/cudf_polars/tests/test_join.py b/python/cudf_polars/tests/test_join.py index 7d9ec98db97..501560d15b8 100644 --- a/python/cudf_polars/tests/test_join.py +++ b/python/cudf_polars/tests/test_join.py @@ -2,9 +2,12 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations +from contextlib import nullcontext + import pytest import polars as pl +from polars.testing import assert_frame_equal from cudf_polars.testing.asserts import ( assert_gpu_result_equal, @@ -22,6 +25,11 @@ def how(request): return request.param +@pytest.fixture(params=[None, (1, 5), (1, None), (0, 2), (0, None)]) +def zlice(request): + return request.param + + @pytest.fixture def left(): return pl.LazyFrame( @@ -37,8 +45,9 @@ def left(): def right(): return pl.LazyFrame( { - "a": [1, 4, 3, 7, None, None], - "c": [2, 3, 4, 5, 6, 7], + "a": [1, 4, 3, 7, None, None, 1], + "c": [2, 3, 4, 5, 6, 7, 8], + "d": [6, None, 7, 8, -1, 2, 4], } ) @@ -70,11 +79,31 @@ def test_coalesce_join(left, right, how, join_nulls, join_expr): query = left.join( right, on=join_expr, how=how, join_nulls=join_nulls, coalesce=True ) - assert_gpu_result_equal(query, check_row_order=False) + assert_gpu_result_equal(query, check_row_order=how == "left") -def test_cross_join(left, right): +def test_left_join_with_slice(left, right, join_nulls, zlice): + q = left.join(right, on="a", how="left", join_nulls=join_nulls, coalesce=True) + ctx = nullcontext() + if zlice is not None: + q_expect = q.collect().slice(*zlice) + q = q.slice(*zlice) + if zlice == (1, 5) or zlice == (0, 2): + # https://github.com/pola-rs/polars/issues/19403 + # https://github.com/pola-rs/polars/issues/19405 + ctx = pytest.raises(AssertionError) + assert_frame_equal( + q_expect, q.collect(engine=pl.GPUEngine(raise_on_fail=True)) + ) + + with ctx: + assert_gpu_result_equal(q) + + +def test_cross_join(left, right, zlice): q = left.join(right, how="cross") + if zlice is not None: + q = q.slice(*zlice) assert_gpu_result_equal(q) @@ -86,3 +115,26 @@ def test_join_literal_key_unsupported(left, right, left_on, right_on): q = left.join(right, left_on=left_on, right_on=right_on, how="inner") assert_ir_translation_raises(q, NotImplementedError) + + +@pytest.mark.parametrize( + "conditions", + [ + [pl.col("a") < pl.col("a_right")], + [pl.col("a_right") <= pl.col("a") * 2], + [pl.col("b") * 2 > pl.col("a_right"), pl.col("a") == pl.col("c_right")], + [pl.col("b") * 2 <= pl.col("a_right"), pl.col("a") < pl.col("c_right")], + [pl.col("b") <= pl.col("a_right") * 7, pl.col("a") < pl.col("d") * 2], + ], +) +def test_join_where(left, right, conditions, zlice): + q = left.join_where(right, *conditions) + + assert_gpu_result_equal(q, check_row_order=False) + + if zlice is not None: + q_len = q.slice(*zlice).select(pl.len()) + # Can't compare result, since row order is not guaranteed and + # therefore we only check the length + + assert_gpu_result_equal(q_len) From b75036b12a8d5713e34162571cec24ac91941b85 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 24 Oct 2024 15:34:15 -0400 Subject: [PATCH 27/76] Remove unused variable in internal merge_tdigests utility (#17151) Removes unused variable that contains host copy of the group_offsets data. This host variable appears to have been made obsolete by a combination of #16897 and #16780 Found while working on #17149 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Muhammad Haseeb (https://github.com/mhaseeb123) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/17151 --- .../quantiles/tdigest/tdigest_aggregation.cu | 40 +++++-------------- 1 file changed, 9 insertions(+), 31 deletions(-) diff --git a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu index b0a84a6d50c..d27420658d6 100644 --- a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu +++ b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu @@ -1126,12 +1126,8 @@ std::pair, rmm::device_uvector> generate_mer * `max` of 0. * * @param tdv input tdigests. The tdigests within this column are grouped by key. - * @param h_group_offsets a host iterator of the offsets to the start of each group. A group is - * counted as one even when the cluster is empty in it. The offsets should have the same values as - * the ones in `group_offsets`. * @param group_offsets a device iterator of the offsets to the start of each group. A group is - * counted as one even when the cluster is empty in it. The offsets should have the same values as - * the ones in `h_group_offsets`. + * counted as one even when the cluster is empty in it. * @param group_labels a device iterator of the the group label for each tdigest cluster including * empty clusters. * @param num_group_labels the number of unique group labels. @@ -1142,9 +1138,8 @@ std::pair, rmm::device_uvector> generate_mer * * @return A column containing the merged tdigests. */ -template +template std::unique_ptr merge_tdigests(tdigest_column_view const& tdv, - HGroupOffsetIter h_group_offsets, GroupOffsetIter group_offsets, GroupLabelIter group_labels, size_t num_group_labels, @@ -1313,21 +1308,13 @@ std::unique_ptr reduce_merge_tdigest(column_view const& input, if (input.size() == 0) { return cudf::tdigest::detail::make_empty_tdigest_scalar(stream, mr); } - auto group_offsets_ = group_offsets_fn{input.size()}; - auto h_group_offsets = cudf::detail::make_counting_transform_iterator(0, group_offsets_); - auto group_offsets = cudf::detail::make_counting_transform_iterator(0, group_offsets_); - auto group_labels = thrust::make_constant_iterator(0); - return to_tdigest_scalar(merge_tdigests(tdv, - h_group_offsets, - group_offsets, - group_labels, - input.size(), - 1, - max_centroids, - stream, - mr), - stream, - mr); + auto group_offsets_ = group_offsets_fn{input.size()}; + auto group_offsets = cudf::detail::make_counting_transform_iterator(0, group_offsets_); + auto group_labels = thrust::make_constant_iterator(0); + return to_tdigest_scalar( + merge_tdigests(tdv, group_offsets, group_labels, input.size(), 1, max_centroids, stream, mr), + stream, + mr); } std::unique_ptr group_tdigest(column_view const& col, @@ -1376,16 +1363,7 @@ std::unique_ptr group_merge_tdigest(column_view const& input, return cudf::tdigest::detail::make_empty_tdigests_column(num_groups, stream, mr); } - // bring group offsets back to the host - std::vector h_group_offsets(group_offsets.size()); - cudaMemcpyAsync(h_group_offsets.data(), - group_offsets.begin(), - sizeof(size_type) * group_offsets.size(), - cudaMemcpyDefault, - stream); - return merge_tdigests(tdv, - h_group_offsets.begin(), group_offsets.data(), group_labels.data(), group_labels.size(), From 7115f20e91a314f07333cbd5c01adc62bf2fbb0c Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Thu, 24 Oct 2024 15:34:44 -0400 Subject: [PATCH 28/76] Move `segmented_gather` function from the copying module to the lists module (#17148) This PR moves `segmented_gather` out of the copying module and into the lists module. And it uses the pylibcudf `segmented_gather` implementation in cudf python. Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/17148 --- python/cudf/cudf/_lib/copying.pyx | 26 +----------------- python/cudf/cudf/_lib/lists.pyx | 38 +++++++++++++++++---------- python/cudf/cudf/core/column/lists.py | 2 +- 3 files changed, 26 insertions(+), 40 deletions(-) diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index 30353c4be6c..4221e745e65 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -4,7 +4,7 @@ import pickle from libc.stdint cimport uint8_t, uintptr_t from libcpp cimport bool -from libcpp.memory cimport make_shared, shared_ptr, unique_ptr +from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from libcpp.vector cimport vector @@ -30,10 +30,6 @@ from libcpp.memory cimport make_unique cimport pylibcudf.libcudf.contiguous_split as cpp_contiguous_split from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.lists.gather cimport ( - segmented_gather as cpp_segmented_gather, -) -from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.types cimport size_type @@ -339,26 +335,6 @@ def get_element(Column input_column, size_type index): ) -@acquire_spill_lock() -def segmented_gather(Column source_column, Column gather_map): - cdef shared_ptr[lists_column_view] source_LCV = ( - make_shared[lists_column_view](source_column.view()) - ) - cdef shared_ptr[lists_column_view] gather_map_LCV = ( - make_shared[lists_column_view](gather_map.view()) - ) - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_segmented_gather( - source_LCV.get()[0], gather_map_LCV.get()[0]) - ) - - result = Column.from_unique_ptr(move(c_result)) - return result - - cdef class _CPackedColumns: @staticmethod diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx index 7e8710bedb6..12432ac6d5d 100644 --- a/python/cudf/cudf/_lib/lists.pyx +++ b/python/cudf/cudf/_lib/lists.pyx @@ -9,7 +9,7 @@ from pylibcudf.libcudf.types cimport null_order, size_type from cudf._lib.column cimport Column from cudf._lib.utils cimport columns_from_pylibcudf_table -import pylibcudf +import pylibcudf as plc from pylibcudf cimport Scalar @@ -17,7 +17,7 @@ from pylibcudf cimport Scalar @acquire_spill_lock() def count_elements(Column col): return Column.from_pylibcudf( - pylibcudf.lists.count_elements( + plc.lists.count_elements( col.to_pylibcudf(mode="read")) ) @@ -25,8 +25,8 @@ def count_elements(Column col): @acquire_spill_lock() def explode_outer(list source_columns, int explode_column_idx): return columns_from_pylibcudf_table( - pylibcudf.lists.explode_outer( - pylibcudf.Table([c.to_pylibcudf(mode="read") for c in source_columns]), + plc.lists.explode_outer( + plc.Table([c.to_pylibcudf(mode="read") for c in source_columns]), explode_column_idx, ) ) @@ -35,7 +35,7 @@ def explode_outer(list source_columns, int explode_column_idx): @acquire_spill_lock() def distinct(Column col, bool nulls_equal, bool nans_all_equal): return Column.from_pylibcudf( - pylibcudf.lists.distinct( + plc.lists.distinct( col.to_pylibcudf(mode="read"), nulls_equal, nans_all_equal, @@ -46,7 +46,7 @@ def distinct(Column col, bool nulls_equal, bool nans_all_equal): @acquire_spill_lock() def sort_lists(Column col, bool ascending, str na_position): return Column.from_pylibcudf( - pylibcudf.lists.sort_lists( + plc.lists.sort_lists( col.to_pylibcudf(mode="read"), ascending, null_order.BEFORE if na_position == "first" else null_order.AFTER, @@ -58,7 +58,7 @@ def sort_lists(Column col, bool ascending, str na_position): @acquire_spill_lock() def extract_element_scalar(Column col, size_type index): return Column.from_pylibcudf( - pylibcudf.lists.extract_list_element( + plc.lists.extract_list_element( col.to_pylibcudf(mode="read"), index, ) @@ -68,7 +68,7 @@ def extract_element_scalar(Column col, size_type index): @acquire_spill_lock() def extract_element_column(Column col, Column index): return Column.from_pylibcudf( - pylibcudf.lists.extract_list_element( + plc.lists.extract_list_element( col.to_pylibcudf(mode="read"), index.to_pylibcudf(mode="read"), ) @@ -78,7 +78,7 @@ def extract_element_column(Column col, Column index): @acquire_spill_lock() def contains_scalar(Column col, py_search_key): return Column.from_pylibcudf( - pylibcudf.lists.contains( + plc.lists.contains( col.to_pylibcudf(mode="read"), py_search_key.device_value.c_value, ) @@ -88,7 +88,7 @@ def contains_scalar(Column col, py_search_key): @acquire_spill_lock() def index_of_scalar(Column col, object py_search_key): return Column.from_pylibcudf( - pylibcudf.lists.index_of( + plc.lists.index_of( col.to_pylibcudf(mode="read"), py_search_key.device_value.c_value, True, @@ -99,7 +99,7 @@ def index_of_scalar(Column col, object py_search_key): @acquire_spill_lock() def index_of_column(Column col, Column search_keys): return Column.from_pylibcudf( - pylibcudf.lists.index_of( + plc.lists.index_of( col.to_pylibcudf(mode="read"), search_keys.to_pylibcudf(mode="read"), True, @@ -110,8 +110,8 @@ def index_of_column(Column col, Column search_keys): @acquire_spill_lock() def concatenate_rows(list source_columns): return Column.from_pylibcudf( - pylibcudf.lists.concatenate_rows( - pylibcudf.Table([ + plc.lists.concatenate_rows( + plc.Table([ c.to_pylibcudf(mode="read") for c in source_columns ]) ) @@ -121,8 +121,18 @@ def concatenate_rows(list source_columns): @acquire_spill_lock() def concatenate_list_elements(Column input_column, dropna=False): return Column.from_pylibcudf( - pylibcudf.lists.concatenate_list_elements( + plc.lists.concatenate_list_elements( input_column.to_pylibcudf(mode="read"), dropna, ) ) + + +@acquire_spill_lock() +def segmented_gather(Column source_column, Column gather_map): + return Column.from_pylibcudf( + plc.lists.segmented_gather( + source_column.to_pylibcudf(mode="read"), + gather_map.to_pylibcudf(mode="read"), + ) + ) diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index c6a39199e3b..e9d24d4f450 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -11,7 +11,6 @@ from typing_extensions import Self import cudf -from cudf._lib.copying import segmented_gather from cudf._lib.lists import ( concatenate_list_elements, concatenate_rows, @@ -22,6 +21,7 @@ extract_element_scalar, index_of_column, index_of_scalar, + segmented_gather, sort_lists, ) from cudf._lib.strings.convert.convert_lists import format_list_column From 03777f6b5d44d54316e55bff4e31d3e8e6583c25 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 25 Oct 2024 09:06:25 -0400 Subject: [PATCH 29/76] Fix host-to-device copy missing sync in strings/duration convert (#17149) Fixes a missing stream sync when copying a temporary host vector to device. The host vector could be destroyed before the copy is completed. Updates the code to use vector factory function `make_device_uvector_sync()` instead of `cudaMemcpyAsync` Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/17149 --- cpp/src/strings/convert/convert_durations.cu | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/cpp/src/strings/convert/convert_durations.cu b/cpp/src/strings/convert/convert_durations.cu index 0db1adf1223..f5d052c6657 100644 --- a/cpp/src/strings/convert/convert_durations.cu +++ b/cpp/src/strings/convert/convert_durations.cu @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -152,12 +153,8 @@ struct format_compiler { } // create program in device memory - d_items.resize(items.size(), stream); - CUDF_CUDA_TRY(cudaMemcpyAsync(d_items.data(), - items.data(), - items.size() * sizeof(items[0]), - cudaMemcpyDefault, - stream.value())); + d_items = cudf::detail::make_device_uvector_sync( + items, stream, cudf::get_current_device_resource_ref()); } format_item const* compiled_format_items() { return d_items.data(); } From e98e6b9209ff8557d85cb9b828b895884b0c7b7a Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 25 Oct 2024 09:07:06 -0400 Subject: [PATCH 30/76] Deprecate current libcudf nvtext minhash functions (#17152) Deprecates the current nvtext minhash functions some of which will be replaced in #16756 with a different signature. The others will no longer be used and removed in future release. The existing gtests and benchmarks will be retained for rework in the future release as well. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17152 --- cpp/benchmarks/CMakeLists.txt | 4 ++-- cpp/include/nvtext/minhash.hpp | 24 ++++++++++++++++++------ cpp/tests/CMakeLists.txt | 1 - 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index f013b31b3de..7f82b603912 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -348,8 +348,8 @@ ConfigureNVBench(BINARYOP_NVBENCH binaryop/binaryop.cpp binaryop/compiled_binary ConfigureBench(TEXT_BENCH text/ngrams.cpp text/subword.cpp) ConfigureNVBench( - TEXT_NVBENCH text/edit_distance.cpp text/hash_ngrams.cpp text/jaccard.cpp text/minhash.cpp - text/normalize.cpp text/replace.cpp text/tokenize.cpp text/vocab.cpp text/word_minhash.cpp + TEXT_NVBENCH text/edit_distance.cpp text/hash_ngrams.cpp text/jaccard.cpp text/normalize.cpp + text/replace.cpp text/tokenize.cpp text/vocab.cpp ) # ################################################################################################## diff --git a/cpp/include/nvtext/minhash.hpp b/cpp/include/nvtext/minhash.hpp index 7c909f1a948..42124461cdf 100644 --- a/cpp/include/nvtext/minhash.hpp +++ b/cpp/include/nvtext/minhash.hpp @@ -41,6 +41,8 @@ namespace CUDF_EXPORT nvtext { * * This function uses MurmurHash3_x86_32 for the hash algorithm. * + * @deprecated Deprecated in 24.12 + * * @throw std::invalid_argument if the width < 2 * * @param input Strings column to compute minhash @@ -51,7 +53,7 @@ namespace CUDF_EXPORT nvtext { * @param mr Device memory resource used to allocate the returned column's device memory * @return Minhash values for each string in input */ -std::unique_ptr minhash( +[[deprecated]] std::unique_ptr minhash( cudf::strings_column_view const& input, cudf::numeric_scalar seed = 0, cudf::size_type width = 4, @@ -71,6 +73,8 @@ std::unique_ptr minhash( * * Any null row entries result in corresponding null output rows. * + * @deprecated Deprecated in 24.12 - to be replaced in a future release + * * @throw std::invalid_argument if the width < 2 * @throw std::invalid_argument if seeds is empty * @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit @@ -83,7 +87,7 @@ std::unique_ptr minhash( * @param mr Device memory resource used to allocate the returned column's device memory * @return List column of minhash values for each string per seed */ -std::unique_ptr minhash( +[[deprecated]] std::unique_ptr minhash( cudf::strings_column_view const& input, cudf::device_span seeds, cudf::size_type width = 4, @@ -102,6 +106,8 @@ std::unique_ptr minhash( * The hash function returns 2 uint64 values but only the first value * is used with the minhash calculation. * + * @deprecated Deprecated in 24.12 + * * @throw std::invalid_argument if the width < 2 * * @param input Strings column to compute minhash @@ -112,7 +118,7 @@ std::unique_ptr minhash( * @param mr Device memory resource used to allocate the returned column's device memory * @return Minhash values as UINT64 for each string in input */ -std::unique_ptr minhash64( +[[deprecated]] std::unique_ptr minhash64( cudf::strings_column_view const& input, cudf::numeric_scalar seed = 0, cudf::size_type width = 4, @@ -132,6 +138,8 @@ std::unique_ptr minhash64( * * Any null row entries result in corresponding null output rows. * + * @deprecated Deprecated in 24.12 - to be replaced in a future release + * * @throw std::invalid_argument if the width < 2 * @throw std::invalid_argument if seeds is empty * @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit @@ -144,7 +152,7 @@ std::unique_ptr minhash64( * @param mr Device memory resource used to allocate the returned column's device memory * @return List column of minhash values for each string per seed */ -std::unique_ptr minhash64( +[[deprecated]] std::unique_ptr minhash64( cudf::strings_column_view const& input, cudf::device_span seeds, cudf::size_type width = 4, @@ -164,6 +172,8 @@ std::unique_ptr minhash64( * * Any null row entries result in corresponding null output rows. * + * @deprecated Deprecated in 24.12 + * * @throw std::invalid_argument if seeds is empty * @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit * @@ -173,7 +183,7 @@ std::unique_ptr minhash64( * @param mr Device memory resource used to allocate the returned column's device memory * @return List column of minhash values for each string per seed */ -std::unique_ptr word_minhash( +[[deprecated]] std::unique_ptr word_minhash( cudf::lists_column_view const& input, cudf::device_span seeds, rmm::cuda_stream_view stream = cudf::get_default_stream(), @@ -193,6 +203,8 @@ std::unique_ptr word_minhash( * * Any null row entries result in corresponding null output rows. * + * @deprecated Deprecated in 24.12 + * * @throw std::invalid_argument if seeds is empty * @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit * @@ -202,7 +214,7 @@ std::unique_ptr word_minhash( * @param mr Device memory resource used to allocate the returned column's device memory * @return List column of minhash values for each string per seed */ -std::unique_ptr word_minhash64( +[[deprecated]] std::unique_ptr word_minhash64( cudf::lists_column_view const& input, cudf::device_span seeds, rmm::cuda_stream_view stream = cudf::get_default_stream(), diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index a4213dcbe94..b78a64d0e55 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -611,7 +611,6 @@ ConfigureTest( text/bpe_tests.cpp text/edit_distance_tests.cpp text/jaccard_tests.cpp - text/minhash_tests.cpp text/ngrams_tests.cpp text/ngrams_tokenize_tests.cpp text/normalize_tests.cpp From 0bb699e7616bbfb8564fb3d9db986756713aec8c Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 25 Oct 2024 13:10:10 -0400 Subject: [PATCH 31/76] Move nvtext ngrams benchmarks to nvbench (#17173) Moves the `nvtext::generate_ngrams` and `nvtext::generate_character_ngrams` benchmarks from google-bench to nvbench. Target parameters are exposed to help with profiling. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/17173 --- cpp/benchmarks/CMakeLists.txt | 6 ++-- cpp/benchmarks/text/ngrams.cpp | 65 ++++++++++++++-------------------- 2 files changed, 29 insertions(+), 42 deletions(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 7f82b603912..2a4ac789046 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -345,11 +345,11 @@ ConfigureNVBench(BINARYOP_NVBENCH binaryop/binaryop.cpp binaryop/compiled_binary # ################################################################################################## # * nvtext benchmark ------------------------------------------------------------------- -ConfigureBench(TEXT_BENCH text/ngrams.cpp text/subword.cpp) +ConfigureBench(TEXT_BENCH text/subword.cpp) ConfigureNVBench( - TEXT_NVBENCH text/edit_distance.cpp text/hash_ngrams.cpp text/jaccard.cpp text/normalize.cpp - text/replace.cpp text/tokenize.cpp text/vocab.cpp + TEXT_NVBENCH text/edit_distance.cpp text/hash_ngrams.cpp text/jaccard.cpp text/ngrams.cpp + text/normalize.cpp text/replace.cpp text/tokenize.cpp text/vocab.cpp ) # ################################################################################################## diff --git a/cpp/benchmarks/text/ngrams.cpp b/cpp/benchmarks/text/ngrams.cpp index 8e48f8e9a05..43d57201b20 100644 --- a/cpp/benchmarks/text/ngrams.cpp +++ b/cpp/benchmarks/text/ngrams.cpp @@ -15,58 +15,45 @@ */ #include -#include -#include -#include #include #include #include -class TextNGrams : public cudf::benchmark {}; +#include -enum class ngrams_type { tokens, characters }; - -static void BM_ngrams(benchmark::State& state, ngrams_type nt) +static void bench_ngrams(nvbench::state& state) { - auto const n_rows = static_cast(state.range(0)); - auto const max_str_length = static_cast(state.range(1)); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const ngram_type = state.get_string("type"); + data_profile const profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); cudf::strings_column_view input(column->view()); auto const separator = cudf::string_scalar("_"); - for (auto _ : state) { - cuda_event_timer raii(state, true); - switch (nt) { - case ngrams_type::tokens: nvtext::generate_ngrams(input, 2, separator); break; - case ngrams_type::characters: nvtext::generate_character_ngrams(input); break; - } - } + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); - state.SetBytesProcessed(state.iterations() * input.chars_size(cudf::get_default_stream())); -} + auto chars_size = input.chars_size(cudf::get_default_stream()); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(chars_size * 2); -static void generate_bench_args(benchmark::internal::Benchmark* b) -{ - int const min_rows = 1 << 12; - int const max_rows = 1 << 24; - int const row_mult = 8; - int const min_rowlen = 5; - int const max_rowlen = 40; - int const len_mult = 2; - generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); + if (ngram_type == "chars") { + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = nvtext::generate_character_ngrams(input); + }); + } else { + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = nvtext::generate_ngrams(input, 2, separator); + }); + } } -#define NVTEXT_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(TextNGrams, name) \ - (::benchmark::State & st) { BM_ngrams(st, ngrams_type::name); } \ - BENCHMARK_REGISTER_F(TextNGrams, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -NVTEXT_BENCHMARK_DEFINE(tokens) -NVTEXT_BENCHMARK_DEFINE(characters) +NVBENCH_BENCH(bench_ngrams) + .set_name("ngrams") + .add_int64_axis("num_rows", {131072, 262144, 524288, 1048578}) + .add_int64_axis("row_width", {10, 20, 40, 100}) + .add_string_axis("type", {"chars", "tokens"}); From 2113bd6bbce62028eff0fa523a85ea859bf2bc08 Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Fri, 25 Oct 2024 19:18:53 +0200 Subject: [PATCH 32/76] devcontainer: replace `VAULT_HOST` with `AWS_ROLE_ARN` (#17134) This PR is replacing the `VAULT_HOST` variable with `AWS_ROLE_ARN`. This is required to use the new token service to get AWS credentials. Authors: - Jordan Jacobelli (https://github.com/jjacobelli) Approvers: - Bradley Dice (https://github.com/bdice) - Paul Taylor (https://github.com/trxcllnt) URL: https://github.com/rapidsai/cudf/pull/17134 --- .devcontainer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 8190b5d0297..315a389339a 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -31,6 +31,6 @@ ENV PYTHONDONTWRITEBYTECODE="1" ENV SCCACHE_REGION="us-east-2" ENV SCCACHE_BUCKET="rapids-sccache-devs" -ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai" +ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs" ENV HISTFILE="/home/coder/.cache/._bash_history" ENV LIBCUDF_KERNEL_CACHE_PATH="/home/coder/cudf/cpp/build/${PYTHON_PACKAGE_MANAGER}/cuda-${CUDA_VERSION}/latest/jitify_cache" From 5cba4fb18883dd511e3f892bfbe3ac46caa2db6c Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Fri, 25 Oct 2024 22:38:38 +0200 Subject: [PATCH 33/76] lint: replace `isort` with Ruff's rule I (#16685) since #15312 moved formatting from Black to Rufft, it would make sense also unify import formatting under the same ruff so use build-in `I` rule instead of additional `isort` Authors: - Jirka Borovec (https://github.com/Borda) - Kyle Edwards (https://github.com/KyleFromNVIDIA) - Bradley Dice (https://github.com/bdice) Approvers: - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) - https://github.com/jakirkham URL: https://github.com/rapidsai/cudf/pull/16685 --- .pre-commit-config.yaml | 12 +--- CONTRIBUTING.md | 4 +- .../developer_guide/contributing_guide.md | 3 +- docs/cudf/source/user_guide/10min.ipynb | 2 +- .../source/user_guide/guide-to-udfs.ipynb | 6 ++ pyproject.toml | 2 + python/cudf/benchmarks/conftest.py | 18 +++--- python/cudf/cudf/_typing.py | 10 ++-- python/cudf/cudf/core/buffer/buffer.py | 5 +- .../core/buffer/exposure_tracked_buffer.py | 5 +- python/cudf/cudf/core/column/__init__.py | 1 - python/cudf/cudf/core/column/categorical.py | 3 +- python/cudf/cudf/core/column/column.py | 3 +- python/cudf/cudf/core/column/datetime.py | 4 +- python/cudf/cudf/core/column/decimal.py | 3 +- python/cudf/cudf/core/column/lists.py | 4 +- python/cudf/cudf/core/column/methods.py | 4 +- python/cudf/cudf/core/column/numerical.py | 4 +- python/cudf/cudf/core/column/string.py | 4 +- python/cudf/cudf/core/column/timedelta.py | 4 +- python/cudf/cudf/core/column_accessor.py | 3 +- python/cudf/cudf/core/dataframe.py | 4 +- python/cudf/cudf/core/df_protocol.py | 7 ++- python/cudf/cudf/core/frame.py | 3 +- python/cudf/cudf/core/groupby/groupby.py | 4 +- python/cudf/cudf/core/index.py | 4 +- python/cudf/cudf/core/indexed_frame.py | 4 +- python/cudf/cudf/core/indexing_utils.py | 12 ++-- python/cudf/cudf/core/multiindex.py | 4 +- python/cudf/cudf/core/series.py | 4 +- python/cudf/cudf/core/tools/datetimes.py | 5 +- python/cudf/cudf/pandas/fast_slow_proxy.py | 4 +- python/cudf/cudf/pandas/module_accelerator.py | 2 +- .../pandas/scripts/analyze-test-failures.py | 1 + .../cudf/pandas/scripts/conftest-patch.py | 2 +- .../pandas/scripts/summarize-test-results.py | 3 +- .../cudf_pandas_tests/test_fast_slow_proxy.py | 3 +- python/cudf/pyproject.toml | 59 +++++-------------- python/cudf_kafka/pyproject.toml | 59 +++++-------------- python/custreamz/custreamz/tests/conftest.py | 1 + python/custreamz/pyproject.toml | 58 +++++------------- python/dask_cudf/dask_cudf/__init__.py | 20 +++---- python/dask_cudf/dask_cudf/expr/__init__.py | 4 +- python/dask_cudf/dask_cudf/io/__init__.py | 12 ++-- python/dask_cudf/pyproject.toml | 51 +++------------- .../pylibcudf/pylibcudf/tests/common/utils.py | 3 +- python/pylibcudf/pylibcudf/tests/conftest.py | 3 +- .../pylibcudf/pylibcudf/tests/io/test_avro.py | 3 +- .../pylibcudf/pylibcudf/tests/io/test_csv.py | 5 +- .../pylibcudf/pylibcudf/tests/io/test_json.py | 5 +- .../pylibcudf/pylibcudf/tests/io/test_orc.py | 3 +- .../pylibcudf/tests/io/test_parquet.py | 5 +- .../tests/io/test_source_sink_info.py | 3 +- .../pylibcudf/tests/io/test_timezone.py | 3 +- .../pylibcudf/tests/test_binaryops.py | 3 +- .../pylibcudf/tests/test_column_factories.py | 3 +- .../tests/test_column_from_device.py | 3 +- .../pylibcudf/tests/test_contiguous_split.py | 3 +- .../pylibcudf/pylibcudf/tests/test_copying.py | 3 +- .../pylibcudf/tests/test_datetime.py | 3 +- .../pylibcudf/tests/test_expressions.py | 3 +- .../pylibcudf/pylibcudf/tests/test_interop.py | 3 +- python/pylibcudf/pylibcudf/tests/test_join.py | 3 +- python/pylibcudf/pylibcudf/tests/test_json.py | 3 +- .../pylibcudf/tests/test_labeling.py | 3 +- .../pylibcudf/pylibcudf/tests/test_lists.py | 3 +- .../pylibcudf/tests/test_null_mask.py | 5 +- .../tests/test_nvtext_edit_distance.py | 3 +- .../tests/test_nvtext_generate_ngrams.py | 3 +- .../pylibcudf/tests/test_nvtext_jaccard.py | 3 +- .../pylibcudf/tests/test_nvtext_minhash.py | 3 +- .../tests/test_nvtext_ngrams_tokenize.py | 3 +- .../pylibcudf/tests/test_nvtext_normalize.py | 3 +- .../pylibcudf/tests/test_nvtext_replace.py | 3 +- .../pylibcudf/tests/test_nvtext_stemmer.py | 3 +- .../pylibcudf/tests/test_partitioning.py | 3 +- .../pylibcudf/tests/test_quantiles.py | 3 +- .../pylibcudf/tests/test_regex_program.py | 3 +- .../pylibcudf/pylibcudf/tests/test_reshape.py | 3 +- .../pylibcudf/pylibcudf/tests/test_round.py | 3 +- .../pylibcudf/tests/test_string_attributes.py | 3 +- .../pylibcudf/tests/test_string_capitalize.py | 3 +- .../pylibcudf/tests/test_string_case.py | 3 +- .../pylibcudf/tests/test_string_char_types.py | 3 +- .../pylibcudf/tests/test_string_combine.py | 3 +- .../pylibcudf/tests/test_string_contains.py | 3 +- .../pylibcudf/tests/test_string_convert.py | 3 +- .../tests/test_string_convert_booleans.py | 3 +- .../tests/test_string_convert_datetime.py | 3 +- .../tests/test_string_convert_durations.py | 3 +- .../tests/test_string_convert_fixed_point.py | 3 +- .../tests/test_string_convert_floats.py | 3 +- .../tests/test_string_convert_integers.py | 3 +- .../tests/test_string_convert_ipv4.py | 3 +- .../tests/test_string_convert_lists.py | 3 +- .../tests/test_string_convert_urls.py | 3 +- .../pylibcudf/tests/test_string_extract.py | 1 + .../pylibcudf/tests/test_string_find.py | 3 +- .../tests/test_string_find_multiple.py | 3 +- .../pylibcudf/tests/test_string_findall.py | 3 +- .../pylibcudf/tests/test_string_padding.py | 1 + .../pylibcudf/tests/test_string_repeat.py | 3 +- .../pylibcudf/tests/test_string_replace.py | 3 +- .../pylibcudf/tests/test_string_replace_re.py | 3 +- .../pylibcudf/tests/test_string_slice.py | 3 +- .../tests/test_string_split_partition.py | 3 +- .../tests/test_string_split_split.py | 3 +- .../pylibcudf/tests/test_string_strip.py | 3 +- .../pylibcudf/tests/test_string_translate.py | 3 +- .../pylibcudf/tests/test_string_wrap.py | 3 +- .../pylibcudf/pylibcudf/tests/test_table.py | 3 +- .../pylibcudf/tests/test_transform.py | 3 +- .../pylibcudf/tests/test_transpose.py | 3 +- python/pylibcudf/pyproject.toml | 56 +++++------------- 114 files changed, 318 insertions(+), 380 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 174dc72bf02..0e86407de11 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,17 +16,6 @@ repos: ^cpp/cmake/thirdparty/patches/.*| ^python/cudf/cudf/tests/data/subword_tokenizer_data/.* ) - - repo: https://github.com/PyCQA/isort - rev: 5.13.2 - hooks: - - id: isort - # Use the config file specific to each subproject so that each - # project can specify its own first/third-party packages. - args: ["--config-root=python/", "--resolve-all-configs"] - files: python/.* - exclude: | - (?x)^(^python/cudf_polars/.*) - types_or: [python, cython, pyi] - repo: https://github.com/MarcoGorelli/cython-lint rev: v0.16.2 hooks: @@ -150,6 +139,7 @@ repos: rev: v0.4.8 hooks: - id: ruff + args: ["--fix"] files: python/.*$ - id: ruff-format files: python/.*$ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f9cdde7c2b7..b55af21a300 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -293,8 +293,8 @@ In order to run doxygen as a linter on C++/CUDA code, run ./ci/checks/doxygen.sh ``` -Python code runs several linters including [Black](https://black.readthedocs.io/en/stable/), -[isort](https://pycqa.github.io/isort/), and [flake8](https://flake8.pycqa.org/en/latest/). +Python code runs several linters including [Ruff](https://docs.astral.sh/ruff/) +with its various rules for Black-like formatting or Isort. cuDF also uses [codespell](https://github.com/codespell-project/codespell) to find spelling mistakes, and this check is run as a pre-commit hook. To apply the suggested spelling fixes, diff --git a/docs/cudf/source/developer_guide/contributing_guide.md b/docs/cudf/source/developer_guide/contributing_guide.md index 6fce268f309..f4d2c7319b3 100644 --- a/docs/cudf/source/developer_guide/contributing_guide.md +++ b/docs/cudf/source/developer_guide/contributing_guide.md @@ -15,8 +15,7 @@ Developers are strongly recommended to set up `pre-commit` prior to any developm The `.pre-commit-config.yaml` file at the root of the repo is the primary source of truth linting. Specifically, cuDF uses the following tools: -- [`ruff`](https://beta.ruff.rs/) checks for general code formatting compliance. -- [`isort`](https://pycqa.github.io/isort/) ensures imports are sorted consistently. +- [`ruff`](https://docs.astral.sh/ruff/) checks for general code formatting compliance. - [`mypy`](http://mypy-lang.org/) performs static type checking. In conjunction with [type hints](https://docs.python.org/3/library/typing.html), `mypy` can help catch various bugs that are otherwise difficult to find. diff --git a/docs/cudf/source/user_guide/10min.ipynb b/docs/cudf/source/user_guide/10min.ipynb index 95f5f9734dd..46221b6015b 100644 --- a/docs/cudf/source/user_guide/10min.ipynb +++ b/docs/cudf/source/user_guide/10min.ipynb @@ -38,10 +38,10 @@ "import os\n", "\n", "import cupy as cp\n", + "import dask_cudf\n", "import pandas as pd\n", "\n", "import cudf\n", - "import dask_cudf\n", "\n", "cp.random.seed(12)\n", "\n", diff --git a/docs/cudf/source/user_guide/guide-to-udfs.ipynb b/docs/cudf/source/user_guide/guide-to-udfs.ipynb index 75eafcc5387..abfe5a1b178 100644 --- a/docs/cudf/source/user_guide/guide-to-udfs.ipynb +++ b/docs/cudf/source/user_guide/guide-to-udfs.ipynb @@ -101,6 +101,8 @@ "outputs": [], "source": [ "# define a scalar function\n", + "\n", + "\n", "def f(x):\n", " return x + 1" ] @@ -247,6 +249,8 @@ "outputs": [], "source": [ "# redefine the same function from above\n", + "\n", + "\n", "def f(x):\n", " return x + 1" ] @@ -1622,6 +1626,8 @@ "outputs": [], "source": [ "# a user defined aggregation function.\n", + "\n", + "\n", "def udaf(df):\n", " return df[\"b\"].max() - df[\"b\"].min() / 2" ] diff --git a/pyproject.toml b/pyproject.toml index 661c68ee62e..6933484f4e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,8 @@ select = [ "F", # pycodestyle Warning "W", + # isort + "I", # no-blank-line-before-function "D201", # one-blank-line-after-class diff --git a/python/cudf/benchmarks/conftest.py b/python/cudf/benchmarks/conftest.py index 7b2b71cf216..0e4afadccf5 100644 --- a/python/cudf/benchmarks/conftest.py +++ b/python/cudf/benchmarks/conftest.py @@ -56,27 +56,23 @@ # into the main repo. sys.path.insert(0, os.path.join(os.path.dirname(__file__), "common")) -from config import cudf # noqa: W0611, E402, F401 -from utils import ( # noqa: E402 - OrderedSet, - collapse_fixtures, - column_generators, - make_fixture, -) - # Turn off isort until we upgrade to 5.8.0 # https://github.com/pycqa/isort/issues/1594 -# isort: off from config import ( # noqa: W0611, E402, F401 NUM_COLS, NUM_ROWS, collect_ignore, + cudf, # noqa: W0611, E402, F401 pytest_collection_modifyitems, pytest_sessionfinish, pytest_sessionstart, ) - -# isort: on +from utils import ( # noqa: E402 + OrderedSet, + collapse_fixtures, + column_generators, + make_fixture, +) @pytest_cases.fixture(params=[0, 1], ids=["AxisIndex", "AxisColumn"]) diff --git a/python/cudf/cudf/_typing.py b/python/cudf/cudf/_typing.py index 6e8ad556b08..3b13cc258ab 100644 --- a/python/cudf/cudf/_typing.py +++ b/python/cudf/cudf/_typing.py @@ -1,8 +1,8 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. import sys -from collections.abc import Callable -from typing import TYPE_CHECKING, Any, Dict, Iterable, TypeVar, Union +from collections.abc import Callable, Iterable +from typing import TYPE_CHECKING, Any, TypeVar, Union import numpy as np from pandas import Period, Timedelta, Timestamp @@ -42,7 +42,7 @@ SeriesOrSingleColumnIndex = Union["cudf.Series", "cudf.core.index.Index"] # Groupby aggregation -AggType = Union[str, Callable] -MultiColumnAggType = Union[ - AggType, Iterable[AggType], Dict[Any, Iterable[AggType]] +AggType = Union[str, Callable] # noqa: UP007 +MultiColumnAggType = Union[ # noqa: UP007 + AggType, Iterable[AggType], dict[Any, Iterable[AggType]] ] diff --git a/python/cudf/cudf/core/buffer/buffer.py b/python/cudf/cudf/core/buffer/buffer.py index caff019f575..ffa306bf93f 100644 --- a/python/cudf/cudf/core/buffer/buffer.py +++ b/python/cudf/cudf/core/buffer/buffer.py @@ -6,7 +6,7 @@ import pickle import weakref from types import SimpleNamespace -from typing import Any, Literal, Mapping +from typing import TYPE_CHECKING, Any, Literal import numpy from typing_extensions import Self @@ -18,6 +18,9 @@ from cudf.core.abc import Serializable from cudf.utils.string import format_bytes +if TYPE_CHECKING: + from collections.abc import Mapping + def host_memory_allocation(nbytes: int) -> memoryview: """Allocate host memory using NumPy diff --git a/python/cudf/cudf/core/buffer/exposure_tracked_buffer.py b/python/cudf/cudf/core/buffer/exposure_tracked_buffer.py index 0bd8d6054b3..ecf9807cfc2 100644 --- a/python/cudf/cudf/core/buffer/exposure_tracked_buffer.py +++ b/python/cudf/cudf/core/buffer/exposure_tracked_buffer.py @@ -2,13 +2,16 @@ from __future__ import annotations -from typing import Literal, Mapping +from typing import TYPE_CHECKING, Literal from typing_extensions import Self import cudf from cudf.core.buffer.buffer import Buffer, BufferOwner +if TYPE_CHECKING: + from collections.abc import Mapping + class ExposureTrackedBuffer(Buffer): """An exposure tracked buffer. diff --git a/python/cudf/cudf/core/column/__init__.py b/python/cudf/cudf/core/column/__init__.py index 06791df7dc0..a1e87d04bc9 100644 --- a/python/cudf/cudf/core/column/__init__.py +++ b/python/cudf/cudf/core/column/__init__.py @@ -29,4 +29,3 @@ Decimal128Column, DecimalBaseColumn, ) -from cudf.core.column.interval import IntervalColumn # noqa: F401 diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 864e87b5377..087d0ed65f5 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -4,7 +4,7 @@ import warnings from functools import cached_property -from typing import TYPE_CHECKING, Any, Mapping, Sequence, cast +from typing import TYPE_CHECKING, Any, cast import numpy as np import pandas as pd @@ -26,6 +26,7 @@ if TYPE_CHECKING: from collections import abc + from collections.abc import Mapping, Sequence import numba.cuda diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 7674565e2c3..d2cd6e8ac8f 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -4,10 +4,11 @@ import pickle from collections import abc +from collections.abc import MutableSequence, Sequence from functools import cached_property from itertools import chain from types import SimpleNamespace -from typing import TYPE_CHECKING, Any, Literal, MutableSequence, Sequence, cast +from typing import TYPE_CHECKING, Any, Literal, cast import cupy import numpy as np diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 2c9b0baa9b6..b6dc250e64d 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -8,7 +8,7 @@ import locale import re from locale import nl_langinfo -from typing import TYPE_CHECKING, Literal, Sequence, cast +from typing import TYPE_CHECKING, Literal, cast import numpy as np import pandas as pd @@ -31,6 +31,8 @@ from cudf.utils.utils import _all_bools_with_nulls if TYPE_CHECKING: + from collections.abc import Sequence + from cudf._typing import ( ColumnBinaryOperand, DatetimeLikeScalar, diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 8803ebd6791..8ae06f72d1e 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -3,8 +3,9 @@ from __future__ import annotations import warnings +from collections.abc import Sequence from decimal import Decimal -from typing import TYPE_CHECKING, Sequence, cast +from typing import TYPE_CHECKING, cast import cupy as cp import numpy as np diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index e9d24d4f450..6b25e568f00 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -3,7 +3,7 @@ from __future__ import annotations from functools import cached_property -from typing import TYPE_CHECKING, Sequence, cast +from typing import TYPE_CHECKING, cast import numpy as np import pandas as pd @@ -34,6 +34,8 @@ from cudf.core.missing import NA if TYPE_CHECKING: + from collections.abc import Sequence + from cudf._typing import ColumnBinaryOperand, ColumnLike, Dtype, ScalarLike from cudf.core.buffer import Buffer diff --git a/python/cudf/cudf/core/column/methods.py b/python/cudf/cudf/core/column/methods.py index 05a0ab2e09a..a91c080fe21 100644 --- a/python/cudf/cudf/core/column/methods.py +++ b/python/cudf/cudf/core/column/methods.py @@ -2,9 +2,7 @@ from __future__ import annotations -from typing import Union, overload - -from typing_extensions import Literal +from typing import Literal, Union, overload import cudf import cudf.core.column diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 78d2814ed26..620cae65374 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -3,7 +3,7 @@ from __future__ import annotations import functools -from typing import TYPE_CHECKING, Any, Sequence, cast +from typing import TYPE_CHECKING, Any, cast import numpy as np import pandas as pd @@ -28,7 +28,7 @@ from .numerical_base import NumericalBaseColumn if TYPE_CHECKING: - from collections.abc import Callable + from collections.abc import Callable, Sequence from cudf._typing import ( ColumnBinaryOperand, diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index b25e486d855..856ce0f75de 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5,7 +5,7 @@ import re import warnings from functools import cached_property -from typing import TYPE_CHECKING, Sequence, cast, overload +from typing import TYPE_CHECKING, cast, overload import numpy as np import pandas as pd @@ -35,6 +35,8 @@ def str_to_boolean(column: StringColumn): if TYPE_CHECKING: + from collections.abc import Sequence + import cupy import numba.cuda diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 6b6f3e517a8..087d6474e7f 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -4,7 +4,7 @@ import datetime import functools -from typing import TYPE_CHECKING, Sequence, cast +from typing import TYPE_CHECKING, cast import numpy as np import pandas as pd @@ -19,6 +19,8 @@ from cudf.utils.utils import _all_bools_with_nulls if TYPE_CHECKING: + from collections.abc import Sequence + from cudf._typing import ColumnBinaryOperand, DatetimeLikeScalar, Dtype _unit_to_nanoseconds_conversion = { diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index bc093fdaa9a..496e86ed709 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -5,8 +5,9 @@ import itertools import sys from collections import abc +from collections.abc import Mapping from functools import cached_property, reduce -from typing import TYPE_CHECKING, Any, Mapping, cast +from typing import TYPE_CHECKING, Any, cast import numpy as np import pandas as pd diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 7d4d34f5b04..bf1c39b23da 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -13,8 +13,8 @@ import textwrap import warnings from collections import abc, defaultdict -from collections.abc import Callable, Iterator -from typing import TYPE_CHECKING, Any, Literal, MutableMapping, cast +from collections.abc import Callable, Iterator, MutableMapping +from typing import TYPE_CHECKING, Any, Literal, cast import cupy import numba diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py index 5250a741d3d..aa601a2b322 100644 --- a/python/cudf/cudf/core/df_protocol.py +++ b/python/cudf/cudf/core/df_protocol.py @@ -3,7 +3,7 @@ import enum from collections import abc -from typing import Any, Iterable, Mapping, Sequence, Tuple, cast +from typing import TYPE_CHECKING, Any, cast import cupy as cp import numpy as np @@ -20,6 +20,9 @@ build_column, ) +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping, Sequence + # Implementation of interchange protocol classes # ---------------------------------------------- @@ -61,7 +64,7 @@ class _MaskKind(enum.IntEnum): _DtypeKind.BOOL, _DtypeKind.STRING, } -ProtoDtype = Tuple[_DtypeKind, int, str, str] +ProtoDtype = tuple[_DtypeKind, int, str, str] class _CuDFBuffer: diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 37ad6b8fabb..205edd91d9d 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -6,7 +6,7 @@ import pickle import warnings from collections import abc -from typing import TYPE_CHECKING, Any, Literal, MutableMapping +from typing import TYPE_CHECKING, Any, Literal # TODO: The `numpy` import is needed for typing purposes during doc builds # only, need to figure out why the `np` alias is insufficient then remove. @@ -36,6 +36,7 @@ from cudf.utils.utils import _array_ufunc, _warn_no_dask_cudf if TYPE_CHECKING: + from collections.abc import MutableMapping from types import ModuleType from cudf._typing import Dtype, ScalarLike diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 81b20488d8d..6630bd96c01 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -8,7 +8,7 @@ import warnings from collections import abc from functools import cached_property -from typing import TYPE_CHECKING, Any, Iterable, Literal +from typing import TYPE_CHECKING, Any, Literal import cupy as cp import numpy as np @@ -36,6 +36,8 @@ from cudf.utils.utils import GetAttrGetItemMixin if TYPE_CHECKING: + from collections.abc import Iterable + from cudf._typing import ( AggType, DataFrameOrSeries, diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index cd07c58c5d9..1b90e9f9df0 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -5,10 +5,10 @@ import operator import pickle import warnings -from collections.abc import Hashable +from collections.abc import Hashable, MutableMapping from functools import cache, cached_property from numbers import Number -from typing import TYPE_CHECKING, Any, Literal, MutableMapping, cast +from typing import TYPE_CHECKING, Any, Literal, cast import cupy import numpy as np diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 5952815deef..e031f2a4e8e 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -10,9 +10,7 @@ from typing import ( TYPE_CHECKING, Any, - Callable, Literal, - MutableMapping, TypeVar, cast, ) @@ -63,6 +61,8 @@ from cudf.utils.utils import _warn_no_dask_cudf if TYPE_CHECKING: + from collections.abc import Callable, MutableMapping + from cudf._typing import ( ColumnLike, DataFrameOrSeries, diff --git a/python/cudf/cudf/core/indexing_utils.py b/python/cudf/cudf/core/indexing_utils.py index 8182e5cede2..ce6a5c960dd 100644 --- a/python/cudf/cudf/core/indexing_utils.py +++ b/python/cudf/cudf/core/indexing_utils.py @@ -3,9 +3,7 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Any, List, Union - -from typing_extensions import TypeAlias +from typing import Any, TypeAlias import cudf from cudf.api.types import _is_scalar_or_zero_d_array, is_integer @@ -46,11 +44,11 @@ class ScalarIndexer: key: GatherMap -IndexingSpec: TypeAlias = Union[ - EmptyIndexer, MapIndexer, MaskIndexer, ScalarIndexer, SliceIndexer -] +IndexingSpec: TypeAlias = ( + EmptyIndexer | MapIndexer | MaskIndexer | ScalarIndexer | SliceIndexer +) -ColumnLabels: TypeAlias = List[str] +ColumnLabels: TypeAlias = list[str] def destructure_iloc_key( diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 92d094d9de5..bfff62f0a89 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -8,7 +8,7 @@ import pickle import warnings from functools import cached_property -from typing import TYPE_CHECKING, Any, MutableMapping +from typing import TYPE_CHECKING, Any import cupy as cp import numpy as np @@ -36,7 +36,7 @@ from cudf.utils.utils import NotIterable, _external_only_api, _is_same_name if TYPE_CHECKING: - from collections.abc import Generator, Hashable + from collections.abc import Generator, Hashable, MutableMapping from typing_extensions import Self diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 29ed18ac0ce..9b60424c924 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -9,7 +9,7 @@ import warnings from collections import abc from shutil import get_terminal_size -from typing import TYPE_CHECKING, Any, Literal, MutableMapping +from typing import TYPE_CHECKING, Any, Literal import cupy import numpy as np @@ -71,6 +71,8 @@ from cudf.utils.performance_tracking import _performance_tracking if TYPE_CHECKING: + from collections.abc import MutableMapping + import pyarrow as pa from cudf._typing import ( diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index 68f34fa28ff..885e7b16644 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -4,7 +4,7 @@ import math import re import warnings -from typing import Literal, Sequence +from typing import TYPE_CHECKING, Literal import numpy as np import pandas as pd @@ -20,6 +20,9 @@ from cudf.core import column from cudf.core.index import ensure_index +if TYPE_CHECKING: + from collections.abc import Sequence + # https://github.com/pandas-dev/pandas/blob/2.2.x/pandas/core/tools/datetimes.py#L1112 _unit_map = { "year": "year", diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index c364d55e677..73afde407db 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -10,9 +10,9 @@ import pickle import types import warnings -from collections.abc import Callable, Iterator +from collections.abc import Callable, Iterator, Mapping from enum import IntEnum -from typing import Any, Literal, Mapping +from typing import Any, Literal import numpy as np diff --git a/python/cudf/cudf/pandas/module_accelerator.py b/python/cudf/cudf/pandas/module_accelerator.py index f82e300e83d..38103a71908 100644 --- a/python/cudf/cudf/pandas/module_accelerator.py +++ b/python/cudf/cudf/pandas/module_accelerator.py @@ -17,7 +17,7 @@ from abc import abstractmethod from importlib._bootstrap import _ImportLockContext as ImportLock from types import ModuleType -from typing import Any, ContextManager, NamedTuple +from typing import Any, ContextManager, NamedTuple # noqa: UP035 from typing_extensions import Self diff --git a/python/cudf/cudf/pandas/scripts/analyze-test-failures.py b/python/cudf/cudf/pandas/scripts/analyze-test-failures.py index 8870fbc5c28..bb2fc00d9fc 100644 --- a/python/cudf/cudf/pandas/scripts/analyze-test-failures.py +++ b/python/cudf/cudf/pandas/scripts/analyze-test-failures.py @@ -9,6 +9,7 @@ python analyze-test-failures.py Example: +------- python analyze-test-failures.py log.json frame/* """ diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index d12d2697729..59966a5ff0c 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -35,7 +35,7 @@ def null_assert_warnings(*args, **kwargs): @pytest.fixture(scope="session", autouse=True) # type: ignore def patch_testing_functions(): - tm.assert_produces_warning = null_assert_warnings + tm.assert_produces_warning = null_assert_warnings # noqa: F821 pytest.raises = replace_kwargs({"match": None})(pytest.raises) diff --git a/python/cudf/cudf/pandas/scripts/summarize-test-results.py b/python/cudf/cudf/pandas/scripts/summarize-test-results.py index 4ea0b3b4413..a0ad872e4c7 100644 --- a/python/cudf/cudf/pandas/scripts/summarize-test-results.py +++ b/python/cudf/cudf/pandas/scripts/summarize-test-results.py @@ -5,7 +5,8 @@ """ Summarizes the test results per module. -Examples: +Examples +-------- python summarize-test-results.py log.json python summarize-test-results.py log.json --output json python summarize-test-results.py log.json --output table diff --git a/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py b/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py index a75a20a4681..63fd9601fc1 100644 --- a/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py +++ b/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py @@ -387,7 +387,8 @@ def test_dir_bound_method( ): """This test will fail because dir for bound methods is currently incorrect, but we have no way to fix it without materializing the slow - type, which is unnecessarily expensive.""" + type, which is unnecessarily expensive. + """ Fast, FastIntermediate = fast_and_intermediate_with_doc Slow, SlowIntermediate = slow_and_intermediate_with_doc diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index feab04ffadc..80201dd84db 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -81,50 +81,6 @@ cudf-pandas-tests = [ Homepage = "https://github.com/rapidsai/cudf" Documentation = "https://docs.rapids.ai/api/cudf/stable/" -[tool.isort] -line_length = 79 -multi_line_output = 3 -include_trailing_comma = true -force_grid_wrap = 0 -combine_as_imports = true -order_by_type = true -known_dask = [ - "dask", - "distributed", - "dask_cuda", -] -known_rapids = [ - "rmm", - "pylibcudf" -] -known_first_party = [ - "cudf", -] -default_section = "THIRDPARTY" -sections = [ - "FUTURE", - "STDLIB", - "THIRDPARTY", - "DASK", - "RAPIDS", - "FIRSTPARTY", - "LOCALFOLDER", -] -skip = [ - "thirdparty", - ".eggs", - ".git", - ".hg", - ".mypy_cache", - ".tox", - ".venv", - "_build", - "buck-out", - "build", - "dist", - "__init__.py", -] - [tool.pytest.ini_options] addopts = "--tb=native --strict-config --strict-markers" empty_parameter_set_mark = "fail_at_collect" @@ -174,3 +130,18 @@ wheel.packages = ["cudf"] provider = "scikit_build_core.metadata.regex" input = "cudf/VERSION" regex = "(?P.*)" + +[tool.ruff] +extend = "../../pyproject.toml" + +[tool.ruff.lint.isort] +combine-as-imports = true +known-first-party = ["cudf"] +section-order = ["future", "standard-library", "third-party", "dask", "rapids", "first-party", "local-folder"] + +[tool.ruff.lint.isort.sections] +dask = ["dask", "distributed", "dask_cuda"] +rapids = ["rmm", "pylibcudf"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index 87e19a2bccf..667cd7b1db8 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -32,51 +32,20 @@ test = [ Homepage = "https://github.com/rapidsai/cudf" Documentation = "https://docs.rapids.ai/api/cudf/stable/" -[tool.isort] -line_length = 79 -multi_line_output = 3 -include_trailing_comma = true -force_grid_wrap = 0 -combine_as_imports = true -order_by_type = true -known_dask = [ - "dask", - "distributed", - "dask_cuda", - "streamz", -] -known_rapids = [ - "rmm", - "cudf", - "dask_cudf", -] -known_first_party = [ - "cudf_kafka", -] -default_section = "THIRDPARTY" -sections = [ - "FUTURE", - "STDLIB", - "THIRDPARTY", - "DASK", - "RAPIDS", - "FIRSTPARTY", - "LOCALFOLDER", -] -skip = [ - "thirdparty", - ".eggs", - ".git", - ".hg", - ".mypy_cache", - ".tox", - ".venv", - "_build", - "buck-out", - "build", - "dist", - "__init__.py", -] +[tool.ruff] +extend = "../../pyproject.toml" + +[tool.ruff.lint.isort] +combine-as-imports = true +known-first-party = ["cudf_kafka"] +section-order = ["future", "standard-library", "third-party", "dask", "rapids", "first-party", "local-folder"] + +[tool.ruff.lint.isort.sections] +dask = ["dask", "distributed", "dask_cuda", "streamz"] +rapids = ["rmm", "cudf", "dask_cudf"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] [tool.pytest.ini_options] addopts = "--tb=native --strict-config --strict-markers" diff --git a/python/custreamz/custreamz/tests/conftest.py b/python/custreamz/custreamz/tests/conftest.py index 1cda9b71387..c5135bc6414 100644 --- a/python/custreamz/custreamz/tests/conftest.py +++ b/python/custreamz/custreamz/tests/conftest.py @@ -2,6 +2,7 @@ import socket import pytest + from custreamz import kafka diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml index af45f49d9b4..a8ab05a3922 100644 --- a/python/custreamz/pyproject.toml +++ b/python/custreamz/pyproject.toml @@ -65,50 +65,20 @@ include = [ ] exclude = ["*tests*"] -[tool.isort] -line_length = 79 -multi_line_output = 3 -include_trailing_comma = true -force_grid_wrap = 0 -combine_as_imports = true -order_by_type = true -known_dask = [ - "dask", - "distributed", - "dask_cuda", -] -known_rapids = [ - "rmm", - "cudf", - "dask_cudf", -] -known_first_party = [ - "streamz", -] -default_section = "THIRDPARTY" -sections = [ - "FUTURE", - "STDLIB", - "THIRDPARTY", - "DASK", - "RAPIDS", - "FIRSTPARTY", - "LOCALFOLDER", -] -skip = [ - "thirdparty", - ".eggs", - ".git", - ".hg", - ".mypy_cache", - ".tox", - ".venv", - "_build", - "buck-out", - "build", - "dist", - "__init__.py", -] +[tool.ruff] +extend = "../../pyproject.toml" + +[tool.ruff.lint.isort] +combine-as-imports = true +known-first-party = ["streamz"] +section-order = ["future", "standard-library", "third-party", "dask", "rapids", "first-party", "local-folder"] + +[tool.ruff.lint.isort.sections] +dask = ["dask", "distributed", "dask_cuda"] +rapids = ["rmm", "cudf", "dask_cudf"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] [tool.pytest.ini_options] addopts = "--tb=native --strict-config --strict-markers" diff --git a/python/dask_cudf/dask_cudf/__init__.py b/python/dask_cudf/dask_cudf/__init__.py index 04c2ad65b99..f9df22cc436 100644 --- a/python/dask_cudf/dask_cudf/__init__.py +++ b/python/dask_cudf/dask_cudf/__init__.py @@ -7,15 +7,15 @@ # do anything for dask==2024.2.0) config.set({"dataframe.query-planning-warning": False}) -import dask.dataframe as dd -from dask.dataframe import from_delayed +import dask.dataframe as dd # noqa: E402 +from dask.dataframe import from_delayed # noqa: E402 -import cudf +import cudf # noqa: E402 -from . import backends -from ._version import __git_commit__, __version__ -from .core import concat, from_cudf, from_dask_dataframe -from .expr import QUERY_PLANNING_ON +from . import backends # noqa: E402, F401 +from ._version import __git_commit__, __version__ # noqa: E402, F401 +from .core import concat, from_cudf, from_dask_dataframe # noqa: E402 +from .expr import QUERY_PLANNING_ON # noqa: E402 def read_csv(*args, **kwargs): @@ -55,9 +55,9 @@ def inner_func(*args, **kwargs): to_orc = raise_not_implemented_error("to_orc") else: - from .core import DataFrame, Index, Series - from .groupby import groupby_agg - from .io import read_text, to_orc + from .core import DataFrame, Index, Series # noqa: F401 + from .groupby import groupby_agg # noqa: F401 + from .io import read_text, to_orc # noqa: F401 __all__ = [ diff --git a/python/dask_cudf/dask_cudf/expr/__init__.py b/python/dask_cudf/dask_cudf/expr/__init__.py index a76b655ef42..6dadadd5263 100644 --- a/python/dask_cudf/dask_cudf/expr/__init__.py +++ b/python/dask_cudf/dask_cudf/expr/__init__.py @@ -12,8 +12,8 @@ config.set({"dataframe.shuffle.method": "tasks"}) try: - import dask_cudf.expr._collection - import dask_cudf.expr._expr + import dask_cudf.expr._collection # noqa: F401 + import dask_cudf.expr._expr # noqa: F401 except ImportError as err: # Dask *should* raise an error before this. diff --git a/python/dask_cudf/dask_cudf/io/__init__.py b/python/dask_cudf/dask_cudf/io/__init__.py index 76bb2ea99b4..0421bd755f4 100644 --- a/python/dask_cudf/dask_cudf/io/__init__.py +++ b/python/dask_cudf/dask_cudf/io/__init__.py @@ -1,11 +1,11 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. -from .csv import read_csv -from .json import read_json -from .orc import read_orc, to_orc -from .text import read_text +from .csv import read_csv # noqa: F401 +from .json import read_json # noqa: F401 +from .orc import read_orc, to_orc # noqa: F401 +from .text import read_text # noqa: F401 try: - from .parquet import read_parquet, to_parquet + from .parquet import read_parquet, to_parquet # noqa: F401 except ImportError: pass diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 705865d083b..862e8f36eaa 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -69,50 +69,17 @@ version = {file = "dask_cudf/VERSION"} [tool.setuptools.packages.find] exclude = ["*tests*"] -[tool.isort] -line_length = 79 -multi_line_output = 3 -include_trailing_comma = true -force_grid_wrap = 0 -combine_as_imports = true -order_by_type = true +[tool.ruff] +extend = "../../pyproject.toml" -known_dask = [ - "dask", - "distributed", - "dask_cuda", -] -known_rapids = [ - "rmm", - "cudf", -] -known_first_party = [ - "dask_cudf", -] +[tool.ruff.lint.isort] +combine-as-imports = true +known-first-party = ["dask_cudf"] +section-order = ["future", "standard-library", "third-party", "dask", "rapids", "first-party", "local-folder"] -default_section = "THIRDPARTY" -sections = [ - "FUTURE", - "STDLIB", - "THIRDPARTY", - "DASK", - "RAPIDS", - "FIRSTPARTY", - "LOCALFOLDER", -] -skip = [ - "thirdparty", - ".eggs", - ".git", - ".hg", - ".mypy_cache", - ".tox", - ".venv", - "_build", - "buck-out", - "build", - "dist", -] +[tool.ruff.lint.isort.sections] +dask = ["dask", "distributed", "dask_cuda"] +rapids = ["rmm", "cudf"] [tool.pytest.ini_options] addopts = "--tb=native --strict-config --strict-markers" diff --git a/python/pylibcudf/pylibcudf/tests/common/utils.py b/python/pylibcudf/pylibcudf/tests/common/utils.py index 9f389fa42c4..d95849ef371 100644 --- a/python/pylibcudf/pylibcudf/tests/common/utils.py +++ b/python/pylibcudf/pylibcudf/tests/common/utils.py @@ -7,10 +7,11 @@ import numpy as np import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from pyarrow.orc import write_table as orc_write_table from pyarrow.parquet import write_table as pq_write_table + +import pylibcudf as plc from pylibcudf.io.types import CompressionType diff --git a/python/pylibcudf/pylibcudf/tests/conftest.py b/python/pylibcudf/pylibcudf/tests/conftest.py index fdce6f353ca..a19a8835498 100644 --- a/python/pylibcudf/pylibcudf/tests/conftest.py +++ b/python/pylibcudf/pylibcudf/tests/conftest.py @@ -8,8 +8,9 @@ import numpy as np import pyarrow as pa -import pylibcudf as plc import pytest + +import pylibcudf as plc from pylibcudf.io.types import CompressionType sys.path.insert(0, os.path.join(os.path.dirname(__file__), "common")) diff --git a/python/pylibcudf/pylibcudf/tests/io/test_avro.py b/python/pylibcudf/pylibcudf/tests/io/test_avro.py index 0cd5064a697..3d9d99ffa61 100644 --- a/python/pylibcudf/pylibcudf/tests/io/test_avro.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_avro.py @@ -5,10 +5,11 @@ import fastavro import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_table_and_meta_eq +import pylibcudf as plc + avro_dtype_pairs = [ ("boolean", pa.bool_()), ("int", pa.int32()), diff --git a/python/pylibcudf/pylibcudf/tests/io/test_csv.py b/python/pylibcudf/pylibcudf/tests/io/test_csv.py index ab26f23418d..22c83acc47c 100644 --- a/python/pylibcudf/pylibcudf/tests/io/test_csv.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_csv.py @@ -5,9 +5,7 @@ import pandas as pd import pyarrow as pa -import pylibcudf as plc import pytest -from pylibcudf.io.types import CompressionType from utils import ( _convert_types, assert_table_and_meta_eq, @@ -15,6 +13,9 @@ write_source_str, ) +import pylibcudf as plc +from pylibcudf.io.types import CompressionType + # Shared kwargs to pass to make_source _COMMON_CSV_SOURCE_KWARGS = { "format": "csv", diff --git a/python/pylibcudf/pylibcudf/tests/io/test_json.py b/python/pylibcudf/pylibcudf/tests/io/test_json.py index 9d976fedf00..453e5ce32a8 100644 --- a/python/pylibcudf/pylibcudf/tests/io/test_json.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_json.py @@ -3,9 +3,7 @@ import pandas as pd import pyarrow as pa -import pylibcudf as plc import pytest -from pylibcudf.io.types import CompressionType from utils import ( assert_table_and_meta_eq, make_source, @@ -13,6 +11,9 @@ write_source_str, ) +import pylibcudf as plc +from pylibcudf.io.types import CompressionType + # Shared kwargs to pass to make_source _COMMON_JSON_SOURCE_KWARGS = {"format": "json", "orient": "records"} diff --git a/python/pylibcudf/pylibcudf/tests/io/test_orc.py b/python/pylibcudf/pylibcudf/tests/io/test_orc.py index 42b14b1feff..5ed660ba6cf 100644 --- a/python/pylibcudf/pylibcudf/tests/io/test_orc.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_orc.py @@ -1,9 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import _convert_types, assert_table_and_meta_eq, make_source +import pylibcudf as plc + # Shared kwargs to pass to make_source _COMMON_ORC_SOURCE_KWARGS = {"format": "orc"} diff --git a/python/pylibcudf/pylibcudf/tests/io/test_parquet.py b/python/pylibcudf/pylibcudf/tests/io/test_parquet.py index f6e843ccf66..41298601539 100644 --- a/python/pylibcudf/pylibcudf/tests/io/test_parquet.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_parquet.py @@ -1,9 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from pyarrow.parquet import read_table +from utils import assert_table_and_meta_eq, make_source + +import pylibcudf as plc from pylibcudf.expressions import ( ASTOperator, ColumnNameReference, @@ -11,7 +13,6 @@ Literal, Operation, ) -from utils import assert_table_and_meta_eq, make_source # Shared kwargs to pass to make_source _COMMON_PARQUET_SOURCE_KWARGS = {"format": "parquet"} diff --git a/python/pylibcudf/pylibcudf/tests/io/test_source_sink_info.py b/python/pylibcudf/pylibcudf/tests/io/test_source_sink_info.py index 747f58ec8cf..0c43c363e55 100644 --- a/python/pylibcudf/pylibcudf/tests/io/test_source_sink_info.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_source_sink_info.py @@ -2,9 +2,10 @@ import io -import pylibcudf as plc import pytest +import pylibcudf as plc + @pytest.fixture(params=[plc.io.SourceInfo, plc.io.SinkInfo]) def io_class(request): diff --git a/python/pylibcudf/pylibcudf/tests/io/test_timezone.py b/python/pylibcudf/pylibcudf/tests/io/test_timezone.py index 76b0424b2af..b3555013927 100644 --- a/python/pylibcudf/pylibcudf/tests/io/test_timezone.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_timezone.py @@ -1,9 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import zoneinfo -import pylibcudf as plc import pytest +import pylibcudf as plc + def test_make_timezone_transition_table(): if len(zoneinfo.TZPATH) == 0: diff --git a/python/pylibcudf/pylibcudf/tests/test_binaryops.py b/python/pylibcudf/pylibcudf/tests/test_binaryops.py index f784cb3c191..bbb08e8b95a 100644 --- a/python/pylibcudf/pylibcudf/tests/test_binaryops.py +++ b/python/pylibcudf/pylibcudf/tests/test_binaryops.py @@ -4,10 +4,11 @@ import numpy as np import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + def idfn(param): ltype, rtype, outtype, plc_op, _ = param diff --git a/python/pylibcudf/pylibcudf/tests/test_column_factories.py b/python/pylibcudf/pylibcudf/tests/test_column_factories.py index 8cedbc6d42f..e317362a76b 100644 --- a/python/pylibcudf/pylibcudf/tests/test_column_factories.py +++ b/python/pylibcudf/pylibcudf/tests/test_column_factories.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import DEFAULT_STRUCT_TESTING_TYPE, assert_column_eq +import pylibcudf as plc + EMPTY_COL_SIZE = 3 NUMERIC_TYPES = [ diff --git a/python/pylibcudf/pylibcudf/tests/test_column_from_device.py b/python/pylibcudf/pylibcudf/tests/test_column_from_device.py index 0e129fdf0ef..24cd6b9e35f 100644 --- a/python/pylibcudf/pylibcudf/tests/test_column_from_device.py +++ b/python/pylibcudf/pylibcudf/tests/test_column_from_device.py @@ -1,12 +1,13 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq import rmm +import pylibcudf as plc + VALID_TYPES = [ pa.int8(), pa.int16(), diff --git a/python/pylibcudf/pylibcudf/tests/test_contiguous_split.py b/python/pylibcudf/pylibcudf/tests/test_contiguous_split.py index 7a5c1664eed..6d8b5993964 100644 --- a/python/pylibcudf/pylibcudf/tests/test_contiguous_split.py +++ b/python/pylibcudf/pylibcudf/tests/test_contiguous_split.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_table_eq +import pylibcudf as plc + param_pyarrow_tables = [ pa.table([]), pa.table({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}), diff --git a/python/pylibcudf/pylibcudf/tests/test_copying.py b/python/pylibcudf/pylibcudf/tests/test_copying.py index 628682d0a66..c0a41b96b1a 100644 --- a/python/pylibcudf/pylibcudf/tests/test_copying.py +++ b/python/pylibcudf/pylibcudf/tests/test_copying.py @@ -2,7 +2,6 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import ( DEFAULT_STRUCT_TESTING_TYPE, @@ -16,6 +15,8 @@ metadata_from_arrow_type, ) +import pylibcudf as plc + # TODO: consider moving this to conftest and "pairing" # it with pa_type, so that they don't get out of sync diff --git a/python/pylibcudf/pylibcudf/tests/test_datetime.py b/python/pylibcudf/pylibcudf/tests/test_datetime.py index 75930d59058..a80ab8d9f65 100644 --- a/python/pylibcudf/pylibcudf/tests/test_datetime.py +++ b/python/pylibcudf/pylibcudf/tests/test_datetime.py @@ -4,10 +4,11 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module", params=["s", "ms", "us", "ns"]) def datetime_column(has_nulls, request): diff --git a/python/pylibcudf/pylibcudf/tests/test_expressions.py b/python/pylibcudf/pylibcudf/tests/test_expressions.py index 5894ef4624c..6eabd6db617 100644 --- a/python/pylibcudf/pylibcudf/tests/test_expressions.py +++ b/python/pylibcudf/pylibcudf/tests/test_expressions.py @@ -1,8 +1,9 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest +import pylibcudf as plc + # We can't really evaluate these expressions, so just make sure # construction works properly diff --git a/python/pylibcudf/pylibcudf/tests/test_interop.py b/python/pylibcudf/pylibcudf/tests/test_interop.py index 01c998f16d4..e4f5174ad9b 100644 --- a/python/pylibcudf/pylibcudf/tests/test_interop.py +++ b/python/pylibcudf/pylibcudf/tests/test_interop.py @@ -1,9 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest +import pylibcudf as plc + def test_list_dtype_roundtrip(): list_type = pa.list_(pa.int32()) diff --git a/python/pylibcudf/pylibcudf/tests/test_join.py b/python/pylibcudf/pylibcudf/tests/test_join.py index 61e02f4d28d..f43a56046a4 100644 --- a/python/pylibcudf/pylibcudf/tests/test_join.py +++ b/python/pylibcudf/pylibcudf/tests/test_join.py @@ -2,9 +2,10 @@ import numpy as np import pyarrow as pa -import pylibcudf as plc from utils import assert_table_eq +import pylibcudf as plc + def test_cross_join(): left = pa.Table.from_arrays([[0, 1, 2], [3, 4, 5]], names=["a", "b"]) diff --git a/python/pylibcudf/pylibcudf/tests/test_json.py b/python/pylibcudf/pylibcudf/tests/test_json.py index 3d2955211f8..486a9524e92 100644 --- a/python/pylibcudf/pylibcudf/tests/test_json.py +++ b/python/pylibcudf/pylibcudf/tests/test_json.py @@ -1,9 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def plc_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_labeling.py b/python/pylibcudf/pylibcudf/tests/test_labeling.py index f7fb7463b50..beacfc63ce5 100644 --- a/python/pylibcudf/pylibcudf/tests/test_labeling.py +++ b/python/pylibcudf/pylibcudf/tests/test_labeling.py @@ -1,9 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest +import pylibcudf as plc + @pytest.mark.parametrize("left_inclusive", [True, False]) @pytest.mark.parametrize("right_inclusive", [True, False]) diff --git a/python/pylibcudf/pylibcudf/tests/test_lists.py b/python/pylibcudf/pylibcudf/tests/test_lists.py index 2353a6ff8f9..f3ef555f11d 100644 --- a/python/pylibcudf/pylibcudf/tests/test_lists.py +++ b/python/pylibcudf/pylibcudf/tests/test_lists.py @@ -3,10 +3,11 @@ import numpy as np import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture def test_data(): diff --git a/python/pylibcudf/pylibcudf/tests/test_null_mask.py b/python/pylibcudf/pylibcudf/tests/test_null_mask.py index 3edcae59edc..cd3da856de2 100644 --- a/python/pylibcudf/pylibcudf/tests/test_null_mask.py +++ b/python/pylibcudf/pylibcudf/tests/test_null_mask.py @@ -1,12 +1,13 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest -from pylibcudf.null_mask import MaskState import rmm +import pylibcudf as plc +from pylibcudf.null_mask import MaskState + @pytest.fixture(params=[False, True]) def nullable(request): diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_edit_distance.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_edit_distance.py index 7d93c471cc4..8b14e0db576 100644 --- a/python/pylibcudf/pylibcudf/tests/test_nvtext_edit_distance.py +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_edit_distance.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def edit_distance_data(): diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_generate_ngrams.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_generate_ngrams.py index 5cf9874d595..fae4685f81b 100644 --- a/python/pylibcudf/pylibcudf/tests/test_nvtext_generate_ngrams.py +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_generate_ngrams.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def input_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_jaccard.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_jaccard.py index d5a168426b1..05fe7b53c16 100644 --- a/python/pylibcudf/pylibcudf/tests/test_nvtext_jaccard.py +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_jaccard.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def input_data(): diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_minhash.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_minhash.py index 4e389a63f90..ead9ee094af 100644 --- a/python/pylibcudf/pylibcudf/tests/test_nvtext_minhash.py +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_minhash.py @@ -1,9 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest +import pylibcudf as plc + @pytest.fixture(scope="module", params=[pa.uint32(), pa.uint64()]) def minhash_input_data(request): diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_ngrams_tokenize.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_ngrams_tokenize.py index 283a009288d..84748b5597e 100644 --- a/python/pylibcudf/pylibcudf/tests/test_nvtext_ngrams_tokenize.py +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_ngrams_tokenize.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def input_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_normalize.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_normalize.py index fe28b83c09a..25b6d1389ec 100644 --- a/python/pylibcudf/pylibcudf/tests/test_nvtext_normalize.py +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_normalize.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def norm_spaces_input_data(): diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_replace.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_replace.py index 0fb54bb4ee1..65687f31c85 100644 --- a/python/pylibcudf/pylibcudf/tests/test_nvtext_replace.py +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_replace.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def input_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_stemmer.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_stemmer.py index 75d56f587a4..e7f4a971f08 100644 --- a/python/pylibcudf/pylibcudf/tests/test_nvtext_stemmer.py +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_stemmer.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def input_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_partitioning.py b/python/pylibcudf/pylibcudf/tests/test_partitioning.py index 444d0089d2c..c55e54cebc6 100644 --- a/python/pylibcudf/pylibcudf/tests/test_partitioning.py +++ b/python/pylibcudf/pylibcudf/tests/test_partitioning.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_table_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def partitioning_data(): diff --git a/python/pylibcudf/pylibcudf/tests/test_quantiles.py b/python/pylibcudf/pylibcudf/tests/test_quantiles.py index bac56691306..e4a24fb1c98 100644 --- a/python/pylibcudf/pylibcudf/tests/test_quantiles.py +++ b/python/pylibcudf/pylibcudf/tests/test_quantiles.py @@ -3,10 +3,11 @@ import numpy as np import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq, assert_table_eq +import pylibcudf as plc + # Map pylibcudf interpolation options to pyarrow options interp_mapping = { plc.types.Interpolation.LINEAR: "linear", diff --git a/python/pylibcudf/pylibcudf/tests/test_regex_program.py b/python/pylibcudf/pylibcudf/tests/test_regex_program.py index 777315df538..52598f2c462 100644 --- a/python/pylibcudf/pylibcudf/tests/test_regex_program.py +++ b/python/pylibcudf/pylibcudf/tests/test_regex_program.py @@ -1,8 +1,9 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -import pylibcudf as plc import pytest +import pylibcudf as plc + @pytest.mark.parametrize("pat", ["(", "*", "\\"]) def test_regex_program_invalid(pat): diff --git a/python/pylibcudf/pylibcudf/tests/test_reshape.py b/python/pylibcudf/pylibcudf/tests/test_reshape.py index 01115bc363a..ef23e23766a 100644 --- a/python/pylibcudf/pylibcudf/tests/test_reshape.py +++ b/python/pylibcudf/pylibcudf/tests/test_reshape.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq, assert_table_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def reshape_data(): diff --git a/python/pylibcudf/pylibcudf/tests/test_round.py b/python/pylibcudf/pylibcudf/tests/test_round.py index 0b30316b9a0..2526580bc13 100644 --- a/python/pylibcudf/pylibcudf/tests/test_round.py +++ b/python/pylibcudf/pylibcudf/tests/test_round.py @@ -2,10 +2,11 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(params=["float32", "float64"]) def column(request, has_nulls): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_attributes.py b/python/pylibcudf/pylibcudf/tests/test_string_attributes.py index a1820def0b1..f461657281a 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_attributes.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_attributes.py @@ -2,10 +2,11 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture() def str_data(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_capitalize.py b/python/pylibcudf/pylibcudf/tests/test_string_capitalize.py index 176ccc55b96..3e31c75c38a 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_capitalize.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_capitalize.py @@ -2,10 +2,11 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def str_data(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_case.py b/python/pylibcudf/pylibcudf/tests/test_string_case.py index 233cc253b14..08ac371fd96 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_case.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_case.py @@ -2,10 +2,11 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def string_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_char_types.py b/python/pylibcudf/pylibcudf/tests/test_string_char_types.py index bcd030c019e..06b44210d74 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_char_types.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_char_types.py @@ -2,9 +2,10 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_all_characters_of_type(): pa_array = pa.array(["1", "A"]) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_combine.py b/python/pylibcudf/pylibcudf/tests/test_string_combine.py index 4a7007a0d6b..eea3ac68e84 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_combine.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_combine.py @@ -2,10 +2,11 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + def test_concatenate_scalar_seperator(): plc_table = plc.interop.from_arrow( diff --git a/python/pylibcudf/pylibcudf/tests/test_string_contains.py b/python/pylibcudf/pylibcudf/tests/test_string_contains.py index 4e4dd7cbb00..ba9a4a7d3b8 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_contains.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_contains.py @@ -2,10 +2,11 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def target_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert.py b/python/pylibcudf/pylibcudf/tests/test_string_convert.py index 69f7a0fdd33..3f3f452c4f6 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_convert.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture( scope="module", diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_booleans.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_booleans.py index 117c59ff1b8..b391d2b290e 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_convert_booleans.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_booleans.py @@ -1,9 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_to_booleans(): pa_array = pa.array(["true", None, "True"]) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_datetime.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_datetime.py index f3e84286a36..c9368d858a4 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_convert_datetime.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_datetime.py @@ -3,10 +3,11 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture def fmt(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_durations.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_durations.py index 6d704309bfd..2d3578e4e71 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_convert_durations.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_durations.py @@ -3,10 +3,11 @@ from datetime import datetime, timedelta import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture( params=[ diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_fixed_point.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_fixed_point.py index b1c4d729604..012e722038e 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_convert_fixed_point.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_fixed_point.py @@ -2,9 +2,10 @@ import decimal import pyarrow as pa -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_to_fixed_point(): typ = pa.decimal128(38, 2) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_floats.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_floats.py index e9918fab559..8ee2b5075af 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_convert_floats.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_floats.py @@ -1,9 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_to_floats(): typ = pa.float32() diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_integers.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_integers.py index 6d1d565af30..01192c2d1f8 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_convert_integers.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_integers.py @@ -1,8 +1,9 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_to_integers(): typ = pa.int8() diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_ipv4.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_ipv4.py index 4dc3e512624..b533809f106 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_convert_ipv4.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_ipv4.py @@ -1,8 +1,9 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_ipv4_to_integers(): arr = pa.array(["123.45.67.890", None]) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_lists.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_lists.py index 8591732b39e..737036a4f0f 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_convert_lists.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_lists.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.mark.parametrize("na_rep", [None, pa.scalar("")]) @pytest.mark.parametrize("separators", [None, pa.array([",", "[", "]"])]) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_urls.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_urls.py index fee8c3fb8f6..528736798c7 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_convert_urls.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_urls.py @@ -2,9 +2,10 @@ import urllib import pyarrow as pa -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_url_encode(): data = ["/home/nfs", None] diff --git a/python/pylibcudf/pylibcudf/tests/test_string_extract.py b/python/pylibcudf/pylibcudf/tests/test_string_extract.py index 788b86423c4..e70edf4fb33 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_extract.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_extract.py @@ -2,6 +2,7 @@ import pyarrow as pa import pyarrow.compute as pc + import pylibcudf as plc diff --git a/python/pylibcudf/pylibcudf/tests/test_string_find.py b/python/pylibcudf/pylibcudf/tests/test_string_find.py index db3b13a5aae..82ec18832a9 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_find.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_find.py @@ -2,10 +2,11 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def data_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_find_multiple.py b/python/pylibcudf/pylibcudf/tests/test_string_find_multiple.py index d6b37a388f0..fa9eee3594b 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_find_multiple.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_find_multiple.py @@ -1,9 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_find_multiple(): arr = pa.array(["abc", "def"]) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_findall.py b/python/pylibcudf/pylibcudf/tests/test_string_findall.py index debfad92d00..b73d812c898 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_findall.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_findall.py @@ -2,9 +2,10 @@ import re import pyarrow as pa -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_findall(): arr = pa.array(["bunny", "rabbit", "hare", "dog"]) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_padding.py b/python/pylibcudf/pylibcudf/tests/test_string_padding.py index 2ba775d17ae..79498132097 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_padding.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_padding.py @@ -2,6 +2,7 @@ import pyarrow as pa import pyarrow.compute as pc + import pylibcudf as plc diff --git a/python/pylibcudf/pylibcudf/tests/test_string_repeat.py b/python/pylibcudf/pylibcudf/tests/test_string_repeat.py index 18b5d8bf4d0..c06c06be7c6 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_repeat.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_repeat.py @@ -2,9 +2,10 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest +import pylibcudf as plc + @pytest.mark.parametrize("repeats", [pa.array([2, 2]), 2]) def test_repeat_strings(repeats): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_replace.py b/python/pylibcudf/pylibcudf/tests/test_string_replace.py index 5a9c2007b73..2c7d25133de 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_replace.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_replace.py @@ -2,10 +2,11 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def data_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_replace_re.py b/python/pylibcudf/pylibcudf/tests/test_string_replace_re.py index ff2ce348d3b..511f826441a 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_replace_re.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_replace_re.py @@ -2,10 +2,11 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.mark.parametrize("max_replace_count", [-1, 1]) def test_replace_re_regex_program_scalar(max_replace_count): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_slice.py b/python/pylibcudf/pylibcudf/tests/test_string_slice.py index d9ce5591b98..1759f739e31 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_slice.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_slice.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def pa_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_split_partition.py b/python/pylibcudf/pylibcudf/tests/test_string_split_partition.py index 80cae8d1c6b..4e80f19b814 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_split_partition.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_split_partition.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_table_eq +import pylibcudf as plc + @pytest.fixture def data_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_split_split.py b/python/pylibcudf/pylibcudf/tests/test_string_split_split.py index 2aeffac8209..450b336ce65 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_split_split.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_split_split.py @@ -2,10 +2,11 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq, assert_table_eq +import pylibcudf as plc + @pytest.fixture def data_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_strip.py b/python/pylibcudf/pylibcudf/tests/test_string_strip.py index 005e5e4a405..5869e5f4920 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_strip.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_strip.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + data_strings = [ "AbC", "123abc", diff --git a/python/pylibcudf/pylibcudf/tests/test_string_translate.py b/python/pylibcudf/pylibcudf/tests/test_string_translate.py index 2ae893e69fb..84fd3354ac6 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_translate.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_translate.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture def data_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_wrap.py b/python/pylibcudf/pylibcudf/tests/test_string_wrap.py index a1c820cd586..00442d866e9 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_wrap.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_wrap.py @@ -2,9 +2,10 @@ import textwrap import pyarrow as pa -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_wrap(): width = 12 diff --git a/python/pylibcudf/pylibcudf/tests/test_table.py b/python/pylibcudf/pylibcudf/tests/test_table.py index e822d6a97a8..ac39ef4c5c9 100644 --- a/python/pylibcudf/pylibcudf/tests/test_table.py +++ b/python/pylibcudf/pylibcudf/tests/test_table.py @@ -1,9 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest +import pylibcudf as plc + @pytest.mark.parametrize( "arrow_tbl", diff --git a/python/pylibcudf/pylibcudf/tests/test_transform.py b/python/pylibcudf/pylibcudf/tests/test_transform.py index d5c618f07e4..49802fe64ac 100644 --- a/python/pylibcudf/pylibcudf/tests/test_transform.py +++ b/python/pylibcudf/pylibcudf/tests/test_transform.py @@ -3,9 +3,10 @@ import math import pyarrow as pa -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_nans_to_nulls(has_nans): if has_nans: diff --git a/python/pylibcudf/pylibcudf/tests/test_transpose.py b/python/pylibcudf/pylibcudf/tests/test_transpose.py index ac11123f680..b0c0bc72ead 100644 --- a/python/pylibcudf/pylibcudf/tests/test_transpose.py +++ b/python/pylibcudf/pylibcudf/tests/test_transpose.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from packaging.version import parse +import pylibcudf as plc + @pytest.mark.skipif( parse(pa.__version__) < parse("16.0.0"), diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index ea5b3065896..a80c85a1fa8 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -53,48 +53,20 @@ test = [ Homepage = "https://github.com/rapidsai/cudf" Documentation = "https://docs.rapids.ai/api/cudf/stable/" -[tool.isort] -line_length = 79 -multi_line_output = 3 -include_trailing_comma = true -force_grid_wrap = 0 -combine_as_imports = true -order_by_type = true -known_dask = [ - "dask", - "distributed", - "dask_cuda", -] -known_rapids = [ - "rmm", -] -known_first_party = [ - "cudf", -] -default_section = "THIRDPARTY" -sections = [ - "FUTURE", - "STDLIB", - "THIRDPARTY", - "DASK", - "RAPIDS", - "FIRSTPARTY", - "LOCALFOLDER", -] -skip = [ - "thirdparty", - ".eggs", - ".git", - ".hg", - ".mypy_cache", - ".tox", - ".venv", - "_build", - "buck-out", - "build", - "dist", - "__init__.py", -] +[tool.ruff] +extend = "../../pyproject.toml" + +[tool.ruff.lint.isort] +combine-as-imports = true +known-first-party = ["cudf"] +section-order = ["future", "standard-library", "third-party", "dask", "rapids", "first-party", "local-folder"] + +[tool.ruff.lint.isort.sections] +dask = ["dask", "distributed", "dask_cuda"] +rapids = ["rmm"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] [tool.pytest.ini_options] # --import-mode=importlib because two test_json.py exists and tests directory is not a structured module From 8bc9f19ebbb57bbc9bfa98efd94c8d7f8c65d316 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 25 Oct 2024 10:50:11 -1000 Subject: [PATCH 34/76] Add to_dlpack/from_dlpack APIs to pylibcudf (#17055) Contributes to https://github.com/rapidsai/cudf/issues/15162 Could use some advice how to type the input of `from_dlpack` and outut of `to_dlpack` which are PyCapsule objects. EDIT: I notice Cython just types them as object https://github.com/cython/cython/blob/master/Cython/Includes/cpython/pycapsule.pxd. Stylistically do we want add `object var_name` or just leave untyped? Authors: - Matthew Roeschke (https://github.com/mroeschke) - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Murray (https://github.com/Matt711) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17055 --- python/cudf/cudf/_lib/interop.pyx | 69 ++------------ python/pylibcudf/pylibcudf/__init__.pxd | 2 + python/pylibcudf/pylibcudf/interop.pxd | 8 ++ python/pylibcudf/pylibcudf/interop.pyx | 94 ++++++++++++++++++- .../pylibcudf/pylibcudf/libcudf/interop.pxd | 10 +- .../pylibcudf/pylibcudf/tests/test_interop.py | 31 ++++++ 6 files changed, 148 insertions(+), 66 deletions(-) create mode 100644 python/pylibcudf/pylibcudf/interop.pxd diff --git a/python/cudf/cudf/_lib/interop.pyx b/python/cudf/cudf/_lib/interop.pyx index 1dc586bb257..1c9d3a01b80 100644 --- a/python/cudf/cudf/_lib/interop.pyx +++ b/python/cudf/cudf/_lib/interop.pyx @@ -1,49 +1,22 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. -from cpython cimport pycapsule -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - import pylibcudf -from pylibcudf.libcudf.interop cimport ( - DLManagedTensor, - from_dlpack as cpp_from_dlpack, - to_dlpack as cpp_to_dlpack, -) -from pylibcudf.libcudf.table.table cimport table -from pylibcudf.libcudf.table.table_view cimport table_view - -from cudf._lib.utils cimport ( - columns_from_pylibcudf_table, - columns_from_unique_ptr, - table_view_from_columns, -) +from cudf._lib.utils cimport columns_from_pylibcudf_table from cudf.core.buffer import acquire_spill_lock from cudf.core.dtypes import ListDtype, StructDtype -def from_dlpack(dlpack_capsule): +def from_dlpack(object dlpack_capsule): """ Converts a DLPack Tensor PyCapsule into a list of columns. DLPack Tensor PyCapsule is expected to have the name "dltensor". """ - cdef DLManagedTensor* dlpack_tensor = pycapsule.\ - PyCapsule_GetPointer(dlpack_capsule, 'dltensor') - pycapsule.PyCapsule_SetName(dlpack_capsule, 'used_dltensor') - - cdef unique_ptr[table] c_result - - with nogil: - c_result = move( - cpp_from_dlpack(dlpack_tensor) - ) - - res = columns_from_unique_ptr(move(c_result)) - dlpack_tensor.deleter(dlpack_tensor) - return res + return columns_from_pylibcudf_table( + pylibcudf.interop.from_dlpack(dlpack_capsule) + ) def to_dlpack(list source_columns): @@ -52,39 +25,13 @@ def to_dlpack(list source_columns): DLPack Tensor PyCapsule will have the name "dltensor". """ - if any(column.null_count for column in source_columns): - raise ValueError( - "Cannot create a DLPack tensor with null values. \ - Input is required to have null count as zero." - ) - - cdef DLManagedTensor *dlpack_tensor - cdef table_view source_table_view = table_view_from_columns(source_columns) - - with nogil: - dlpack_tensor = cpp_to_dlpack( - source_table_view + return pylibcudf.interop.to_dlpack( + pylibcudf.Table( + [col.to_pylibcudf(mode="read") for col in source_columns] ) - - return pycapsule.PyCapsule_New( - dlpack_tensor, - 'dltensor', - dlmanaged_tensor_pycapsule_deleter ) -cdef void dlmanaged_tensor_pycapsule_deleter(object pycap_obj) noexcept: - cdef DLManagedTensor* dlpack_tensor = 0 - try: - dlpack_tensor = pycapsule.PyCapsule_GetPointer( - pycap_obj, 'used_dltensor') - return # we do not call a used capsule's deleter - except Exception: - dlpack_tensor = pycapsule.PyCapsule_GetPointer( - pycap_obj, 'dltensor') - dlpack_tensor.deleter(dlpack_tensor) - - def gather_metadata(object cols_dtypes): """ Generates a ColumnMetadata vector for each column. diff --git a/python/pylibcudf/pylibcudf/__init__.pxd b/python/pylibcudf/pylibcudf/__init__.pxd index aa67b4b1149..9bdfdab97c2 100644 --- a/python/pylibcudf/pylibcudf/__init__.pxd +++ b/python/pylibcudf/pylibcudf/__init__.pxd @@ -13,6 +13,7 @@ from . cimport ( expressions, filling, groupby, + interop, join, json, labeling, @@ -62,6 +63,7 @@ __all__ = [ "filling", "gpumemoryview", "groupby", + "interop", "join", "json", "lists", diff --git a/python/pylibcudf/pylibcudf/interop.pxd b/python/pylibcudf/pylibcudf/interop.pxd new file mode 100644 index 00000000000..2a0a8c15fdd --- /dev/null +++ b/python/pylibcudf/pylibcudf/interop.pxd @@ -0,0 +1,8 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.table cimport Table + + +cpdef Table from_dlpack(object managed_tensor) + +cpdef object to_dlpack(Table input) diff --git a/python/pylibcudf/pylibcudf/interop.pyx b/python/pylibcudf/pylibcudf/interop.pyx index 642516a1b90..61e812353b7 100644 --- a/python/pylibcudf/pylibcudf/interop.pyx +++ b/python/pylibcudf/pylibcudf/interop.pyx @@ -1,6 +1,11 @@ # Copyright (c) 2023-2024, NVIDIA CORPORATION. -from cpython.pycapsule cimport PyCapsule_GetPointer, PyCapsule_New +from cpython.pycapsule cimport ( + PyCapsule_GetPointer, + PyCapsule_IsValid, + PyCapsule_New, + PyCapsule_SetName, +) from libc.stdlib cimport free from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -16,11 +21,14 @@ from pylibcudf.libcudf.interop cimport ( ArrowArray, ArrowArrayStream, ArrowSchema, + DLManagedTensor, column_metadata, from_arrow_column as cpp_from_arrow_column, from_arrow_stream as cpp_from_arrow_stream, + from_dlpack as cpp_from_dlpack, to_arrow_host_raw, to_arrow_schema_raw, + to_dlpack as cpp_to_dlpack, ) from pylibcudf.libcudf.table.table cimport table @@ -315,3 +323,87 @@ def _to_arrow_scalar(cudf_object, metadata=None): # Note that metadata for scalars is primarily important for preserving # information on nested types since names are otherwise irrelevant. return to_arrow(Column.from_scalar(cudf_object, 1), metadata=metadata)[0] + + +cpdef Table from_dlpack(object managed_tensor): + """ + Convert a DLPack DLTensor into a cudf table. + + For details, see :cpp:func:`cudf::from_dlpack` + + Parameters + ---------- + managed_tensor : PyCapsule + A 1D or 2D column-major (Fortran order) tensor. + + Returns + ------- + Table + Table with a copy of the tensor data. + """ + if not PyCapsule_IsValid(managed_tensor, "dltensor"): + raise ValueError("Invalid PyCapsule object") + cdef unique_ptr[table] c_result + cdef DLManagedTensor* dlpack_tensor = PyCapsule_GetPointer( + managed_tensor, "dltensor" + ) + if dlpack_tensor is NULL: + raise ValueError("PyCapsule object contained a NULL pointer") + PyCapsule_SetName(managed_tensor, "used_dltensor") + + # Note: A copy is always performed when converting the dlpack + # data to a libcudf table. We also delete the dlpack_tensor pointer + # as the pointer is not deleted by libcudf's from_dlpack function. + # TODO: https://github.com/rapidsai/cudf/issues/10874 + # TODO: https://github.com/rapidsai/cudf/issues/10849 + with nogil: + c_result = cpp_from_dlpack(dlpack_tensor) + + cdef Table result = Table.from_libcudf(move(c_result)) + dlpack_tensor.deleter(dlpack_tensor) + return result + + +cpdef object to_dlpack(Table input): + """ + Convert a cudf table into a DLPack DLTensor. + + For details, see :cpp:func:`cudf::to_dlpack` + + Parameters + ---------- + input : Table + A 1D or 2D column-major (Fortran order) tensor. + + Returns + ------- + PyCapsule + 1D or 2D DLPack tensor with a copy of the table data, or nullptr. + """ + for col in input._columns: + if col.null_count(): + raise ValueError( + "Cannot create a DLPack tensor with null values. " + "Input is required to have null count as zero." + ) + cdef DLManagedTensor *dlpack_tensor + + with nogil: + dlpack_tensor = cpp_to_dlpack(input.view()) + + return PyCapsule_New( + dlpack_tensor, + "dltensor", + dlmanaged_tensor_pycapsule_deleter + ) + + +cdef void dlmanaged_tensor_pycapsule_deleter(object pycap_obj) noexcept: + if PyCapsule_IsValid(pycap_obj, "used_dltensor"): + # we do not call a used capsule's deleter + return + cdef DLManagedTensor* dlpack_tensor = PyCapsule_GetPointer( + pycap_obj, "dltensor" + ) + if dlpack_tensor is not NULL: + dlpack_tensor.deleter(dlpack_tensor) diff --git a/python/pylibcudf/pylibcudf/libcudf/interop.pxd b/python/pylibcudf/pylibcudf/libcudf/interop.pxd index 30b97fdec34..b75e9ca7001 100644 --- a/python/pylibcudf/pylibcudf/libcudf/interop.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/interop.pxd @@ -32,11 +32,13 @@ cdef extern from "cudf/interop.hpp" nogil: cdef extern from "cudf/interop.hpp" namespace "cudf" \ nogil: - cdef unique_ptr[table] from_dlpack(const DLManagedTensor* tensor - ) except + + cdef unique_ptr[table] from_dlpack( + const DLManagedTensor* managed_tensor + ) except + - DLManagedTensor* to_dlpack(table_view input_table - ) except + + DLManagedTensor* to_dlpack( + const table_view& input + ) except + cdef cppclass column_metadata: column_metadata() except + diff --git a/python/pylibcudf/pylibcudf/tests/test_interop.py b/python/pylibcudf/pylibcudf/tests/test_interop.py index e4f5174ad9b..af80b6e5978 100644 --- a/python/pylibcudf/pylibcudf/tests/test_interop.py +++ b/python/pylibcudf/pylibcudf/tests/test_interop.py @@ -1,7 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +import cupy as cp +import numpy as np import pyarrow as pa import pytest +from utils import assert_table_eq import pylibcudf as plc @@ -67,3 +70,31 @@ def test_decimal_other(data_type): arrow_type = plc.interop.to_arrow(data_type, precision=precision) assert arrow_type == pa.decimal128(precision, 0) + + +def test_round_trip_dlpack_plc_table(): + expected = pa.table({"a": [1, 2, 3], "b": [5, 6, 7]}) + plc_table = plc.interop.from_arrow(expected) + result = plc.interop.from_dlpack(plc.interop.to_dlpack(plc_table)) + assert_table_eq(expected, result) + + +@pytest.mark.parametrize("array", [np.array, cp.array]) +def test_round_trip_dlpack_array(array): + arr = array([1, 2, 3]) + result = plc.interop.from_dlpack(arr.__dlpack__()) + expected = pa.table({"a": [1, 2, 3]}) + assert_table_eq(expected, result) + + +def test_to_dlpack_error(): + plc_table = plc.interop.from_arrow( + pa.table({"a": [1, None, 3], "b": [5, 6, 7]}) + ) + with pytest.raises(ValueError, match="Cannot create a DLPack tensor"): + plc.interop.from_dlpack(plc.interop.to_dlpack(plc_table)) + + +def test_from_dlpack_error(): + with pytest.raises(ValueError, match="Invalid PyCapsule object"): + plc.interop.from_dlpack(1) From 8c4d1f201043a6802598bea3dcb58fa1e061d9e5 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Mon, 28 Oct 2024 09:22:20 -0400 Subject: [PATCH 35/76] Use make_device_uvector instead of cudaMemcpyAsync in inplace_bitmask_binop (#17181) Changes `cudf::detail::inplace_bitmask_binop()` to use `make_device_uvector()` instead of `cudaMemcpyAsync()` Found while working on #17149 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/17181 --- cpp/include/cudf/detail/null_mask.cuh | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh index 482265d633e..025e2ccc3ec 100644 --- a/cpp/include/cudf/detail/null_mask.cuh +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -166,16 +166,9 @@ size_type inplace_bitmask_binop(Binop op, rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref(); cudf::detail::device_scalar d_counter{0, stream, mr}; - rmm::device_uvector d_masks(masks.size(), stream, mr); - rmm::device_uvector d_begin_bits(masks_begin_bits.size(), stream, mr); - - CUDF_CUDA_TRY(cudaMemcpyAsync( - d_masks.data(), masks.data(), masks.size_bytes(), cudaMemcpyDefault, stream.value())); - CUDF_CUDA_TRY(cudaMemcpyAsync(d_begin_bits.data(), - masks_begin_bits.data(), - masks_begin_bits.size_bytes(), - cudaMemcpyDefault, - stream.value())); + + auto d_masks = cudf::detail::make_device_uvector_async(masks, stream, mr); + auto d_begin_bits = cudf::detail::make_device_uvector_async(masks_begin_bits, stream, mr); auto constexpr block_size = 256; cudf::detail::grid_1d config(dest_mask.size(), block_size); From ef28cddeccbcc790e05dd49794ecdfcae4f008c2 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Mon, 28 Oct 2024 10:04:33 -0700 Subject: [PATCH 36/76] Add compute_mapping_indices used by shared memory groupby (#17147) This work is part of splitting the original bulk shared memory groupby PR https://github.com/rapidsai/cudf/pull/16619. This PR introduces the `compute_mapping_indices` API, which is used by the shared memory groupby. libcudf will opt for the shared memory code path when the aggregation request is compatible with shared memory, i.e. there is enough shared memory space and no dictionary aggregation requests. Aggregating with shared memory involves two steps. The first step, introduced in this PR, calculates the offset for each input key within the shared memory aggregation storage, as well as the offset when merging the shared memory results into global memory. Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Mark Harris (https://github.com/harrism) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/17147 --- cpp/CMakeLists.txt | 2 + .../groupby/hash/compute_mapping_indices.cu | 35 ++++ .../groupby/hash/compute_mapping_indices.cuh | 192 ++++++++++++++++++ .../groupby/hash/compute_mapping_indices.hpp | 43 ++++ .../hash/compute_mapping_indices_null.cu | 35 ++++ 5 files changed, 307 insertions(+) create mode 100644 cpp/src/groupby/hash/compute_mapping_indices.cu create mode 100644 cpp/src/groupby/hash/compute_mapping_indices.cuh create mode 100644 cpp/src/groupby/hash/compute_mapping_indices.hpp create mode 100644 cpp/src/groupby/hash/compute_mapping_indices_null.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index e4b9cbf8921..60132f651d2 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -369,6 +369,8 @@ add_library( src/filling/sequence.cu src/groupby/groupby.cu src/groupby/hash/compute_groupby.cu + src/groupby/hash/compute_mapping_indices.cu + src/groupby/hash/compute_mapping_indices_null.cu src/groupby/hash/compute_single_pass_aggs.cu src/groupby/hash/create_sparse_results_table.cu src/groupby/hash/flatten_single_pass_aggs.cpp diff --git a/cpp/src/groupby/hash/compute_mapping_indices.cu b/cpp/src/groupby/hash/compute_mapping_indices.cu new file mode 100644 index 00000000000..519d7cd2f1c --- /dev/null +++ b/cpp/src/groupby/hash/compute_mapping_indices.cu @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "compute_mapping_indices.cuh" +#include "compute_mapping_indices.hpp" + +namespace cudf::groupby::detail::hash { +template cudf::size_type max_occupancy_grid_size>( + cudf::size_type n); + +template void compute_mapping_indices>( + cudf::size_type grid_size, + cudf::size_type num, + hash_set_ref_t global_set, + bitmask_type const* row_bitmask, + bool skip_rows_with_nulls, + cudf::size_type* local_mapping_index, + cudf::size_type* global_mapping_index, + cudf::size_type* block_cardinality, + cuda::std::atomic_flag* needs_global_memory_fallback, + rmm::cuda_stream_view stream); +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/compute_mapping_indices.cuh b/cpp/src/groupby/hash/compute_mapping_indices.cuh new file mode 100644 index 00000000000..d353830780f --- /dev/null +++ b/cpp/src/groupby/hash/compute_mapping_indices.cuh @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "compute_mapping_indices.hpp" +#include "helpers.cuh" + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include + +namespace cudf::groupby::detail::hash { +template +__device__ void find_local_mapping(cooperative_groups::thread_block const& block, + cudf::size_type idx, + cudf::size_type num_input_rows, + SetType shared_set, + bitmask_type const* row_bitmask, + bool skip_rows_with_nulls, + cudf::size_type* cardinality, + cudf::size_type* local_mapping_index, + cudf::size_type* shared_set_indices) +{ + auto const is_valid_input = + idx < num_input_rows and (not skip_rows_with_nulls or cudf::bit_is_set(row_bitmask, idx)); + auto const [result_idx, inserted] = [&]() { + if (is_valid_input) { + auto const result = shared_set.insert_and_find(idx); + auto const matched_idx = *result.first; + auto const inserted = result.second; + // inserted a new element + if (result.second) { + auto const shared_set_index = atomicAdd(cardinality, 1); + shared_set_indices[shared_set_index] = idx; + local_mapping_index[idx] = shared_set_index; + } + return cuda::std::pair{matched_idx, inserted}; + } + return cuda::std::pair{0, false}; // dummy values + }(); + // Syncing the thread block is needed so that updates in `local_mapping_index` are visible to all + // threads in the thread block. + block.sync(); + if (is_valid_input) { + // element was already in set + if (!inserted) { local_mapping_index[idx] = local_mapping_index[result_idx]; } + } +} + +template +__device__ void find_global_mapping(cooperative_groups::thread_block const& block, + cudf::size_type cardinality, + SetRef global_set, + cudf::size_type* shared_set_indices, + cudf::size_type* global_mapping_index) +{ + // for all unique keys in shared memory hash set, stores their matches in + // global hash set to `global_mapping_index` + for (auto idx = block.thread_rank(); idx < cardinality; idx += block.num_threads()) { + auto const input_idx = shared_set_indices[idx]; + global_mapping_index[block.group_index().x * GROUPBY_SHM_MAX_ELEMENTS + idx] = + *global_set.insert_and_find(input_idx).first; + } +} + +/* + * @brief Inserts keys into the shared memory hash set, and stores the block-wise rank for a given + * row index in `local_mapping_index`. If the number of unique keys found in a threadblock exceeds + * `GROUPBY_CARDINALITY_THRESHOLD`, the threads in that block will exit without updating + * `global_set` or setting `global_mapping_index`. Else, we insert the unique keys found to the + * global hash set, and save the row index of the global sparse table in `global_mapping_index`. + */ +template +CUDF_KERNEL void mapping_indices_kernel(cudf::size_type num_input_rows, + SetRef global_set, + bitmask_type const* row_bitmask, + bool skip_rows_with_nulls, + cudf::size_type* local_mapping_index, + cudf::size_type* global_mapping_index, + cudf::size_type* block_cardinality, + cuda::std::atomic_flag* needs_global_memory_fallback) +{ + __shared__ cudf::size_type shared_set_indices[GROUPBY_SHM_MAX_ELEMENTS]; + + // Shared set initialization + __shared__ cuco::window windows[window_extent.value()]; + + auto raw_set = cuco::static_set_ref{ + cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL}, + global_set.key_eq(), + probing_scheme_t{global_set.hash_function()}, + cuco::thread_scope_block, + cuco::aow_storage_ref{ + window_extent, windows}}; + auto shared_set = raw_set.rebind_operators(cuco::insert_and_find); + + auto const block = cooperative_groups::this_thread_block(); + shared_set.initialize(block); + + __shared__ cudf::size_type cardinality; + if (block.thread_rank() == 0) { cardinality = 0; } + block.sync(); + + auto const stride = cudf::detail::grid_1d::grid_stride(); + + for (auto idx = cudf::detail::grid_1d::global_thread_id(); + idx - block.thread_rank() < num_input_rows; + idx += stride) { + find_local_mapping(block, + idx, + num_input_rows, + shared_set, + row_bitmask, + skip_rows_with_nulls, + &cardinality, + local_mapping_index, + shared_set_indices); + + block.sync(); + + if (cardinality >= GROUPBY_CARDINALITY_THRESHOLD) { + if (block.thread_rank() == 0) { needs_global_memory_fallback->test_and_set(); } + break; + } + } + + // Insert unique keys from shared to global hash set if block-cardinality + // doesn't exceed the threshold upper-limit + if (cardinality < GROUPBY_CARDINALITY_THRESHOLD) { + find_global_mapping(block, cardinality, global_set, shared_set_indices, global_mapping_index); + } + + if (block.thread_rank() == 0) { block_cardinality[block.group_index().x] = cardinality; } +} + +template +cudf::size_type max_occupancy_grid_size(cudf::size_type n) +{ + cudf::size_type max_active_blocks{-1}; + CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &max_active_blocks, mapping_indices_kernel, GROUPBY_BLOCK_SIZE, 0)); + auto const grid_size = max_active_blocks * cudf::detail::num_multiprocessors(); + auto const num_blocks = cudf::util::div_rounding_up_safe(n, GROUPBY_BLOCK_SIZE); + return std::min(grid_size, num_blocks); +} + +template +void compute_mapping_indices(cudf::size_type grid_size, + cudf::size_type num, + SetRef global_set, + bitmask_type const* row_bitmask, + bool skip_rows_with_nulls, + cudf::size_type* local_mapping_index, + cudf::size_type* global_mapping_index, + cudf::size_type* block_cardinality, + cuda::std::atomic_flag* needs_global_memory_fallback, + rmm::cuda_stream_view stream) +{ + mapping_indices_kernel<<>>( + num, + global_set, + row_bitmask, + skip_rows_with_nulls, + local_mapping_index, + global_mapping_index, + block_cardinality, + needs_global_memory_fallback); +} +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/compute_mapping_indices.hpp b/cpp/src/groupby/hash/compute_mapping_indices.hpp new file mode 100644 index 00000000000..473ad99e650 --- /dev/null +++ b/cpp/src/groupby/hash/compute_mapping_indices.hpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include + +#include + +namespace cudf::groupby::detail::hash { +/* + * @brief Computes the maximum number of active blocks of the given kernel that can be executed on + * the underlying device + */ +template +[[nodiscard]] cudf::size_type max_occupancy_grid_size(cudf::size_type n); + +template +void compute_mapping_indices(cudf::size_type grid_size, + cudf::size_type num, + SetRef global_set, + bitmask_type const* row_bitmask, + bool skip_rows_with_nulls, + cudf::size_type* local_mapping_index, + cudf::size_type* global_mapping_index, + cudf::size_type* block_cardinality, + cuda::std::atomic_flag* needs_global_memory_fallback, + rmm::cuda_stream_view stream); +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/compute_mapping_indices_null.cu b/cpp/src/groupby/hash/compute_mapping_indices_null.cu new file mode 100644 index 00000000000..81c3c9e456f --- /dev/null +++ b/cpp/src/groupby/hash/compute_mapping_indices_null.cu @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "compute_mapping_indices.cuh" +#include "compute_mapping_indices.hpp" + +namespace cudf::groupby::detail::hash { +template cudf::size_type +max_occupancy_grid_size>(cudf::size_type n); + +template void compute_mapping_indices>( + cudf::size_type grid_size, + cudf::size_type num, + nullable_hash_set_ref_t global_set, + bitmask_type const* row_bitmask, + bool skip_rows_with_nulls, + cudf::size_type* local_mapping_index, + cudf::size_type* global_mapping_index, + cudf::size_type* block_cardinality, + cuda::std::atomic_flag* needs_global_memory_fallback, + rmm::cuda_stream_view stream); +} // namespace cudf::groupby::detail::hash From a83e1a3766d8b647e34a09f4bc79530e298dfed9 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Mon, 28 Oct 2024 14:54:32 -0400 Subject: [PATCH 37/76] Add 2-cpp approvers text to contributing guide [no ci] (#17182) Adds text to the contributing guide mentioning 2 cpp-codeowner approvals are required for any C++changes. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/17182 --- CONTRIBUTING.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b55af21a300..3db1ed35294 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -38,6 +38,7 @@ conduct. More information can be found at: 8. Verify that CI passes all [status checks](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks). Fix if needed. 9. Wait for other developers to review your code and update code as needed. + Changes to any C++ files require at least 2 approvals from the cudf-cpp-codeowners before merging. 10. Once reviewed and approved, a RAPIDS developer will merge your pull request. If you are unsure about anything, don't hesitate to comment on issues and ask for clarification! From 7b17fbe41b3bd5f56ec0c1836f80d3d942578f78 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Mon, 28 Oct 2024 14:43:29 -0500 Subject: [PATCH 38/76] Remove java reservation (#17189) This removes a file for a feature that we intended to use, but never was. The other parts of that feature were already removed, but this was missed. Authors: - Robert (Bobby) Evans (https://github.com/revans2) Approvers: - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17189 --- .../ai/rapids/cudf/HostMemoryReservation.java | 32 ------------------- 1 file changed, 32 deletions(-) delete mode 100644 java/src/main/java/ai/rapids/cudf/HostMemoryReservation.java diff --git a/java/src/main/java/ai/rapids/cudf/HostMemoryReservation.java b/java/src/main/java/ai/rapids/cudf/HostMemoryReservation.java deleted file mode 100644 index 72c2e659372..00000000000 --- a/java/src/main/java/ai/rapids/cudf/HostMemoryReservation.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package ai.rapids.cudf; - -/** - * Represents some amount of host memory that has been reserved. A reservation guarantees that one - * or more allocations up to the reserved amount, minus padding for alignment will succeed. A - * reservation typically guarantees the amount can be allocated one, meaning when a buffer - * allocated from a reservation is freed it is not returned to the reservation, but to the pool of - * memory the reservation originally came from. If more memory is allocated from the reservation - * an OutOfMemoryError may be thrown, but it is not guaranteed to happen. - * - * When the reservation is closed any unused reservation will be returned to the pool of memory - * the reservation came from. - */ -public interface HostMemoryReservation extends HostMemoryAllocator, AutoCloseable {} From abecd0b54d65dd2678f617f8c1a88320f523465f Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 28 Oct 2024 15:33:59 -0500 Subject: [PATCH 39/76] build wheels without build isolation (#17088) Contributes to https://github.com/rapidsai/build-planning/issues/108 Contributes to https://github.com/rapidsai/build-planning/issues/111 Proposes some small packaging/CI changes, matching similar changes being made across RAPIDS. * building `libcudf` wheels with `--no-build-isolation` (for better `sccache` hit rate) * printing `sccache` stats to CI logs * updating to the latest `rapids-dependency-file-generator` (v1.16.0) * always explicitly specifying `cpp` / `python` in calls to `rapids-upload-wheels-to-s3` # Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17088 --- .pre-commit-config.yaml | 2 +- ci/build_cpp.sh | 4 ++++ ci/build_python.sh | 10 ++++++++++ ci/build_wheel.sh | 15 +++++++++++++-- ci/build_wheel_cudf.sh | 2 +- ci/build_wheel_cudf_polars.sh | 4 ++-- ci/build_wheel_dask_cudf.sh | 4 ++-- ci/build_wheel_libcudf.sh | 24 ++++++++++++++++++++++-- ci/build_wheel_pylibcudf.sh | 4 ++-- dependencies.yaml | 2 +- 10 files changed, 58 insertions(+), 13 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0e86407de11..f5234f58efe 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -155,7 +155,7 @@ repos: ) - id: verify-alpha-spec - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.13.11 + rev: v1.16.0 hooks: - id: rapids-dependency-file-generator args: ["--clean"] diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh index e5fcef17a83..3d06eacf9ff 100755 --- a/ci/build_cpp.sh +++ b/ci/build_cpp.sh @@ -15,8 +15,12 @@ rapids-print-env rapids-logger "Begin cpp build" +sccache --zero-stats + # With boa installed conda build forward to boa RAPIDS_PACKAGE_VERSION=$(rapids-generate-version) rapids-conda-retry mambabuild \ conda/recipes/libcudf +sccache --show-adv-stats + rapids-upload-conda-to-s3 cpp diff --git a/ci/build_python.sh b/ci/build_python.sh index 823d7f62290..ed90041cc77 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -19,6 +19,8 @@ rapids-logger "Begin py build" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) +sccache --zero-stats + # TODO: Remove `--no-test` flag once importing on a CPU # node works correctly # With boa installed conda build forwards to the boa builder @@ -28,12 +30,18 @@ RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \ --channel "${CPP_CHANNEL}" \ conda/recipes/pylibcudf +sccache --show-adv-stats +sccache --zero-stats + RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ conda/recipes/cudf +sccache --show-adv-stats +sccache --zero-stats + RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ @@ -46,6 +54,8 @@ RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ conda/recipes/cudf_kafka +sccache --show-adv-stats + RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index bf76f4ed29a..78b8a8a08cf 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -3,7 +3,8 @@ set -euo pipefail -package_dir=$1 +package_name=$1 +package_dir=$2 source rapids-configure-sccache source rapids-date-string @@ -12,4 +13,14 @@ rapids-generate-version > ./VERSION cd "${package_dir}" -python -m pip wheel . -w dist -v --no-deps --disable-pip-version-check +sccache --zero-stats + +rapids-logger "Building '${package_name}' wheel" +python -m pip wheel \ + -w dist \ + -v \ + --no-deps \ + --disable-pip-version-check \ + . + +sccache --show-adv-stats diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh index fb93b06dbe2..fef4416a366 100755 --- a/ci/build_wheel_cudf.sh +++ b/ci/build_wheel_cudf.sh @@ -18,7 +18,7 @@ echo "libcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcudf_dist/libcudf echo "pylibcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/pylibcudf_dist/pylibcudf_*.whl)" >> /tmp/constraints.txt export PIP_CONSTRAINT="/tmp/constraints.txt" -./ci/build_wheel.sh ${package_dir} +./ci/build_wheel.sh cudf ${package_dir} python -m auditwheel repair \ --exclude libcudf.so \ diff --git a/ci/build_wheel_cudf_polars.sh b/ci/build_wheel_cudf_polars.sh index 9c945e11c00..79853cdbdb2 100755 --- a/ci/build_wheel_cudf_polars.sh +++ b/ci/build_wheel_cudf_polars.sh @@ -5,7 +5,7 @@ set -euo pipefail package_dir="python/cudf_polars" -./ci/build_wheel.sh ${package_dir} +./ci/build_wheel.sh cudf-polars ${package_dir} RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 ${package_dir}/dist +RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 python ${package_dir}/dist diff --git a/ci/build_wheel_dask_cudf.sh b/ci/build_wheel_dask_cudf.sh index eb2a91289f7..00c64afa2ef 100755 --- a/ci/build_wheel_dask_cudf.sh +++ b/ci/build_wheel_dask_cudf.sh @@ -5,7 +5,7 @@ set -euo pipefail package_dir="python/dask_cudf" -./ci/build_wheel.sh ${package_dir} +./ci/build_wheel.sh dask-cudf ${package_dir} RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 ${package_dir}/dist +RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 python ${package_dir}/dist diff --git a/ci/build_wheel_libcudf.sh b/ci/build_wheel_libcudf.sh index 91bc071583e..b3d6778ea04 100755 --- a/ci/build_wheel_libcudf.sh +++ b/ci/build_wheel_libcudf.sh @@ -3,10 +3,30 @@ set -euo pipefail +package_name="libcudf" package_dir="python/libcudf" +rapids-logger "Generating build requirements" + +rapids-dependency-file-generator \ + --output requirements \ + --file-key "py_build_${package_name}" \ + --file-key "py_rapids_build_${package_name}" \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};cuda_suffixed=true" \ +| tee /tmp/requirements-build.txt + +rapids-logger "Installing build requirements" +python -m pip install \ + -v \ + --prefer-binary \ + -r /tmp/requirements-build.txt + +# build with '--no-build-isolation', for better sccache hit rate +# 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735) +export PIP_NO_BUILD_ISOLATION=0 + export SKBUILD_CMAKE_ARGS="-DUSE_NVCOMP_RUNTIME_WHEEL=ON" -./ci/build_wheel.sh ${package_dir} +./ci/build_wheel.sh "${package_name}" "${package_dir}" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" @@ -16,4 +36,4 @@ python -m auditwheel repair \ -w ${package_dir}/final_dist \ ${package_dir}/dist/* -RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp ${package_dir}/final_dist +RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp "${package_dir}/final_dist" diff --git a/ci/build_wheel_pylibcudf.sh b/ci/build_wheel_pylibcudf.sh index 5e9f7f8a0c4..839d98846fe 100755 --- a/ci/build_wheel_pylibcudf.sh +++ b/ci/build_wheel_pylibcudf.sh @@ -16,7 +16,7 @@ RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f echo "libcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcudf_dist/libcudf_*.whl)" > /tmp/constraints.txt export PIP_CONSTRAINT="/tmp/constraints.txt" -./ci/build_wheel.sh ${package_dir} +./ci/build_wheel.sh pylibcudf ${package_dir} python -m auditwheel repair \ --exclude libcudf.so \ @@ -24,4 +24,4 @@ python -m auditwheel repair \ -w ${package_dir}/final_dist \ ${package_dir}/dist/* -RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist +RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python ${package_dir}/final_dist diff --git a/dependencies.yaml b/dependencies.yaml index 4804f7b00b0..7c7aa43fa41 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -232,7 +232,7 @@ files: key: cudf-pandas-tests includes: - test_python_cudf_pandas - py_rapids_build_cudf_polars: + py_build_cudf_polars: output: pyproject pyproject_dir: python/cudf_polars extras: From 4c04b7c8790263dc68c5753609f3cb867806359f Mon Sep 17 00:00:00 2001 From: Basit Ayantunde Date: Mon, 28 Oct 2024 21:15:29 +0000 Subject: [PATCH 40/76] Added strings AST vs BINARY_OP benchmarks (#17128) This merge request implements benchmarks to compare the strings AST and BINARY_OPs. It also moves out the common string input generator function to a common benchmarks header as it is repeated across other benchmarks. Authors: - Basit Ayantunde (https://github.com/lamarrr) Approvers: - David Wendt (https://github.com/davidwendt) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/17128 --- cpp/benchmarks/ast/transform.cpp | 95 ++++++++++++++++++- cpp/benchmarks/binaryop/binaryop.cpp | 82 +++++++++++++++- cpp/benchmarks/binaryop/compiled_binaryop.cpp | 2 +- cpp/benchmarks/common/generate_input.cu | 56 +++++++++++ cpp/benchmarks/common/generate_input.hpp | 12 +++ cpp/benchmarks/string/contains.cpp | 57 +---------- cpp/benchmarks/string/find.cpp | 7 +- cpp/benchmarks/string/like.cpp | 58 +---------- 8 files changed, 246 insertions(+), 123 deletions(-) diff --git a/cpp/benchmarks/ast/transform.cpp b/cpp/benchmarks/ast/transform.cpp index f44f26e4d2c..7fe61054a26 100644 --- a/cpp/benchmarks/ast/transform.cpp +++ b/cpp/benchmarks/ast/transform.cpp @@ -16,16 +16,29 @@ #include +#include + +#include +#include +#include +#include #include #include +#include +#include #include #include #include +#include #include +#include +#include +#include +#include #include #include #include @@ -86,7 +99,71 @@ static void BM_ast_transform(nvbench::state& state) auto const& expression_tree_root = expressions.back(); // Use the number of bytes read from global memory - state.add_global_memory_reads(table_size * (tree_levels + 1)); + state.add_global_memory_reads(static_cast(table_size) * (tree_levels + 1)); + state.add_global_memory_writes(table_size); + + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch&) { cudf::compute_column(table, expression_tree_root); }); +} + +template +static void BM_string_compare_ast_transform(nvbench::state& state) +{ + auto const string_width = static_cast(state.get_int64("string_width")); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const num_comparisons = static_cast(state.get_int64("num_comparisons")); + auto const hit_rate = static_cast(state.get_int64("hit_rate")); + + CUDF_EXPECTS(num_comparisons > 0, "benchmarks require 1 or more comparisons"); + + // Create table data + auto const num_cols = num_comparisons * 2; + std::vector> columns; + std::for_each( + thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_cols), [&](size_t) { + columns.emplace_back(create_string_column(num_rows, string_width, hit_rate)); + }); + + cudf::table table{std::move(columns)}; + cudf::table_view const table_view = table.view(); + + int64_t const chars_size = std::accumulate( + table_view.begin(), + table_view.end(), + static_cast(0), + [](int64_t size, auto& column) -> int64_t { + return size + cudf::strings_column_view{column}.chars_size(cudf::get_default_stream()); + }); + + // Create column references + auto column_refs = std::vector(); + std::transform(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_cols), + std::back_inserter(column_refs), + [](auto const& column_id) { return cudf::ast::column_reference(column_id); }); + + // Create expression trees + std::list expressions; + + // Construct AST tree (a == b && c == d && e == f && ...) + + expressions.emplace_back(cudf::ast::operation(cmp_op, column_refs[0], column_refs[1])); + + std::for_each(thrust::make_counting_iterator(1), + thrust::make_counting_iterator(num_comparisons), + [&](size_t idx) { + auto const& lhs = expressions.back(); + auto const& rhs = expressions.emplace_back( + cudf::ast::operation(cmp_op, column_refs[idx * 2], column_refs[idx * 2 + 1])); + expressions.emplace_back(cudf::ast::operation(reduce_op, lhs, rhs)); + }); + + auto const& expression_tree_root = expressions.back(); + + // Use the number of bytes read from global memory + state.add_element_count(chars_size, "chars_size"); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(num_rows); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) { cudf::compute_column(table, expression_tree_root); }); @@ -115,3 +192,19 @@ AST_TRANSFORM_BENCHMARK_DEFINE( ast_int32_imbalanced_reuse_nulls, int32_t, TreeType::IMBALANCED_LEFT, true, true); AST_TRANSFORM_BENCHMARK_DEFINE( ast_double_imbalanced_unique_nulls, double, TreeType::IMBALANCED_LEFT, false, true); + +#define AST_STRING_COMPARE_TRANSFORM_BENCHMARK_DEFINE(name, cmp_op, reduce_op) \ + static void name(::nvbench::state& st) \ + { \ + ::BM_string_compare_ast_transform(st); \ + } \ + NVBENCH_BENCH(name) \ + .set_name(#name) \ + .add_int64_axis("string_width", {32, 64, 128, 256}) \ + .add_int64_axis("num_rows", {32768, 262144, 2097152}) \ + .add_int64_axis("num_comparisons", {1, 2, 3, 4}) \ + .add_int64_axis("hit_rate", {50, 100}) + +AST_STRING_COMPARE_TRANSFORM_BENCHMARK_DEFINE(ast_string_equal_logical_and, + cudf::ast::ast_operator::EQUAL, + cudf::ast::ast_operator::LOGICAL_AND); diff --git a/cpp/benchmarks/binaryop/binaryop.cpp b/cpp/benchmarks/binaryop/binaryop.cpp index 7d267a88764..35e41c6c2a4 100644 --- a/cpp/benchmarks/binaryop/binaryop.cpp +++ b/cpp/benchmarks/binaryop/binaryop.cpp @@ -17,12 +17,18 @@ #include #include +#include +#include #include #include +#include + #include #include +#include +#include // This set of benchmarks is designed to be a comparison for the AST benchmarks @@ -44,7 +50,8 @@ static void BM_binaryop_transform(nvbench::state& state) cudf::table_view table{*source_table}; // Use the number of bytes read from global memory - state.add_global_memory_reads(table_size * (tree_levels + 1)); + state.add_global_memory_reads(static_cast(table_size) * (tree_levels + 1)); + state.add_global_memory_writes(table_size); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) { // Execute tree that chains additions like (((a + b) + c) + d) @@ -64,11 +71,65 @@ static void BM_binaryop_transform(nvbench::state& state) }); } +template +static void BM_string_compare_binaryop_transform(nvbench::state& state) +{ + auto const string_width = static_cast(state.get_int64("string_width")); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const num_comparisons = static_cast(state.get_int64("num_comparisons")); + auto const hit_rate = static_cast(state.get_int64("hit_rate")); + + CUDF_EXPECTS(num_comparisons > 0, "benchmarks require 1 or more comparisons"); + + // Create table data + auto const num_cols = num_comparisons * 2; + std::vector> columns; + std::for_each( + thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_cols), [&](size_t) { + columns.emplace_back(create_string_column(num_rows, string_width, hit_rate)); + }); + + cudf::table table{std::move(columns)}; + cudf::table_view const table_view = table.view(); + + int64_t const chars_size = std::accumulate( + table_view.begin(), table_view.end(), static_cast(0), [](int64_t size, auto& column) { + return size + cudf::strings_column_view{column}.chars_size(cudf::get_default_stream()); + }); + + // Create column references + + // Use the number of bytes read from global memory + state.add_element_count(chars_size, "chars_size"); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(num_rows); + + // Construct binary operations (a == b && c == d && e == f && ...) + auto constexpr bool_type = cudf::data_type{cudf::type_id::BOOL8}; + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + rmm::cuda_stream_view stream{launch.get_stream().get_stream()}; + std::unique_ptr reduction = + cudf::binary_operation(table.get_column(0), table.get_column(1), cmp_op, bool_type, stream); + std::for_each( + thrust::make_counting_iterator(1), + thrust::make_counting_iterator(num_comparisons), + [&](size_t idx) { + std::unique_ptr comparison = cudf::binary_operation( + table.get_column(idx * 2), table.get_column(idx * 2 + 1), cmp_op, bool_type, stream); + std::unique_ptr reduced = + cudf::binary_operation(*comparison, *reduction, reduce_op, bool_type, stream); + stream.synchronize(); + reduction = std::move(reduced); + }); + }); +} + #define BINARYOP_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns) \ \ static void name(::nvbench::state& st) \ { \ - BM_binaryop_transform(st); \ + ::BM_binaryop_transform(st); \ } \ NVBENCH_BENCH(name) \ .add_int64_axis("tree_levels", {1, 2, 5, 10}) \ @@ -86,3 +147,20 @@ BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_double_imbalanced_unique, double, TreeType::IMBALANCED_LEFT, false); + +#define STRING_COMPARE_BINARYOP_TRANSFORM_BENCHMARK_DEFINE(name, cmp_op, reduce_op) \ + \ + static void name(::nvbench::state& st) \ + { \ + ::BM_string_compare_binaryop_transform(st); \ + } \ + NVBENCH_BENCH(name) \ + .set_name(#name) \ + .add_int64_axis("string_width", {32, 64, 128, 256}) \ + .add_int64_axis("num_rows", {32768, 262144, 2097152}) \ + .add_int64_axis("num_comparisons", {1, 2, 3, 4}) \ + .add_int64_axis("hit_rate", {50, 100}) + +STRING_COMPARE_BINARYOP_TRANSFORM_BENCHMARK_DEFINE(string_compare_binaryop_transform, + cudf::binary_operator::EQUAL, + cudf::binary_operator::LOGICAL_AND); diff --git a/cpp/benchmarks/binaryop/compiled_binaryop.cpp b/cpp/benchmarks/binaryop/compiled_binaryop.cpp index bc0ff69bce9..cd3c3871a2e 100644 --- a/cpp/benchmarks/binaryop/compiled_binaryop.cpp +++ b/cpp/benchmarks/binaryop/compiled_binaryop.cpp @@ -39,7 +39,7 @@ void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop) // use number of bytes read and written to global memory state.add_global_memory_reads(table_size); state.add_global_memory_reads(table_size); - state.add_global_memory_reads(table_size); + state.add_global_memory_writes(table_size); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) { cudf::binary_operation(lhs, rhs, binop, output_dtype); }); diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu index dc258e32dc5..bdce8a31176 100644 --- a/cpp/benchmarks/common/generate_input.cu +++ b/cpp/benchmarks/common/generate_input.cu @@ -17,13 +17,17 @@ #include "generate_input.hpp" #include "random_distribution_factory.cuh" +#include + #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -918,6 +922,58 @@ std::unique_ptr create_sequence_table(std::vector co return std::make_unique(std::move(columns)); } +std::unique_ptr create_string_column(cudf::size_type num_rows, + cudf::size_type row_width, + int32_t hit_rate) +{ + // build input table using the following data + auto raw_data = cudf::test::strings_column_wrapper( + { + "123 abc 4567890 DEFGHI 0987 5W43", // matches both patterns; + "012345 6789 01234 56789 0123 456", // the rest do not match + "abc 4567890 DEFGHI 0987 Wxyz 123", + "abcdefghijklmnopqrstuvwxyz 01234", + "", + "AbcéDEFGHIJKLMNOPQRSTUVWXYZ 01", + "9876543210,abcdefghijklmnopqrstU", + "9876543210,abcdefghijklmnopqrstU", + "123 édf 4567890 DéFG 0987 X5", + "1", + }) + .release(); + + if (row_width / 32 > 1) { + std::vector columns; + for (int i = 0; i < row_width / 32; ++i) { + columns.push_back(raw_data->view()); + } + raw_data = cudf::strings::concatenate(cudf::table_view(columns)); + } + auto data_view = raw_data->view(); + + // compute number of rows in n_rows that should match + auto const num_matches = (static_cast(num_rows) * hit_rate) / 100; + + // Create a randomized gather-map to build a column out of the strings in data. + data_profile gather_profile = + data_profile_builder().cardinality(0).null_probability(0.0).distribution( + cudf::type_id::INT32, distribution_id::UNIFORM, 1, data_view.size() - 1); + auto gather_table = + create_random_table({cudf::type_id::INT32}, row_count{num_rows}, gather_profile); + gather_table->get_column(0).set_null_mask(rmm::device_buffer{}, 0); + + // Create scatter map by placing 0-index values throughout the gather-map + auto scatter_data = cudf::sequence(num_matches, + cudf::numeric_scalar(0), + cudf::numeric_scalar(num_rows / num_matches)); + auto zero_scalar = cudf::numeric_scalar(0); + auto table = cudf::scatter({zero_scalar}, scatter_data->view(), gather_table->view()); + auto gather_map = table->view().column(0); + table = cudf::gather(cudf::table_view({data_view}), gather_map); + + return std::move(table->release().front()); +} + std::pair create_random_null_mask( cudf::size_type size, std::optional null_probability, unsigned seed) { diff --git a/cpp/benchmarks/common/generate_input.hpp b/cpp/benchmarks/common/generate_input.hpp index 68d3dc492f5..57834fd11d2 100644 --- a/cpp/benchmarks/common/generate_input.hpp +++ b/cpp/benchmarks/common/generate_input.hpp @@ -670,6 +670,18 @@ std::unique_ptr create_random_column(cudf::type_id dtype_id, data_profile const& data_params = data_profile{}, unsigned seed = 1); +/** + * @brief Deterministically generates a large string column filled with data with the given + * parameters. + * + * @param num_rows Number of rows in the output column + * @param row_width Width of each string in the column + * @param hit_rate The hit rate percentage, ranging from 0 to 100 + */ +std::unique_ptr create_string_column(cudf::size_type num_rows, + cudf::size_type row_width, + int32_t hit_rate); + /** * @brief Generate sequence columns starting with value 0 in first row and increasing by 1 in * subsequent rows. diff --git a/cpp/benchmarks/string/contains.cpp b/cpp/benchmarks/string/contains.cpp index ae6c8b844c8..a73017dda18 100644 --- a/cpp/benchmarks/string/contains.cpp +++ b/cpp/benchmarks/string/contains.cpp @@ -17,10 +17,6 @@ #include #include -#include - -#include -#include #include #include #include @@ -28,57 +24,6 @@ #include -std::unique_ptr build_input_column(cudf::size_type n_rows, - cudf::size_type row_width, - int32_t hit_rate) -{ - // build input table using the following data - auto raw_data = cudf::test::strings_column_wrapper( - { - "123 abc 4567890 DEFGHI 0987 5W43", // matches both patterns; - "012345 6789 01234 56789 0123 456", // the rest do not match - "abc 4567890 DEFGHI 0987 Wxyz 123", - "abcdefghijklmnopqrstuvwxyz 01234", - "", - "AbcéDEFGHIJKLMNOPQRSTUVWXYZ 01", - "9876543210,abcdefghijklmnopqrstU", - "9876543210,abcdefghijklmnopqrstU", - "123 édf 4567890 DéFG 0987 X5", - "1", - }) - .release(); - - if (row_width / 32 > 1) { - std::vector columns; - for (int i = 0; i < row_width / 32; ++i) { - columns.push_back(raw_data->view()); - } - raw_data = cudf::strings::concatenate(cudf::table_view(columns)); - } - auto data_view = raw_data->view(); - - // compute number of rows in n_rows that should match - auto matches = static_cast(n_rows * hit_rate) / 100; - - // Create a randomized gather-map to build a column out of the strings in data. - data_profile gather_profile = - data_profile_builder().cardinality(0).null_probability(0.0).distribution( - cudf::type_id::INT32, distribution_id::UNIFORM, 1, data_view.size() - 1); - auto gather_table = - create_random_table({cudf::type_id::INT32}, row_count{n_rows}, gather_profile); - gather_table->get_column(0).set_null_mask(rmm::device_buffer{}, 0); - - // Create scatter map by placing 0-index values throughout the gather-map - auto scatter_data = cudf::sequence( - matches, cudf::numeric_scalar(0), cudf::numeric_scalar(n_rows / matches)); - auto zero_scalar = cudf::numeric_scalar(0); - auto table = cudf::scatter({zero_scalar}, scatter_data->view(), gather_table->view()); - auto gather_map = table->view().column(0); - table = cudf::gather(cudf::table_view({data_view}), gather_map); - - return std::move(table->release().front()); -} - // longer pattern lengths demand more working memory per string std::string patterns[] = {"^\\d+ [a-z]+", "[A-Z ]+\\d+ +\\d+[A-Z]+\\d+$", "5W43"}; @@ -94,7 +39,7 @@ static void bench_contains(nvbench::state& state) state.skip("Skip benchmarks greater than size_type limit"); } - auto col = build_input_column(n_rows, row_width, hit_rate); + auto col = create_string_column(n_rows, row_width, hit_rate); auto input = cudf::strings_column_view(col->view()); auto pattern = patterns[pattern_index]; diff --git a/cpp/benchmarks/string/find.cpp b/cpp/benchmarks/string/find.cpp index a9c620e4bf0..996bdcf0332 100644 --- a/cpp/benchmarks/string/find.cpp +++ b/cpp/benchmarks/string/find.cpp @@ -19,7 +19,6 @@ #include -#include #include #include #include @@ -29,10 +28,6 @@ #include -std::unique_ptr build_input_column(cudf::size_type n_rows, - cudf::size_type row_width, - int32_t hit_rate); - static void bench_find_string(nvbench::state& state) { auto const n_rows = static_cast(state.get_int64("num_rows")); @@ -46,7 +41,7 @@ static void bench_find_string(nvbench::state& state) } auto const stream = cudf::get_default_stream(); - auto const col = build_input_column(n_rows, row_width, hit_rate); + auto const col = create_string_column(n_rows, row_width, hit_rate); auto const input = cudf::strings_column_view(col->view()); std::vector h_targets({"5W", "5W43", "0987 5W43"}); diff --git a/cpp/benchmarks/string/like.cpp b/cpp/benchmarks/string/like.cpp index 99cef640dc3..105ae65cbe8 100644 --- a/cpp/benchmarks/string/like.cpp +++ b/cpp/benchmarks/string/like.cpp @@ -18,68 +18,12 @@ #include -#include -#include -#include #include #include #include #include -namespace { -std::unique_ptr build_input_column(cudf::size_type n_rows, - cudf::size_type row_width, - int32_t hit_rate) -{ - // build input table using the following data - auto raw_data = cudf::test::strings_column_wrapper( - { - "123 abc 4567890 DEFGHI 0987 5W43", // matches always; - "012345 6789 01234 56789 0123 456", // the rest do not match - "abc 4567890 DEFGHI 0987 Wxyz 123", - "abcdefghijklmnopqrstuvwxyz 01234", - "", - "AbcéDEFGHIJKLMNOPQRSTUVWXYZ 01", - "9876543210,abcdefghijklmnopqrstU", - "9876543210,abcdefghijklmnopqrstU", - "123 édf 4567890 DéFG 0987 X5", - "1", - }) - .release(); - if (row_width / 32 > 1) { - std::vector columns; - for (int i = 0; i < row_width / 32; ++i) { - columns.push_back(raw_data->view()); - } - raw_data = cudf::strings::concatenate(cudf::table_view(columns)); - } - auto data_view = raw_data->view(); - - // compute number of rows in n_rows that should match - auto matches = static_cast(n_rows * hit_rate) / 100; - - // Create a randomized gather-map to build a column out of the strings in data. - data_profile gather_profile = - data_profile_builder().cardinality(0).null_probability(0.0).distribution( - cudf::type_id::INT32, distribution_id::UNIFORM, 1, data_view.size() - 1); - auto gather_table = - create_random_table({cudf::type_id::INT32}, row_count{n_rows}, gather_profile); - gather_table->get_column(0).set_null_mask(rmm::device_buffer{}, 0); - - // Create scatter map by placing 0-index values throughout the gather-map - auto scatter_data = cudf::sequence( - matches, cudf::numeric_scalar(0), cudf::numeric_scalar(n_rows / matches)); - auto zero_scalar = cudf::numeric_scalar(0); - auto table = cudf::scatter({zero_scalar}, scatter_data->view(), gather_table->view()); - auto gather_map = table->view().column(0); - table = cudf::gather(cudf::table_view({data_view}), gather_map); - - return std::move(table->release().front()); -} - -} // namespace - static void bench_like(nvbench::state& state) { auto const n_rows = static_cast(state.get_int64("num_rows")); @@ -91,7 +35,7 @@ static void bench_like(nvbench::state& state) state.skip("Skip benchmarks greater than size_type limit"); } - auto col = build_input_column(n_rows, row_width, hit_rate); + auto col = create_string_column(n_rows, row_width, hit_rate); auto input = cudf::strings_column_view(col->view()); // This pattern forces reading the entire target string (when matched expected) From 1ad9fc1feef0ea0ee38adaa8f05cde6bb05aff0f Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 28 Oct 2024 16:33:50 -0700 Subject: [PATCH 41/76] Remove includes suggested by include-what-you-use (#17170) This PR cherry-picks out the suggestions from IWYU generated in #17078. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/17170 --- cpp/include/cudf/ast/detail/expression_parser.hpp | 4 ---- cpp/include/cudf/column/column_factories.hpp | 2 -- cpp/include/cudf/column/column_view.hpp | 1 - .../cudf/detail/aggregation/result_cache.hpp | 1 - cpp/include/cudf/detail/is_element_valid.hpp | 1 - .../cudf/dictionary/dictionary_column_view.hpp | 1 - cpp/include/cudf/io/text/detail/bgzip_utils.hpp | 6 ------ cpp/include/cudf/utilities/default_stream.hpp | 2 -- cpp/include/cudf/utilities/traits.hpp | 2 -- cpp/include/cudf/utilities/type_dispatcher.hpp | 1 - cpp/src/ast/expression_parser.cpp | 5 +---- cpp/src/ast/expressions.cpp | 5 +---- cpp/src/binaryop/binaryop.cpp | 5 ----- cpp/src/column/column_factories.cpp | 6 ------ cpp/src/column/column_view.cpp | 3 --- cpp/src/copying/copy.cpp | 5 ----- cpp/src/copying/pack.cpp | 1 - cpp/src/copying/split.cpp | 2 -- cpp/src/datetime/timezone.cpp | 2 -- cpp/src/groupby/hash/flatten_single_pass_aggs.hpp | 1 - cpp/src/groupby/sort/aggregate.cpp | 1 - cpp/src/interop/arrow_utilities.cpp | 5 ----- cpp/src/interop/arrow_utilities.hpp | 1 - cpp/src/interop/dlpack.cpp | 3 --- cpp/src/io/avro/avro.cpp | 1 - cpp/src/io/avro/avro.hpp | 2 -- cpp/src/io/comp/nvcomp_adapter.hpp | 2 -- cpp/src/io/comp/uncomp.cpp | 2 -- cpp/src/io/functions.cpp | 2 -- cpp/src/io/json/host_tree_algorithms.cu | 1 + cpp/src/io/json/nested_json.hpp | 3 --- cpp/src/io/orc/reader_impl_decode.cu | 1 + cpp/src/io/orc/reader_impl_helpers.cpp | 2 -- cpp/src/io/orc/reader_impl_helpers.hpp | 3 --- cpp/src/io/parquet/arrow_schema_writer.cpp | 1 - cpp/src/io/parquet/arrow_schema_writer.hpp | 5 ++--- cpp/src/io/parquet/compact_protocol_reader.hpp | 3 --- cpp/src/io/parquet/compact_protocol_writer.hpp | 1 - cpp/src/io/parquet/predicate_pushdown.cpp | 1 - cpp/src/io/parquet/reader.cpp | 2 -- cpp/src/io/parquet/reader_impl.cpp | 2 -- cpp/src/io/text/data_chunk_source_factories.cpp | 4 ---- cpp/src/io/utilities/column_buffer.cpp | 2 +- cpp/src/io/utilities/column_buffer.hpp | 5 ++--- cpp/src/io/utilities/config_utils.cpp | 4 ---- cpp/src/io/utilities/datasource.cpp | 1 - cpp/src/io/utilities/file_io_utilities.cpp | 2 -- cpp/src/io/utilities/row_selection.cpp | 3 --- cpp/src/io/utilities/row_selection.hpp | 2 +- cpp/src/jit/cache.cpp | 3 --- cpp/src/jit/util.cpp | 4 +--- cpp/src/quantiles/tdigest/tdigest_column_view.cpp | 3 +-- cpp/src/reductions/reductions.cpp | 2 -- cpp/src/reductions/scan/scan.cpp | 3 --- cpp/src/reductions/segmented/reductions.cpp | 4 ---- .../rolling/detail/optimized_unbounded_window.cpp | 3 --- cpp/src/rolling/detail/range_window_bounds.hpp | 5 +---- cpp/src/rolling/range_window_bounds.cpp | 1 - cpp/src/scalar/scalar.cpp | 2 -- cpp/src/scalar/scalar_factories.cpp | 2 -- cpp/src/strings/regex/regexec.cpp | 1 - cpp/src/strings/strings_scalar_factories.cpp | 1 - cpp/src/structs/structs_column_view.cpp | 3 +-- cpp/src/structs/utilities.cpp | 3 --- cpp/src/table/table.cpp | 1 - cpp/src/table/table_view.cpp | 3 --- cpp/src/transform/transform.cpp | 2 -- cpp/src/utilities/cuda.cpp | 2 -- cpp/src/utilities/host_memory.cpp | 1 - cpp/src/utilities/prefetch.cpp | 1 - cpp/src/utilities/stream_pool.cpp | 1 - cpp/src/utilities/traits.cpp | 2 -- cpp/src/utilities/type_checks.cpp | 2 -- cpp/tests/ast/transform_tests.cpp | 8 -------- cpp/tests/binaryop/binop-compiled-test.cpp | 2 -- cpp/tests/binaryop/binop-generic-ptx-test.cpp | 3 +-- cpp/tests/bitmask/bitmask_tests.cpp | 1 - cpp/tests/column/bit_cast_test.cpp | 3 --- cpp/tests/column/column_test.cpp | 1 - cpp/tests/column/column_view_device_span_test.cpp | 1 - cpp/tests/column/column_view_shallow_test.cpp | 2 -- cpp/tests/column/factories_test.cpp | 3 --- cpp/tests/copying/concatenate_tests.cpp | 2 -- cpp/tests/copying/copy_if_else_nested_tests.cpp | 3 +-- cpp/tests/copying/copy_range_tests.cpp | 1 - cpp/tests/copying/copy_tests.cpp | 1 - cpp/tests/copying/gather_list_tests.cpp | 4 +--- cpp/tests/copying/gather_str_tests.cpp | 1 - cpp/tests/copying/gather_struct_tests.cpp | 5 ----- cpp/tests/copying/gather_tests.cpp | 1 - cpp/tests/copying/get_value_tests.cpp | 2 -- cpp/tests/copying/purge_nonempty_nulls_tests.cpp | 3 --- cpp/tests/copying/reverse_tests.cpp | 6 +----- cpp/tests/copying/sample_tests.cpp | 5 +---- cpp/tests/copying/scatter_list_scalar_tests.cpp | 3 +-- cpp/tests/copying/scatter_list_tests.cpp | 1 - cpp/tests/copying/scatter_struct_scalar_tests.cpp | 3 +-- cpp/tests/copying/scatter_struct_tests.cpp | 1 - cpp/tests/copying/scatter_tests.cpp | 2 -- cpp/tests/copying/segmented_gather_list_tests.cpp | 3 +-- cpp/tests/copying/shift_tests.cpp | 2 -- cpp/tests/copying/slice_tests.cpp | 4 ---- cpp/tests/copying/utility_tests.cpp | 1 - cpp/tests/datetime/datetime_ops_test.cpp | 3 --- cpp/tests/dictionary/add_keys_test.cpp | 2 -- cpp/tests/dictionary/encode_test.cpp | 2 -- cpp/tests/dictionary/fill_test.cpp | 3 --- cpp/tests/dictionary/search_test.cpp | 1 - cpp/tests/dictionary/slice_test.cpp | 1 - cpp/tests/filling/fill_tests.cpp | 1 - cpp/tests/filling/repeat_tests.cpp | 4 ---- cpp/tests/filling/sequence_tests.cpp | 1 - cpp/tests/fixed_point/fixed_point_tests.cpp | 3 --- cpp/tests/groupby/collect_list_tests.cpp | 2 -- cpp/tests/groupby/collect_set_tests.cpp | 1 - cpp/tests/groupby/correlation_tests.cpp | 1 - cpp/tests/groupby/covariance_tests.cpp | 2 -- cpp/tests/groupby/groupby_test_util.cpp | 5 +---- cpp/tests/groupby/groupby_test_util.hpp | 5 +---- cpp/tests/groupby/histogram_tests.cpp | 1 - cpp/tests/groupby/max_scan_tests.cpp | 1 - cpp/tests/groupby/merge_lists_tests.cpp | 1 - cpp/tests/groupby/merge_sets_tests.cpp | 1 - cpp/tests/groupby/rank_scan_tests.cpp | 2 -- cpp/tests/groupby/shift_tests.cpp | 1 - cpp/tests/hashing/md5_test.cpp | 1 - cpp/tests/hashing/murmurhash3_x86_32_test.cpp | 2 -- cpp/tests/hashing/sha1_test.cpp | 1 - cpp/tests/hashing/sha224_test.cpp | 1 - cpp/tests/hashing/sha256_test.cpp | 1 - cpp/tests/hashing/sha384_test.cpp | 1 - cpp/tests/hashing/sha512_test.cpp | 1 - cpp/tests/hashing/xxhash_64_test.cpp | 3 --- cpp/tests/interop/from_arrow_device_test.cpp | 4 ---- cpp/tests/interop/from_arrow_host_test.cpp | 2 -- cpp/tests/interop/from_arrow_stream_test.cpp | 13 ------------- cpp/tests/interop/from_arrow_test.cpp | 4 ---- cpp/tests/interop/to_arrow_device_test.cpp | 6 ------ cpp/tests/interop/to_arrow_host_test.cpp | 6 ------ cpp/tests/interop/to_arrow_test.cpp | 2 -- cpp/tests/io/csv_test.cpp | 8 -------- cpp/tests/io/file_io_test.cpp | 3 --- cpp/tests/io/json/json_quote_normalization_test.cpp | 2 -- cpp/tests/io/json/json_test.cpp | 2 -- cpp/tests/io/json/json_tree.cpp | 6 +----- cpp/tests/io/json/nested_json_test.cpp | 8 -------- cpp/tests/io/orc_test.cpp | 1 - cpp/tests/io/parquet_common.hpp | 2 -- cpp/tests/io/parquet_misc_test.cpp | 2 -- cpp/tests/io/parquet_reader_test.cpp | 2 ++ cpp/tests/io/parquet_test.cpp | 1 - cpp/tests/io/row_selection_test.cpp | 1 - cpp/tests/io/text/data_chunk_source_test.cpp | 3 --- cpp/tests/io/text/multibyte_split_test.cpp | 4 ---- cpp/tests/iterator/value_iterator.cpp | 1 - cpp/tests/jit/parse_ptx_function.cpp | 1 - cpp/tests/join/cross_join_tests.cpp | 1 - cpp/tests/join/distinct_join_tests.cpp | 5 ----- cpp/tests/join/join_tests.cpp | 5 ----- cpp/tests/join/semi_anti_join_tests.cpp | 1 - cpp/tests/json/json_tests.cpp | 1 - cpp/tests/large_strings/large_strings_fixture.cpp | 2 -- cpp/tests/large_strings/parquet_tests.cpp | 2 -- cpp/tests/lists/contains_tests.cpp | 1 - cpp/tests/lists/extract_tests.cpp | 4 ---- cpp/tests/lists/sequences_tests.cpp | 1 - .../stream_compaction/apply_boolean_mask_tests.cpp | 2 -- cpp/tests/merge/merge_dictionary_test.cpp | 2 -- cpp/tests/merge/merge_string_test.cpp | 6 ------ cpp/tests/merge/merge_test.cpp | 2 -- cpp/tests/partitioning/round_robin_test.cpp | 7 ------- cpp/tests/quantiles/quantile_test.cpp | 1 - cpp/tests/quantiles/quantiles_test.cpp | 1 - cpp/tests/reductions/ewm_tests.cpp | 2 -- cpp/tests/reductions/list_rank_test.cpp | 5 ----- cpp/tests/reductions/rank_tests.cpp | 3 +-- cpp/tests/reductions/reduction_tests.cpp | 4 ---- cpp/tests/reductions/scan_tests.cpp | 2 -- cpp/tests/reductions/scan_tests.hpp | 5 +---- cpp/tests/replace/clamp_test.cpp | 1 - cpp/tests/replace/normalize_replace_tests.cpp | 1 - cpp/tests/replace/replace_nans_tests.cpp | 1 - cpp/tests/replace/replace_nulls_tests.cpp | 2 -- cpp/tests/replace/replace_tests.cpp | 4 ---- cpp/tests/reshape/byte_cast_tests.cpp | 1 - cpp/tests/reshape/tile_tests.cpp | 3 +-- cpp/tests/rolling/collect_ops_test.cpp | 1 - cpp/tests/rolling/empty_input_test.cpp | 4 +--- cpp/tests/rolling/grouped_rolling_range_test.cpp | 7 +------ cpp/tests/rolling/grouped_rolling_test.cpp | 1 - cpp/tests/rolling/lead_lag_test.cpp | 4 +--- cpp/tests/rolling/nth_element_test.cpp | 7 ------- cpp/tests/rolling/offset_row_window_test.cpp | 4 ---- cpp/tests/rolling/range_rolling_window_test.cpp | 5 ----- cpp/tests/rolling/range_window_bounds_test.cpp | 5 ----- cpp/tests/rolling/rolling_test.cpp | 2 -- cpp/tests/scalar/factories_test.cpp | 3 --- cpp/tests/search/search_dictionary_test.cpp | 1 - cpp/tests/search/search_list_test.cpp | 1 - cpp/tests/search/search_struct_test.cpp | 3 +-- cpp/tests/search/search_test.cpp | 1 - cpp/tests/sort/is_sorted_tests.cpp | 1 - cpp/tests/sort/rank_test.cpp | 2 -- cpp/tests/sort/sort_nested_types_tests.cpp | 3 +-- cpp/tests/sort/sort_test.cpp | 1 - cpp/tests/sort/stable_sort_tests.cpp | 3 --- .../stream_compaction/apply_boolean_mask_tests.cpp | 4 ---- .../stream_compaction/distinct_count_tests.cpp | 5 ----- cpp/tests/stream_compaction/distinct_tests.cpp | 3 --- cpp/tests/stream_compaction/drop_nans_tests.cpp | 3 --- cpp/tests/stream_compaction/drop_nulls_tests.cpp | 2 -- .../stream_compaction/stable_distinct_tests.cpp | 4 ---- cpp/tests/stream_compaction/unique_count_tests.cpp | 5 ----- cpp/tests/stream_compaction/unique_tests.cpp | 6 ------ cpp/tests/streams/binaryop_test.cpp | 1 - cpp/tests/streams/io/csv_test.cpp | 4 ---- cpp/tests/streams/io/json_test.cpp | 2 -- cpp/tests/streams/io/multibyte_split_test.cpp | 1 - cpp/tests/streams/io/orc_test.cpp | 8 -------- cpp/tests/streams/io/parquet_test.cpp | 4 ---- cpp/tests/streams/join_test.cpp | 2 -- cpp/tests/streams/null_mask_test.cpp | 3 --- cpp/tests/streams/reduction_test.cpp | 3 --- cpp/tests/streams/rolling_test.cpp | 2 -- cpp/tests/streams/stream_compaction_test.cpp | 4 ---- cpp/tests/streams/strings/factory_test.cpp | 1 - cpp/tests/streams/strings/reverse_test.cpp | 1 - cpp/tests/streams/transform_test.cpp | 6 ------ cpp/tests/strings/array_tests.cpp | 2 -- cpp/tests/strings/combine/concatenate_tests.cpp | 1 - .../strings/combine/join_list_elements_tests.cpp | 1 - cpp/tests/strings/concatenate_tests.cpp | 3 +-- cpp/tests/strings/datetime_tests.cpp | 1 - cpp/tests/strings/extract_tests.cpp | 1 - cpp/tests/strings/findall_tests.cpp | 3 --- cpp/tests/strings/fixed_point_tests.cpp | 2 -- cpp/tests/strings/integers_tests.cpp | 3 --- cpp/tests/structs/structs_column_tests.cpp | 10 ---------- cpp/tests/structs/utilities_tests.cpp | 6 ------ cpp/tests/table/row_operators_tests.cpp | 1 - cpp/tests/table/table_tests.cpp | 3 --- cpp/tests/text/minhash_tests.cpp | 4 ---- cpp/tests/text/ngrams_tests.cpp | 2 -- cpp/tests/text/normalize_tests.cpp | 1 - cpp/tests/text/stemmer_tests.cpp | 1 - cpp/tests/text/subword_tests.cpp | 2 -- cpp/tests/transform/bools_to_mask_test.cpp | 2 -- cpp/tests/transform/nans_to_null_test.cpp | 2 -- cpp/tests/transpose/transpose_test.cpp | 1 - cpp/tests/types/traits_test.cpp | 1 - cpp/tests/unary/cast_tests.cpp | 3 --- cpp/tests/unary/math_ops_test.cpp | 4 ---- cpp/tests/unary/unary_ops_test.cpp | 1 - cpp/tests/utilities/random_seed.cpp | 3 ++- cpp/tests/utilities_tests/column_debug_tests.cpp | 3 --- .../utilities_tests/column_utilities_tests.cpp | 4 ---- cpp/tests/utilities_tests/column_wrapper_tests.cpp | 1 - .../utilities_tests/lists_column_wrapper_tests.cpp | 1 - cpp/tests/utilities_tests/type_check_tests.cpp | 1 - cpp/tests/utilities_tests/type_list_tests.cpp | 2 +- 260 files changed, 39 insertions(+), 643 deletions(-) diff --git a/cpp/include/cudf/ast/detail/expression_parser.hpp b/cpp/include/cudf/ast/detail/expression_parser.hpp index a254171ef11..f4cce8e6da6 100644 --- a/cpp/include/cudf/ast/detail/expression_parser.hpp +++ b/cpp/include/cudf/ast/detail/expression_parser.hpp @@ -17,12 +17,8 @@ #include #include -#include #include #include -#include - -#include #include #include diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index 6bbe32de134..e72661ce49a 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -24,8 +24,6 @@ #include -#include - namespace CUDF_EXPORT cudf { /** * @addtogroup column_factories diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index 48f89b8be25..6db5c8b3c7b 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -16,7 +16,6 @@ #pragma once #include -#include #include #include #include diff --git a/cpp/include/cudf/detail/aggregation/result_cache.hpp b/cpp/include/cudf/detail/aggregation/result_cache.hpp index ec5a511bb7c..486808ebe18 100644 --- a/cpp/include/cudf/detail/aggregation/result_cache.hpp +++ b/cpp/include/cudf/detail/aggregation/result_cache.hpp @@ -19,7 +19,6 @@ #include #include #include -#include #include diff --git a/cpp/include/cudf/detail/is_element_valid.hpp b/cpp/include/cudf/detail/is_element_valid.hpp index 4b74d12f306..26b1bec2ced 100644 --- a/cpp/include/cudf/detail/is_element_valid.hpp +++ b/cpp/include/cudf/detail/is_element_valid.hpp @@ -17,7 +17,6 @@ #pragma once #include -#include #include #include diff --git a/cpp/include/cudf/dictionary/dictionary_column_view.hpp b/cpp/include/cudf/dictionary/dictionary_column_view.hpp index 5596f78a90b..0a799f27d00 100644 --- a/cpp/include/cudf/dictionary/dictionary_column_view.hpp +++ b/cpp/include/cudf/dictionary/dictionary_column_view.hpp @@ -15,7 +15,6 @@ */ #pragma once -#include #include /** diff --git a/cpp/include/cudf/io/text/detail/bgzip_utils.hpp b/cpp/include/cudf/io/text/detail/bgzip_utils.hpp index 11eb4518210..5659f86b0c4 100644 --- a/cpp/include/cudf/io/text/detail/bgzip_utils.hpp +++ b/cpp/include/cudf/io/text/detail/bgzip_utils.hpp @@ -16,16 +16,10 @@ #pragma once -#include #include #include -#include - -#include -#include #include -#include namespace CUDF_EXPORT cudf { namespace io::text::detail::bgzip { diff --git a/cpp/include/cudf/utilities/default_stream.hpp b/cpp/include/cudf/utilities/default_stream.hpp index 97a42243250..3e740b81cc9 100644 --- a/cpp/include/cudf/utilities/default_stream.hpp +++ b/cpp/include/cudf/utilities/default_stream.hpp @@ -16,10 +16,8 @@ #pragma once -#include #include -#include #include namespace CUDF_EXPORT cudf { diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index 3f37ae02151..cf8413b597f 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -22,8 +22,6 @@ #include #include -#include - namespace CUDF_EXPORT cudf { /** diff --git a/cpp/include/cudf/utilities/type_dispatcher.hpp b/cpp/include/cudf/utilities/type_dispatcher.hpp index 15b5f921c1b..6351a84e38f 100644 --- a/cpp/include/cudf/utilities/type_dispatcher.hpp +++ b/cpp/include/cudf/utilities/type_dispatcher.hpp @@ -16,7 +16,6 @@ #pragma once -#include #include #include #include diff --git a/cpp/src/ast/expression_parser.cpp b/cpp/src/ast/expression_parser.cpp index 3b650d791aa..5815ce33e33 100644 --- a/cpp/src/ast/expression_parser.cpp +++ b/cpp/src/ast/expression_parser.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,9 +16,6 @@ #include #include #include -#include -#include -#include #include #include #include diff --git a/cpp/src/ast/expressions.cpp b/cpp/src/ast/expressions.cpp index b45b9d0c78c..4c2b56dd4f5 100644 --- a/cpp/src/ast/expressions.cpp +++ b/cpp/src/ast/expressions.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,9 +17,6 @@ #include #include #include -#include -#include -#include #include #include diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index a6c878efbbc..1b23ea12a5e 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -27,15 +27,10 @@ #include #include #include -#include #include -#include #include #include -#include -#include #include -#include #include #include diff --git a/cpp/src/column/column_factories.cpp b/cpp/src/column/column_factories.cpp index 482413d0ccb..972f97e8668 100644 --- a/cpp/src/column/column_factories.cpp +++ b/cpp/src/column/column_factories.cpp @@ -15,19 +15,13 @@ */ #include -#include #include #include #include -#include #include -#include #include -#include #include -#include - namespace cudf { namespace { struct size_of_helper { diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp index 386c5ebe478..e831aa9645d 100644 --- a/cpp/src/column/column_view.cpp +++ b/cpp/src/column/column_view.cpp @@ -15,7 +15,6 @@ */ #include -#include #include #include #include @@ -27,9 +26,7 @@ #include #include -#include #include -#include #include namespace cudf { diff --git a/cpp/src/copying/copy.cpp b/cpp/src/copying/copy.cpp index d60fb5ce110..5e2065ba844 100644 --- a/cpp/src/copying/copy.cpp +++ b/cpp/src/copying/copy.cpp @@ -20,16 +20,11 @@ #include #include #include -#include #include -#include -#include #include #include -#include - #include namespace cudf { diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp index 1282eec6c44..a001807c82b 100644 --- a/cpp/src/copying/pack.cpp +++ b/cpp/src/copying/pack.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include diff --git a/cpp/src/copying/split.cpp b/cpp/src/copying/split.cpp index 832a72ed5b0..116e3516460 100644 --- a/cpp/src/copying/split.cpp +++ b/cpp/src/copying/split.cpp @@ -14,10 +14,8 @@ * limitations under the License. */ -#include #include #include -#include #include #include diff --git a/cpp/src/datetime/timezone.cpp b/cpp/src/datetime/timezone.cpp index 2196ee97fee..f786624680c 100644 --- a/cpp/src/datetime/timezone.cpp +++ b/cpp/src/datetime/timezone.cpp @@ -13,12 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include #include #include #include #include -#include #include #include diff --git a/cpp/src/groupby/hash/flatten_single_pass_aggs.hpp b/cpp/src/groupby/hash/flatten_single_pass_aggs.hpp index 2bf983e5e90..dfad51f27d4 100644 --- a/cpp/src/groupby/hash/flatten_single_pass_aggs.hpp +++ b/cpp/src/groupby/hash/flatten_single_pass_aggs.hpp @@ -17,7 +17,6 @@ #include #include -#include #include #include diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index a9085a1f1fd..3041e261945 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/cpp/src/interop/arrow_utilities.cpp b/cpp/src/interop/arrow_utilities.cpp index a99262fb3bf..c69ebe12d2c 100644 --- a/cpp/src/interop/arrow_utilities.cpp +++ b/cpp/src/interop/arrow_utilities.cpp @@ -20,11 +20,6 @@ #include #include -#include - -#include -#include - #include namespace cudf { diff --git a/cpp/src/interop/arrow_utilities.hpp b/cpp/src/interop/arrow_utilities.hpp index 1b79fbf9eda..e4bdedf6603 100644 --- a/cpp/src/interop/arrow_utilities.hpp +++ b/cpp/src/interop/arrow_utilities.hpp @@ -17,7 +17,6 @@ #pragma once #include -#include #include #include diff --git a/cpp/src/interop/dlpack.cpp b/cpp/src/interop/dlpack.cpp index a1be6aade4e..4395b741e53 100644 --- a/cpp/src/interop/dlpack.cpp +++ b/cpp/src/interop/dlpack.cpp @@ -16,11 +16,8 @@ #include #include #include -#include -#include #include #include -#include #include #include #include diff --git a/cpp/src/io/avro/avro.cpp b/cpp/src/io/avro/avro.cpp index d5caa4720ac..b3fcca62314 100644 --- a/cpp/src/io/avro/avro.cpp +++ b/cpp/src/io/avro/avro.cpp @@ -17,7 +17,6 @@ #include "avro.hpp" #include -#include #include namespace cudf { diff --git a/cpp/src/io/avro/avro.hpp b/cpp/src/io/avro/avro.hpp index 2e992546ccc..fd2c781b8a1 100644 --- a/cpp/src/io/avro/avro.hpp +++ b/cpp/src/io/avro/avro.hpp @@ -18,11 +18,9 @@ #include "avro_common.hpp" -#include #include #include #include -#include #include #include #include diff --git a/cpp/src/io/comp/nvcomp_adapter.hpp b/cpp/src/io/comp/nvcomp_adapter.hpp index 583bd6a3523..2e1cda2d6b7 100644 --- a/cpp/src/io/comp/nvcomp_adapter.hpp +++ b/cpp/src/io/comp/nvcomp_adapter.hpp @@ -18,9 +18,7 @@ #include "gpuinflate.hpp" -#include #include -#include #include #include diff --git a/cpp/src/io/comp/uncomp.cpp b/cpp/src/io/comp/uncomp.cpp index d4d6f46b99a..fb8c308065d 100644 --- a/cpp/src/io/comp/uncomp.cpp +++ b/cpp/src/io/comp/uncomp.cpp @@ -24,8 +24,6 @@ #include #include -#include - #include // uncompress #include // memset diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index a8682e6a760..ceaeb5d8f85 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -32,10 +32,8 @@ #include #include #include -#include #include #include -#include #include #include diff --git a/cpp/src/io/json/host_tree_algorithms.cu b/cpp/src/io/json/host_tree_algorithms.cu index d06338c6f69..570a00cbfc2 100644 --- a/cpp/src/io/json/host_tree_algorithms.cu +++ b/cpp/src/io/json/host_tree_algorithms.cu @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index f6be4539d7f..7b3b04dea16 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -19,10 +19,7 @@ #include #include #include -#include -#include #include -#include #include #include diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index c42348a165f..0081ed30d17 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -23,6 +23,7 @@ #include #include +#include #include #include #include diff --git a/cpp/src/io/orc/reader_impl_helpers.cpp b/cpp/src/io/orc/reader_impl_helpers.cpp index 4c1079cffe8..7e5db4b7617 100644 --- a/cpp/src/io/orc/reader_impl_helpers.cpp +++ b/cpp/src/io/orc/reader_impl_helpers.cpp @@ -16,8 +16,6 @@ #include "reader_impl_helpers.hpp" -#include - namespace cudf::io::orc::detail { std::unique_ptr create_empty_column(size_type orc_col_id, diff --git a/cpp/src/io/orc/reader_impl_helpers.hpp b/cpp/src/io/orc/reader_impl_helpers.hpp index 5528b2ee763..4cded30d89b 100644 --- a/cpp/src/io/orc/reader_impl_helpers.hpp +++ b/cpp/src/io/orc/reader_impl_helpers.hpp @@ -20,9 +20,6 @@ #include "io/orc/orc.hpp" #include "io/utilities/column_buffer.hpp" -#include -#include - #include #include diff --git a/cpp/src/io/parquet/arrow_schema_writer.cpp b/cpp/src/io/parquet/arrow_schema_writer.cpp index ddf65e9020f..d15435b2553 100644 --- a/cpp/src/io/parquet/arrow_schema_writer.cpp +++ b/cpp/src/io/parquet/arrow_schema_writer.cpp @@ -27,7 +27,6 @@ #include "ipc/Schema_generated.h" #include "writer_impl_helpers.hpp" -#include #include #include diff --git a/cpp/src/io/parquet/arrow_schema_writer.hpp b/cpp/src/io/parquet/arrow_schema_writer.hpp index 9bc435bf6c8..66810ee163a 100644 --- a/cpp/src/io/parquet/arrow_schema_writer.hpp +++ b/cpp/src/io/parquet/arrow_schema_writer.hpp @@ -22,10 +22,9 @@ #pragma once #include -#include -#include +#include +#include #include -#include namespace cudf::io::parquet::detail { diff --git a/cpp/src/io/parquet/compact_protocol_reader.hpp b/cpp/src/io/parquet/compact_protocol_reader.hpp index 12c24e2b848..b87f2e9c692 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.hpp +++ b/cpp/src/io/parquet/compact_protocol_reader.hpp @@ -22,10 +22,7 @@ #include #include -#include -#include #include -#include namespace CUDF_EXPORT cudf { namespace io::parquet::detail { diff --git a/cpp/src/io/parquet/compact_protocol_writer.hpp b/cpp/src/io/parquet/compact_protocol_writer.hpp index d4778b1ea15..05859d60c03 100644 --- a/cpp/src/io/parquet/compact_protocol_writer.hpp +++ b/cpp/src/io/parquet/compact_protocol_writer.hpp @@ -17,7 +17,6 @@ #pragma once #include "parquet.hpp" -#include "parquet_common.hpp" #include #include diff --git a/cpp/src/io/parquet/predicate_pushdown.cpp b/cpp/src/io/parquet/predicate_pushdown.cpp index 32e922b04bb..a965f3325d5 100644 --- a/cpp/src/io/parquet/predicate_pushdown.cpp +++ b/cpp/src/io/parquet/predicate_pushdown.cpp @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/cpp/src/io/parquet/reader.cpp b/cpp/src/io/parquet/reader.cpp index dd354b905f3..170c6e8857f 100644 --- a/cpp/src/io/parquet/reader.cpp +++ b/cpp/src/io/parquet/reader.cpp @@ -16,8 +16,6 @@ #include "reader_impl.hpp" -#include - namespace cudf::io::parquet::detail { reader::reader() = default; diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index 0705ff6f5cc..fed1a309064 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -21,11 +21,9 @@ #include #include #include -#include #include #include -#include #include #include diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp index 4baea8655e0..f4a2f29026a 100644 --- a/cpp/src/io/text/data_chunk_source_factories.cpp +++ b/cpp/src/io/text/data_chunk_source_factories.cpp @@ -22,10 +22,6 @@ #include #include -#include - -#include - #include namespace cudf::io::text { diff --git a/cpp/src/io/utilities/column_buffer.cpp b/cpp/src/io/utilities/column_buffer.cpp index 249dc3b5875..6d954753af8 100644 --- a/cpp/src/io/utilities/column_buffer.cpp +++ b/cpp/src/io/utilities/column_buffer.cpp @@ -21,12 +21,12 @@ #include "column_buffer.hpp" +#include #include #include #include #include -#include #include namespace cudf::io::detail { diff --git a/cpp/src/io/utilities/column_buffer.hpp b/cpp/src/io/utilities/column_buffer.hpp index e73b2bc88de..31c8b781e77 100644 --- a/cpp/src/io/utilities/column_buffer.hpp +++ b/cpp/src/io/utilities/column_buffer.hpp @@ -22,12 +22,9 @@ #pragma once #include -#include #include -#include #include #include -#include #include #include @@ -35,6 +32,8 @@ #include +#include + namespace cudf { namespace io { namespace detail { diff --git a/cpp/src/io/utilities/config_utils.cpp b/cpp/src/io/utilities/config_utils.cpp index 813743fa7b4..b66742569d9 100644 --- a/cpp/src/io/utilities/config_utils.cpp +++ b/cpp/src/io/utilities/config_utils.cpp @@ -16,14 +16,10 @@ #include "getenv_or.hpp" -#include #include #include -#include -#include -#include #include namespace cudf::io { diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index 4e8908a8942..9668b30e9a9 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -33,7 +33,6 @@ #include #include -#include #include namespace cudf { diff --git a/cpp/src/io/utilities/file_io_utilities.cpp b/cpp/src/io/utilities/file_io_utilities.cpp index 98ed9b28f0a..93cdccfbb9f 100644 --- a/cpp/src/io/utilities/file_io_utilities.cpp +++ b/cpp/src/io/utilities/file_io_utilities.cpp @@ -22,8 +22,6 @@ #include #include -#include - #include #include diff --git a/cpp/src/io/utilities/row_selection.cpp b/cpp/src/io/utilities/row_selection.cpp index c0bbca39167..cf252fe63af 100644 --- a/cpp/src/io/utilities/row_selection.cpp +++ b/cpp/src/io/utilities/row_selection.cpp @@ -16,10 +16,7 @@ #include "io/utilities/row_selection.hpp" -#include - #include -#include namespace cudf::io::detail { diff --git a/cpp/src/io/utilities/row_selection.hpp b/cpp/src/io/utilities/row_selection.hpp index 7c607099cdc..e826feff201 100644 --- a/cpp/src/io/utilities/row_selection.hpp +++ b/cpp/src/io/utilities/row_selection.hpp @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include diff --git a/cpp/src/jit/cache.cpp b/cpp/src/jit/cache.cpp index 89c47d246d0..34a0bdce124 100644 --- a/cpp/src/jit/cache.cpp +++ b/cpp/src/jit/cache.cpp @@ -16,11 +16,8 @@ #include -#include - #include -#include #include namespace cudf { diff --git a/cpp/src/jit/util.cpp b/cpp/src/jit/util.cpp index 0585e02a031..d9a29203133 100644 --- a/cpp/src/jit/util.cpp +++ b/cpp/src/jit/util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,8 +19,6 @@ #include #include -#include - namespace cudf { namespace jit { struct get_data_ptr_functor { diff --git a/cpp/src/quantiles/tdigest/tdigest_column_view.cpp b/cpp/src/quantiles/tdigest/tdigest_column_view.cpp index a9f86ac1b5f..17844b6bb0a 100644 --- a/cpp/src/quantiles/tdigest/tdigest_column_view.cpp +++ b/cpp/src/quantiles/tdigest/tdigest_column_view.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index d187375b69f..75ebc078930 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -26,8 +26,6 @@ #include #include #include -#include -#include #include #include #include diff --git a/cpp/src/reductions/scan/scan.cpp b/cpp/src/reductions/scan/scan.cpp index d3c0b54f286..b91ae19b51a 100644 --- a/cpp/src/reductions/scan/scan.cpp +++ b/cpp/src/reductions/scan/scan.cpp @@ -14,13 +14,10 @@ * limitations under the License. */ -#include #include #include #include #include -#include -#include namespace cudf { diff --git a/cpp/src/reductions/segmented/reductions.cpp b/cpp/src/reductions/segmented/reductions.cpp index 40d1d8a0a53..c4f6c135dde 100644 --- a/cpp/src/reductions/segmented/reductions.cpp +++ b/cpp/src/reductions/segmented/reductions.cpp @@ -13,16 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include -#include #include #include #include #include #include -#include #include -#include #include #include diff --git a/cpp/src/rolling/detail/optimized_unbounded_window.cpp b/cpp/src/rolling/detail/optimized_unbounded_window.cpp index 72c23395a93..7cad31c0658 100644 --- a/cpp/src/rolling/detail/optimized_unbounded_window.cpp +++ b/cpp/src/rolling/detail/optimized_unbounded_window.cpp @@ -18,13 +18,10 @@ #include #include #include -#include #include #include #include #include -#include -#include #include namespace cudf::detail { diff --git a/cpp/src/rolling/detail/range_window_bounds.hpp b/cpp/src/rolling/detail/range_window_bounds.hpp index 8a53e937f98..77cb2a8c7f5 100644 --- a/cpp/src/rolling/detail/range_window_bounds.hpp +++ b/cpp/src/rolling/detail/range_window_bounds.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,10 +16,7 @@ #pragma once #include -#include #include -#include -#include namespace cudf { namespace detail { diff --git a/cpp/src/rolling/range_window_bounds.cpp b/cpp/src/rolling/range_window_bounds.cpp index 69792136c64..7f698dfcd6b 100644 --- a/cpp/src/rolling/range_window_bounds.cpp +++ b/cpp/src/rolling/range_window_bounds.cpp @@ -19,7 +19,6 @@ #include #include #include -#include namespace cudf { namespace { diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp index 31535198c58..4ec2174a96f 100644 --- a/cpp/src/scalar/scalar.cpp +++ b/cpp/src/scalar/scalar.cpp @@ -26,8 +26,6 @@ #include #include -#include - #include namespace cudf { diff --git a/cpp/src/scalar/scalar_factories.cpp b/cpp/src/scalar/scalar_factories.cpp index 656fe61fbbe..9f242bdffe0 100644 --- a/cpp/src/scalar/scalar_factories.cpp +++ b/cpp/src/scalar/scalar_factories.cpp @@ -16,10 +16,8 @@ #include #include -#include #include #include -#include #include #include diff --git a/cpp/src/strings/regex/regexec.cpp b/cpp/src/strings/regex/regexec.cpp index d1990733e81..60ad714dfec 100644 --- a/cpp/src/strings/regex/regexec.cpp +++ b/cpp/src/strings/regex/regexec.cpp @@ -24,7 +24,6 @@ #include #include -#include #include #include diff --git a/cpp/src/strings/strings_scalar_factories.cpp b/cpp/src/strings/strings_scalar_factories.cpp index 219d1174d42..1cc405234b2 100644 --- a/cpp/src/strings/strings_scalar_factories.cpp +++ b/cpp/src/strings/strings_scalar_factories.cpp @@ -16,7 +16,6 @@ #include #include -#include #include diff --git a/cpp/src/structs/structs_column_view.cpp b/cpp/src/structs/structs_column_view.cpp index b0284e9cb96..e14142a9ad1 100644 --- a/cpp/src/structs/structs_column_view.cpp +++ b/cpp/src/structs/structs_column_view.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index 5df9943303d..4012ee3d21c 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -21,13 +21,10 @@ #include #include #include -#include #include #include #include -#include #include -#include #include #include diff --git a/cpp/src/table/table.cpp b/cpp/src/table/table.cpp index cb707c94288..41c64c6decb 100644 --- a/cpp/src/table/table.cpp +++ b/cpp/src/table/table.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include diff --git a/cpp/src/table/table_view.cpp b/cpp/src/table/table_view.cpp index 8a5340dc20d..659beb749af 100644 --- a/cpp/src/table/table_view.cpp +++ b/cpp/src/table/table_view.cpp @@ -20,10 +20,7 @@ #include #include -#include - #include -#include #include namespace cudf { diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index 52b96bc9039..b919ac16956 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -23,8 +23,6 @@ #include #include #include -#include -#include #include #include diff --git a/cpp/src/utilities/cuda.cpp b/cpp/src/utilities/cuda.cpp index 53ca0608170..d979bda41d0 100644 --- a/cpp/src/utilities/cuda.cpp +++ b/cpp/src/utilities/cuda.cpp @@ -18,8 +18,6 @@ #include #include -#include - namespace cudf::detail { cudf::size_type num_multiprocessors() diff --git a/cpp/src/utilities/host_memory.cpp b/cpp/src/utilities/host_memory.cpp index 9d8e3cf2fa6..e30806a5011 100644 --- a/cpp/src/utilities/host_memory.cpp +++ b/cpp/src/utilities/host_memory.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include #include diff --git a/cpp/src/utilities/prefetch.cpp b/cpp/src/utilities/prefetch.cpp index 58971552758..000526723c4 100644 --- a/cpp/src/utilities/prefetch.cpp +++ b/cpp/src/utilities/prefetch.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include diff --git a/cpp/src/utilities/stream_pool.cpp b/cpp/src/utilities/stream_pool.cpp index 8c29182bfb5..7069b59be26 100644 --- a/cpp/src/utilities/stream_pool.cpp +++ b/cpp/src/utilities/stream_pool.cpp @@ -23,7 +23,6 @@ #include #include -#include #include #include diff --git a/cpp/src/utilities/traits.cpp b/cpp/src/utilities/traits.cpp index a68dc84e340..c1e71f5f8f9 100644 --- a/cpp/src/utilities/traits.cpp +++ b/cpp/src/utilities/traits.cpp @@ -19,8 +19,6 @@ #include #include -#include - namespace cudf { namespace { diff --git a/cpp/src/utilities/type_checks.cpp b/cpp/src/utilities/type_checks.cpp index 3095b342748..84c8529641d 100644 --- a/cpp/src/utilities/type_checks.cpp +++ b/cpp/src/utilities/type_checks.cpp @@ -21,8 +21,6 @@ #include #include -#include - #include namespace cudf { diff --git a/cpp/tests/ast/transform_tests.cpp b/cpp/tests/ast/transform_tests.cpp index a4bde50a21e..7af88d8aa34 100644 --- a/cpp/tests/ast/transform_tests.cpp +++ b/cpp/tests/ast/transform_tests.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include #include @@ -26,14 +25,8 @@ #include #include #include -#include -#include -#include #include #include -#include - -#include #include @@ -41,7 +34,6 @@ #include #include #include -#include #include template diff --git a/cpp/tests/binaryop/binop-compiled-test.cpp b/cpp/tests/binaryop/binop-compiled-test.cpp index aa5b49567e6..3bd67001c16 100644 --- a/cpp/tests/binaryop/binop-compiled-test.cpp +++ b/cpp/tests/binaryop/binop-compiled-test.cpp @@ -26,9 +26,7 @@ #include #include #include -#include #include -#include #include diff --git a/cpp/tests/binaryop/binop-generic-ptx-test.cpp b/cpp/tests/binaryop/binop-generic-ptx-test.cpp index 03cc87a1968..e9a2761db4a 100644 --- a/cpp/tests/binaryop/binop-generic-ptx-test.cpp +++ b/cpp/tests/binaryop/binop-generic-ptx-test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Copyright 2018-2019 BlazingDB, Inc. * Copyright 2018 Christian Noboa Mardini @@ -19,7 +19,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/bitmask/bitmask_tests.cpp b/cpp/tests/bitmask/bitmask_tests.cpp index fe221fb1c48..799bf646e52 100644 --- a/cpp/tests/bitmask/bitmask_tests.cpp +++ b/cpp/tests/bitmask/bitmask_tests.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/column/bit_cast_test.cpp b/cpp/tests/column/bit_cast_test.cpp index ab230ab036e..5570a7d498c 100644 --- a/cpp/tests/column/bit_cast_test.cpp +++ b/cpp/tests/column/bit_cast_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -26,8 +25,6 @@ #include -#include - template struct rep_type_impl { using type = void; diff --git a/cpp/tests/column/column_test.cpp b/cpp/tests/column/column_test.cpp index 631f5150829..d700adaebd5 100644 --- a/cpp/tests/column/column_test.cpp +++ b/cpp/tests/column/column_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/cpp/tests/column/column_view_device_span_test.cpp b/cpp/tests/column/column_view_device_span_test.cpp index 6de9121158b..470437f4112 100644 --- a/cpp/tests/column/column_view_device_span_test.cpp +++ b/cpp/tests/column/column_view_device_span_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/column/column_view_shallow_test.cpp b/cpp/tests/column/column_view_shallow_test.cpp index 37ab4b8f387..ad344476332 100644 --- a/cpp/tests/column/column_view_shallow_test.cpp +++ b/cpp/tests/column/column_view_shallow_test.cpp @@ -15,9 +15,7 @@ */ #include -#include #include -#include #include #include diff --git a/cpp/tests/column/factories_test.cpp b/cpp/tests/column/factories_test.cpp index 603187f0330..aa9d508b6aa 100644 --- a/cpp/tests/column/factories_test.cpp +++ b/cpp/tests/column/factories_test.cpp @@ -26,11 +26,8 @@ #include #include #include -#include #include -#include - #include class ColumnFactoryTest : public cudf::test::BaseFixture { diff --git a/cpp/tests/copying/concatenate_tests.cpp b/cpp/tests/copying/concatenate_tests.cpp index 18140c34abd..aedc498964a 100644 --- a/cpp/tests/copying/concatenate_tests.cpp +++ b/cpp/tests/copying/concatenate_tests.cpp @@ -34,8 +34,6 @@ #include #include -#include - #include #include #include diff --git a/cpp/tests/copying/copy_if_else_nested_tests.cpp b/cpp/tests/copying/copy_if_else_nested_tests.cpp index cfbd181f944..e1cdfe9beed 100644 --- a/cpp/tests/copying/copy_if_else_nested_tests.cpp +++ b/cpp/tests/copying/copy_if_else_nested_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/copying/copy_range_tests.cpp b/cpp/tests/copying/copy_range_tests.cpp index 25d93da277b..e2133a546e4 100644 --- a/cpp/tests/copying/copy_range_tests.cpp +++ b/cpp/tests/copying/copy_range_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/copying/copy_tests.cpp b/cpp/tests/copying/copy_tests.cpp index 4124f749012..9c00725d5d2 100644 --- a/cpp/tests/copying/copy_tests.cpp +++ b/cpp/tests/copying/copy_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/copying/gather_list_tests.cpp b/cpp/tests/copying/gather_list_tests.cpp index 247090aac90..93f71345c5c 100644 --- a/cpp/tests/copying/gather_list_tests.cpp +++ b/cpp/tests/copying/gather_list_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,8 +17,6 @@ #include #include #include -#include -#include #include #include diff --git a/cpp/tests/copying/gather_str_tests.cpp b/cpp/tests/copying/gather_str_tests.cpp index 28098878086..795e3f30aa1 100644 --- a/cpp/tests/copying/gather_str_tests.cpp +++ b/cpp/tests/copying/gather_str_tests.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/copying/gather_struct_tests.cpp b/cpp/tests/copying/gather_struct_tests.cpp index 1598ab2646a..b2c0f7acc3a 100644 --- a/cpp/tests/copying/gather_struct_tests.cpp +++ b/cpp/tests/copying/gather_struct_tests.cpp @@ -17,20 +17,15 @@ #include #include #include -#include #include #include #include #include #include -#include -#include -#include #include #include #include -#include #include diff --git a/cpp/tests/copying/gather_tests.cpp b/cpp/tests/copying/gather_tests.cpp index 07ce672b14d..908dcd67673 100644 --- a/cpp/tests/copying/gather_tests.cpp +++ b/cpp/tests/copying/gather_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/cpp/tests/copying/get_value_tests.cpp b/cpp/tests/copying/get_value_tests.cpp index 90ff97e7355..b2d64dac7c8 100644 --- a/cpp/tests/copying/get_value_tests.cpp +++ b/cpp/tests/copying/get_value_tests.cpp @@ -16,10 +16,8 @@ #include #include -#include #include #include -#include #include #include diff --git a/cpp/tests/copying/purge_nonempty_nulls_tests.cpp b/cpp/tests/copying/purge_nonempty_nulls_tests.cpp index 4f28ff12941..1f76efdc4c3 100644 --- a/cpp/tests/copying/purge_nonempty_nulls_tests.cpp +++ b/cpp/tests/copying/purge_nonempty_nulls_tests.cpp @@ -16,13 +16,10 @@ #include #include #include -#include #include #include #include -#include -#include #include #include #include diff --git a/cpp/tests/copying/reverse_tests.cpp b/cpp/tests/copying/reverse_tests.cpp index e4b2d319ddf..46516436901 100644 --- a/cpp/tests/copying/reverse_tests.cpp +++ b/cpp/tests/copying/reverse_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,17 +17,13 @@ #include #include #include -#include #include #include #include -#include -#include #include #include -#include #include #include #include diff --git a/cpp/tests/copying/sample_tests.cpp b/cpp/tests/copying/sample_tests.cpp index 2f76e3f1fcd..8be5d8c1fbb 100644 --- a/cpp/tests/copying/sample_tests.cpp +++ b/cpp/tests/copying/sample_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,12 +15,9 @@ */ #include -#include #include -#include #include -#include #include #include #include diff --git a/cpp/tests/copying/scatter_list_scalar_tests.cpp b/cpp/tests/copying/scatter_list_scalar_tests.cpp index 42d2e004d6b..23faa6e5b86 100644 --- a/cpp/tests/copying/scatter_list_scalar_tests.cpp +++ b/cpp/tests/copying/scatter_list_scalar_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,7 +21,6 @@ #include #include -#include using mask_vector = std::vector; using size_column = cudf::test::fixed_width_column_wrapper; diff --git a/cpp/tests/copying/scatter_list_tests.cpp b/cpp/tests/copying/scatter_list_tests.cpp index a82860a3eec..1f87fcfcc99 100644 --- a/cpp/tests/copying/scatter_list_tests.cpp +++ b/cpp/tests/copying/scatter_list_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/copying/scatter_struct_scalar_tests.cpp b/cpp/tests/copying/scatter_struct_scalar_tests.cpp index 78572b0bb37..1d1da8a1b1e 100644 --- a/cpp/tests/copying/scatter_struct_scalar_tests.cpp +++ b/cpp/tests/copying/scatter_struct_scalar_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,7 +19,6 @@ #include #include -#include #include #include #include diff --git a/cpp/tests/copying/scatter_struct_tests.cpp b/cpp/tests/copying/scatter_struct_tests.cpp index c92244d047b..7d88e9af85f 100644 --- a/cpp/tests/copying/scatter_struct_tests.cpp +++ b/cpp/tests/copying/scatter_struct_tests.cpp @@ -21,7 +21,6 @@ #include #include -#include #include using namespace cudf::test::iterators; diff --git a/cpp/tests/copying/scatter_tests.cpp b/cpp/tests/copying/scatter_tests.cpp index 41a753cd0ac..74c04446bdd 100644 --- a/cpp/tests/copying/scatter_tests.cpp +++ b/cpp/tests/copying/scatter_tests.cpp @@ -15,7 +15,6 @@ */ #include -#include #include #include #include @@ -23,7 +22,6 @@ #include #include #include -#include #include diff --git a/cpp/tests/copying/segmented_gather_list_tests.cpp b/cpp/tests/copying/segmented_gather_list_tests.cpp index 8881fb344a2..a133ae43872 100644 --- a/cpp/tests/copying/segmented_gather_list_tests.cpp +++ b/cpp/tests/copying/segmented_gather_list_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,7 +22,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/copying/shift_tests.cpp b/cpp/tests/copying/shift_tests.cpp index ff6808d9a79..72a8e7357bc 100644 --- a/cpp/tests/copying/shift_tests.cpp +++ b/cpp/tests/copying/shift_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -30,7 +29,6 @@ #include #include -#include using TestTypes = cudf::test::Types; diff --git a/cpp/tests/copying/slice_tests.cpp b/cpp/tests/copying/slice_tests.cpp index aef0d4ad78a..3868a147fa8 100644 --- a/cpp/tests/copying/slice_tests.cpp +++ b/cpp/tests/copying/slice_tests.cpp @@ -22,12 +22,8 @@ #include #include -#include #include #include -#include -#include -#include #include #include diff --git a/cpp/tests/copying/utility_tests.cpp b/cpp/tests/copying/utility_tests.cpp index 0905f9babdc..90457f8d74c 100644 --- a/cpp/tests/copying/utility_tests.cpp +++ b/cpp/tests/copying/utility_tests.cpp @@ -23,7 +23,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index 603edb27c7c..44f99adc0e9 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -23,14 +23,11 @@ #include #include -#include #include #include #include #include -#include - #define XXX false // stub for null values constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS}; diff --git a/cpp/tests/dictionary/add_keys_test.cpp b/cpp/tests/dictionary/add_keys_test.cpp index 46bf5468922..ebc8c11e86c 100644 --- a/cpp/tests/dictionary/add_keys_test.cpp +++ b/cpp/tests/dictionary/add_keys_test.cpp @@ -24,8 +24,6 @@ #include #include -#include - struct DictionaryAddKeysTest : public cudf::test::BaseFixture {}; TEST_F(DictionaryAddKeysTest, StringsColumn) diff --git a/cpp/tests/dictionary/encode_test.cpp b/cpp/tests/dictionary/encode_test.cpp index 5db0e9fa1e4..dfa3ede5d46 100644 --- a/cpp/tests/dictionary/encode_test.cpp +++ b/cpp/tests/dictionary/encode_test.cpp @@ -21,8 +21,6 @@ #include #include -#include - struct DictionaryEncodeTest : public cudf::test::BaseFixture {}; TEST_F(DictionaryEncodeTest, EncodeStringColumn) diff --git a/cpp/tests/dictionary/fill_test.cpp b/cpp/tests/dictionary/fill_test.cpp index 18696b66e48..bc7d19201aa 100644 --- a/cpp/tests/dictionary/fill_test.cpp +++ b/cpp/tests/dictionary/fill_test.cpp @@ -18,13 +18,10 @@ #include #include -#include #include #include #include -#include - struct DictionaryFillTest : public cudf::test::BaseFixture {}; TEST_F(DictionaryFillTest, StringsColumn) diff --git a/cpp/tests/dictionary/search_test.cpp b/cpp/tests/dictionary/search_test.cpp index 25501b4fde7..2774173b80a 100644 --- a/cpp/tests/dictionary/search_test.cpp +++ b/cpp/tests/dictionary/search_test.cpp @@ -15,7 +15,6 @@ */ #include -#include #include #include diff --git a/cpp/tests/dictionary/slice_test.cpp b/cpp/tests/dictionary/slice_test.cpp index d80f8dee079..8c15d6dbecd 100644 --- a/cpp/tests/dictionary/slice_test.cpp +++ b/cpp/tests/dictionary/slice_test.cpp @@ -19,7 +19,6 @@ #include #include -#include #include #include #include diff --git a/cpp/tests/filling/fill_tests.cpp b/cpp/tests/filling/fill_tests.cpp index 26badefe698..a5e2db6a005 100644 --- a/cpp/tests/filling/fill_tests.cpp +++ b/cpp/tests/filling/fill_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/filling/repeat_tests.cpp b/cpp/tests/filling/repeat_tests.cpp index 6326765c68b..c856984a4a3 100644 --- a/cpp/tests/filling/repeat_tests.cpp +++ b/cpp/tests/filling/repeat_tests.cpp @@ -17,14 +17,11 @@ #include #include #include -#include #include #include #include #include -#include -#include #include #include @@ -33,7 +30,6 @@ #include #include -#include constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS}; diff --git a/cpp/tests/filling/sequence_tests.cpp b/cpp/tests/filling/sequence_tests.cpp index 0783b4e5bbb..53782c90c26 100644 --- a/cpp/tests/filling/sequence_tests.cpp +++ b/cpp/tests/filling/sequence_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/fixed_point/fixed_point_tests.cpp b/cpp/tests/fixed_point/fixed_point_tests.cpp index a222289216d..b96c6909e55 100644 --- a/cpp/tests/fixed_point/fixed_point_tests.cpp +++ b/cpp/tests/fixed_point/fixed_point_tests.cpp @@ -18,17 +18,14 @@ #include #include #include -#include #include #include -#include #include #include #include #include -#include #include using namespace numeric; diff --git a/cpp/tests/groupby/collect_list_tests.cpp b/cpp/tests/groupby/collect_list_tests.cpp index a79b6a32916..ba456084a7c 100644 --- a/cpp/tests/groupby/collect_list_tests.cpp +++ b/cpp/tests/groupby/collect_list_tests.cpp @@ -20,8 +20,6 @@ #include #include -#include - template struct groupby_collect_list_test : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/groupby/collect_set_tests.cpp b/cpp/tests/groupby/collect_set_tests.cpp index 61d2838590b..dfd7eb82c4a 100644 --- a/cpp/tests/groupby/collect_set_tests.cpp +++ b/cpp/tests/groupby/collect_set_tests.cpp @@ -19,7 +19,6 @@ #include #include -#include #include #include #include diff --git a/cpp/tests/groupby/correlation_tests.cpp b/cpp/tests/groupby/correlation_tests.cpp index 26f714632dd..f8cc813e877 100644 --- a/cpp/tests/groupby/correlation_tests.cpp +++ b/cpp/tests/groupby/correlation_tests.cpp @@ -25,7 +25,6 @@ #include #include -#include using namespace cudf::test::iterators; diff --git a/cpp/tests/groupby/covariance_tests.cpp b/cpp/tests/groupby/covariance_tests.cpp index e3eb2da201f..81378bb91e8 100644 --- a/cpp/tests/groupby/covariance_tests.cpp +++ b/cpp/tests/groupby/covariance_tests.cpp @@ -23,10 +23,8 @@ #include #include -#include #include -#include using namespace cudf::test::iterators; diff --git a/cpp/tests/groupby/groupby_test_util.cpp b/cpp/tests/groupby/groupby_test_util.cpp index 5d99d15ae77..df0375d6a09 100644 --- a/cpp/tests/groupby/groupby_test_util.cpp +++ b/cpp/tests/groupby/groupby_test_util.cpp @@ -17,8 +17,8 @@ #include "groupby_test_util.hpp" #include -#include #include +#include #include #include @@ -27,9 +27,6 @@ #include #include #include -#include - -#include void test_single_agg(cudf::column_view const& keys, cudf::column_view const& values, diff --git a/cpp/tests/groupby/groupby_test_util.hpp b/cpp/tests/groupby/groupby_test_util.hpp index 755b0c20f17..9d2e613be3e 100644 --- a/cpp/tests/groupby/groupby_test_util.hpp +++ b/cpp/tests/groupby/groupby_test_util.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,11 +16,8 @@ #pragma once -#include #include -#include #include -#include enum class force_use_sort_impl : bool { NO, YES }; diff --git a/cpp/tests/groupby/histogram_tests.cpp b/cpp/tests/groupby/histogram_tests.cpp index 2d447025919..783cfb17e49 100644 --- a/cpp/tests/groupby/histogram_tests.cpp +++ b/cpp/tests/groupby/histogram_tests.cpp @@ -20,7 +20,6 @@ #include #include -#include #include #include #include diff --git a/cpp/tests/groupby/max_scan_tests.cpp b/cpp/tests/groupby/max_scan_tests.cpp index d86de798844..6195e0179ec 100644 --- a/cpp/tests/groupby/max_scan_tests.cpp +++ b/cpp/tests/groupby/max_scan_tests.cpp @@ -22,7 +22,6 @@ #include #include -#include using namespace cudf::test::iterators; diff --git a/cpp/tests/groupby/merge_lists_tests.cpp b/cpp/tests/groupby/merge_lists_tests.cpp index 279d71560b4..4481e2dc022 100644 --- a/cpp/tests/groupby/merge_lists_tests.cpp +++ b/cpp/tests/groupby/merge_lists_tests.cpp @@ -21,7 +21,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/groupby/merge_sets_tests.cpp b/cpp/tests/groupby/merge_sets_tests.cpp index 9736bb84dd6..1bfba265478 100644 --- a/cpp/tests/groupby/merge_sets_tests.cpp +++ b/cpp/tests/groupby/merge_sets_tests.cpp @@ -21,7 +21,6 @@ #include #include -#include #include #include #include diff --git a/cpp/tests/groupby/rank_scan_tests.cpp b/cpp/tests/groupby/rank_scan_tests.cpp index 7f31bc9089f..f2a50248b4a 100644 --- a/cpp/tests/groupby/rank_scan_tests.cpp +++ b/cpp/tests/groupby/rank_scan_tests.cpp @@ -22,8 +22,6 @@ #include #include -#include - using namespace cudf::test::iterators; template diff --git a/cpp/tests/groupby/shift_tests.cpp b/cpp/tests/groupby/shift_tests.cpp index 14c9ceb4508..49f9d7cb10a 100644 --- a/cpp/tests/groupby/shift_tests.cpp +++ b/cpp/tests/groupby/shift_tests.cpp @@ -21,7 +21,6 @@ #include #include -#include #include template diff --git a/cpp/tests/hashing/md5_test.cpp b/cpp/tests/hashing/md5_test.cpp index 69e518cbf8d..b54adb52496 100644 --- a/cpp/tests/hashing/md5_test.cpp +++ b/cpp/tests/hashing/md5_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/hashing/murmurhash3_x86_32_test.cpp b/cpp/tests/hashing/murmurhash3_x86_32_test.cpp index c1a6e6ff6e1..b4622f5eb81 100644 --- a/cpp/tests/hashing/murmurhash3_x86_32_test.cpp +++ b/cpp/tests/hashing/murmurhash3_x86_32_test.cpp @@ -17,11 +17,9 @@ #include #include #include -#include #include #include -#include #include constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS}; diff --git a/cpp/tests/hashing/sha1_test.cpp b/cpp/tests/hashing/sha1_test.cpp index e28e71442a6..1e86751bb4c 100644 --- a/cpp/tests/hashing/sha1_test.cpp +++ b/cpp/tests/hashing/sha1_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/hashing/sha224_test.cpp b/cpp/tests/hashing/sha224_test.cpp index 61b584f94df..259e7102ee2 100644 --- a/cpp/tests/hashing/sha224_test.cpp +++ b/cpp/tests/hashing/sha224_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/hashing/sha256_test.cpp b/cpp/tests/hashing/sha256_test.cpp index 8bc47c92c6b..a4affc87874 100644 --- a/cpp/tests/hashing/sha256_test.cpp +++ b/cpp/tests/hashing/sha256_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/hashing/sha384_test.cpp b/cpp/tests/hashing/sha384_test.cpp index 4c79934f98d..8a5c090eeea 100644 --- a/cpp/tests/hashing/sha384_test.cpp +++ b/cpp/tests/hashing/sha384_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/hashing/sha512_test.cpp b/cpp/tests/hashing/sha512_test.cpp index 0eb1c60b8fc..77fc56b5f13 100644 --- a/cpp/tests/hashing/sha512_test.cpp +++ b/cpp/tests/hashing/sha512_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/hashing/xxhash_64_test.cpp b/cpp/tests/hashing/xxhash_64_test.cpp index ab4ed829681..d8694a72d94 100644 --- a/cpp/tests/hashing/xxhash_64_test.cpp +++ b/cpp/tests/hashing/xxhash_64_test.cpp @@ -17,11 +17,8 @@ #include #include #include -#include #include -#include -#include #include using NumericTypesNoBools = diff --git a/cpp/tests/interop/from_arrow_device_test.cpp b/cpp/tests/interop/from_arrow_device_test.cpp index 2151ec6e22f..1ddc33e749a 100644 --- a/cpp/tests/interop/from_arrow_device_test.cpp +++ b/cpp/tests/interop/from_arrow_device_test.cpp @@ -17,17 +17,13 @@ #include "nanoarrow_utils.hpp" #include -#include #include #include -#include #include #include #include #include -#include -#include #include #include #include diff --git a/cpp/tests/interop/from_arrow_host_test.cpp b/cpp/tests/interop/from_arrow_host_test.cpp index ef9936b214c..d93ef28aab8 100644 --- a/cpp/tests/interop/from_arrow_host_test.cpp +++ b/cpp/tests/interop/from_arrow_host_test.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -28,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/cpp/tests/interop/from_arrow_stream_test.cpp b/cpp/tests/interop/from_arrow_stream_test.cpp index 80a2e4b2ffd..3916025bf22 100644 --- a/cpp/tests/interop/from_arrow_stream_test.cpp +++ b/cpp/tests/interop/from_arrow_stream_test.cpp @@ -17,27 +17,14 @@ #include "nanoarrow_utils.hpp" #include -#include -#include #include -#include -#include -#include -#include #include -#include -#include -#include -#include #include #include #include -#include #include -#include - struct VectorOfArrays { std::vector arrays; nanoarrow::UniqueSchema schema; diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp index 6e742b9e4cf..18efae75cb1 100644 --- a/cpp/tests/interop/from_arrow_test.cpp +++ b/cpp/tests/interop/from_arrow_test.cpp @@ -25,9 +25,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -37,8 +35,6 @@ #include #include -#include -#include std::unique_ptr get_cudf_table() { diff --git a/cpp/tests/interop/to_arrow_device_test.cpp b/cpp/tests/interop/to_arrow_device_test.cpp index 7ba586461dc..29aa928c277 100644 --- a/cpp/tests/interop/to_arrow_device_test.cpp +++ b/cpp/tests/interop/to_arrow_device_test.cpp @@ -17,21 +17,15 @@ #include "nanoarrow_utils.hpp" #include -#include #include -#include -#include #include #include -#include -#include #include #include #include #include #include -#include #include #include diff --git a/cpp/tests/interop/to_arrow_host_test.cpp b/cpp/tests/interop/to_arrow_host_test.cpp index fcb4433b42e..fa3aa82fee2 100644 --- a/cpp/tests/interop/to_arrow_host_test.cpp +++ b/cpp/tests/interop/to_arrow_host_test.cpp @@ -17,20 +17,14 @@ #include "nanoarrow_utils.hpp" #include -#include #include -#include -#include #include #include #include #include -#include #include #include -#include -#include #include #include #include diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp index a6aa4b22eca..86295d8efb1 100644 --- a/cpp/tests/interop/to_arrow_test.cpp +++ b/cpp/tests/interop/to_arrow_test.cpp @@ -19,14 +19,12 @@ #include #include #include -#include #include #include #include #include #include -#include #include #include #include diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp index b265dcf9273..cc1e367d114 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -17,14 +17,12 @@ #include #include #include -#include #include #include #include #include #include -#include #include #include #include @@ -32,18 +30,12 @@ #include #include #include -#include -#include #include -#include #include -#include - #include #include -#include #include #include #include diff --git a/cpp/tests/io/file_io_test.cpp b/cpp/tests/io/file_io_test.cpp index 3c41f21b0a4..1b85541687a 100644 --- a/cpp/tests/io/file_io_test.cpp +++ b/cpp/tests/io/file_io_test.cpp @@ -15,13 +15,10 @@ */ #include -#include #include #include -#include - // Base test fixture for tests struct CuFileIOTest : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/io/json/json_quote_normalization_test.cpp b/cpp/tests/io/json/json_quote_normalization_test.cpp index d23acf3ae00..c8c2d18903f 100644 --- a/cpp/tests/io/json/json_quote_normalization_test.cpp +++ b/cpp/tests/io/json/json_quote_normalization_test.cpp @@ -20,7 +20,6 @@ #include #include -#include #include #include #include @@ -29,7 +28,6 @@ #include #include -#include #include diff --git a/cpp/tests/io/json/json_test.cpp b/cpp/tests/io/json/json_test.cpp index cb6716f4a18..5f070bd53b9 100644 --- a/cpp/tests/io/json/json_test.cpp +++ b/cpp/tests/io/json/json_test.cpp @@ -39,8 +39,6 @@ #include -#include - #include #include #include diff --git a/cpp/tests/io/json/json_tree.cpp b/cpp/tests/io/json/json_tree.cpp index 15682c6ae6b..887d4fa783f 100644 --- a/cpp/tests/io/json/json_tree.cpp +++ b/cpp/tests/io/json/json_tree.cpp @@ -15,12 +15,8 @@ */ #include "io/json/nested_json.hpp" -#include "io/utilities/hostdevice_vector.hpp" #include -#include -#include -#include #include #include @@ -29,9 +25,9 @@ #include #include -#include #include +#include #include #include #include diff --git a/cpp/tests/io/json/nested_json_test.cpp b/cpp/tests/io/json/nested_json_test.cpp index f32aba0e632..e0e955c4f48 100644 --- a/cpp/tests/io/json/nested_json_test.cpp +++ b/cpp/tests/io/json/nested_json_test.cpp @@ -21,24 +21,16 @@ #include #include #include -#include #include -#include #include -#include #include -#include #include -#include #include #include #include #include -#include - -#include #include #include diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index cce0adbf317..fce99187516 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/cpp/tests/io/parquet_common.hpp b/cpp/tests/io/parquet_common.hpp index c90b81ed27a..d66aa3bde9d 100644 --- a/cpp/tests/io/parquet_common.hpp +++ b/cpp/tests/io/parquet_common.hpp @@ -22,13 +22,11 @@ #include #include -#include #include #include #include #include -#include #include #include diff --git a/cpp/tests/io/parquet_misc_test.cpp b/cpp/tests/io/parquet_misc_test.cpp index f1286a00d22..d66f685cd9c 100644 --- a/cpp/tests/io/parquet_misc_test.cpp +++ b/cpp/tests/io/parquet_misc_test.cpp @@ -20,8 +20,6 @@ #include #include -#include -#include #include diff --git a/cpp/tests/io/parquet_reader_test.cpp b/cpp/tests/io/parquet_reader_test.cpp index 7986a3c6d70..177e6163d4f 100644 --- a/cpp/tests/io/parquet_reader_test.cpp +++ b/cpp/tests/io/parquet_reader_test.cpp @@ -29,6 +29,8 @@ #include #include +#include + #include TEST_F(ParquetReaderTest, UserBounds) diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index be2ecd56424..5c3c8342cd2 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include // NOTE: this file exists to define the parquet test's `main()` function. diff --git a/cpp/tests/io/row_selection_test.cpp b/cpp/tests/io/row_selection_test.cpp index ebadd870091..c40d3bbd299 100644 --- a/cpp/tests/io/row_selection_test.cpp +++ b/cpp/tests/io/row_selection_test.cpp @@ -15,7 +15,6 @@ */ #include -#include #include #include diff --git a/cpp/tests/io/text/data_chunk_source_test.cpp b/cpp/tests/io/text/data_chunk_source_test.cpp index 6f46df20633..79ce908f3e0 100644 --- a/cpp/tests/io/text/data_chunk_source_test.cpp +++ b/cpp/tests/io/text/data_chunk_source_test.cpp @@ -15,14 +15,11 @@ */ #include -#include #include #include #include -#include - #include #include diff --git a/cpp/tests/io/text/multibyte_split_test.cpp b/cpp/tests/io/text/multibyte_split_test.cpp index 74d08061df9..60244462e2c 100644 --- a/cpp/tests/io/text/multibyte_split_test.cpp +++ b/cpp/tests/io/text/multibyte_split_test.cpp @@ -19,16 +19,12 @@ #include #include #include -#include -#include #include -#include #include #include #include #include -#include #include using cudf::test::strings_column_wrapper; diff --git a/cpp/tests/iterator/value_iterator.cpp b/cpp/tests/iterator/value_iterator.cpp index 22bc7475dbe..f7f7c0f2721 100644 --- a/cpp/tests/iterator/value_iterator.cpp +++ b/cpp/tests/iterator/value_iterator.cpp @@ -13,7 +13,6 @@ * the License. */ -#include #include CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/jit/parse_ptx_function.cpp b/cpp/tests/jit/parse_ptx_function.cpp index 6f9dfd06730..c9bb691907a 100644 --- a/cpp/tests/jit/parse_ptx_function.cpp +++ b/cpp/tests/jit/parse_ptx_function.cpp @@ -16,7 +16,6 @@ #include "jit/parser.hpp" -#include #include #include diff --git a/cpp/tests/join/cross_join_tests.cpp b/cpp/tests/join/cross_join_tests.cpp index d87f5e54153..971913443e5 100644 --- a/cpp/tests/join/cross_join_tests.cpp +++ b/cpp/tests/join/cross_join_tests.cpp @@ -15,7 +15,6 @@ */ #include -#include #include #include #include diff --git a/cpp/tests/join/distinct_join_tests.cpp b/cpp/tests/join/distinct_join_tests.cpp index 178edc52dd3..9070efa38fe 100644 --- a/cpp/tests/join/distinct_join_tests.cpp +++ b/cpp/tests/join/distinct_join_tests.cpp @@ -15,12 +15,8 @@ */ #include -#include #include -#include #include -#include -#include #include #include @@ -31,7 +27,6 @@ #include #include -#include #include template diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp index 3431e941359..6a8a54c8465 100644 --- a/cpp/tests/join/join_tests.cpp +++ b/cpp/tests/join/join_tests.cpp @@ -20,17 +20,12 @@ #include #include #include -#include #include -#include #include #include -#include -#include #include #include -#include #include #include #include diff --git a/cpp/tests/join/semi_anti_join_tests.cpp b/cpp/tests/join/semi_anti_join_tests.cpp index 554d5754e39..ddc65c3f379 100644 --- a/cpp/tests/join/semi_anti_join_tests.cpp +++ b/cpp/tests/join/semi_anti_join_tests.cpp @@ -22,7 +22,6 @@ #include #include -#include #include #include #include diff --git a/cpp/tests/json/json_tests.cpp b/cpp/tests/json/json_tests.cpp index 42a574ac5c0..53166e04173 100644 --- a/cpp/tests/json/json_tests.cpp +++ b/cpp/tests/json/json_tests.cpp @@ -19,7 +19,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/large_strings/large_strings_fixture.cpp b/cpp/tests/large_strings/large_strings_fixture.cpp index 7b61be113f9..f1404990354 100644 --- a/cpp/tests/large_strings/large_strings_fixture.cpp +++ b/cpp/tests/large_strings/large_strings_fixture.cpp @@ -16,12 +16,10 @@ #include "large_strings_fixture.hpp" -#include #include #include #include -#include #include #include diff --git a/cpp/tests/large_strings/parquet_tests.cpp b/cpp/tests/large_strings/parquet_tests.cpp index 007c08ce0fb..f47782a2d02 100644 --- a/cpp/tests/large_strings/parquet_tests.cpp +++ b/cpp/tests/large_strings/parquet_tests.cpp @@ -16,8 +16,6 @@ #include "large_strings_fixture.hpp" -#include -#include #include #include diff --git a/cpp/tests/lists/contains_tests.cpp b/cpp/tests/lists/contains_tests.cpp index 8fb2b403051..7ae7a6a7414 100644 --- a/cpp/tests/lists/contains_tests.cpp +++ b/cpp/tests/lists/contains_tests.cpp @@ -22,7 +22,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/lists/extract_tests.cpp b/cpp/tests/lists/extract_tests.cpp index 92dd5df5ec7..2c24f695c29 100644 --- a/cpp/tests/lists/extract_tests.cpp +++ b/cpp/tests/lists/extract_tests.cpp @@ -21,12 +21,8 @@ #include #include -#include -#include #include -#include - #include #include #include diff --git a/cpp/tests/lists/sequences_tests.cpp b/cpp/tests/lists/sequences_tests.cpp index 74545903eb3..dcb906cd2ef 100644 --- a/cpp/tests/lists/sequences_tests.cpp +++ b/cpp/tests/lists/sequences_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/lists/stream_compaction/apply_boolean_mask_tests.cpp b/cpp/tests/lists/stream_compaction/apply_boolean_mask_tests.cpp index 5625b47e7ea..18aa118bb81 100644 --- a/cpp/tests/lists/stream_compaction/apply_boolean_mask_tests.cpp +++ b/cpp/tests/lists/stream_compaction/apply_boolean_mask_tests.cpp @@ -20,8 +20,6 @@ #include #include -#include -#include #include namespace cudf::test { diff --git a/cpp/tests/merge/merge_dictionary_test.cpp b/cpp/tests/merge/merge_dictionary_test.cpp index dd528c19e4e..1d7a31fd797 100644 --- a/cpp/tests/merge/merge_dictionary_test.cpp +++ b/cpp/tests/merge/merge_dictionary_test.cpp @@ -17,9 +17,7 @@ #include #include #include -#include -#include #include #include #include diff --git a/cpp/tests/merge/merge_string_test.cpp b/cpp/tests/merge/merge_string_test.cpp index bea044496b3..d9fdb6099f0 100644 --- a/cpp/tests/merge/merge_string_test.cpp +++ b/cpp/tests/merge/merge_string_test.cpp @@ -17,10 +17,8 @@ #include #include #include -#include #include -#include #include #include #include @@ -30,10 +28,6 @@ #include -#include -#include -#include -#include #include #include diff --git a/cpp/tests/merge/merge_test.cpp b/cpp/tests/merge/merge_test.cpp index 6208d395f0a..fad390105d7 100644 --- a/cpp/tests/merge/merge_test.cpp +++ b/cpp/tests/merge/merge_test.cpp @@ -21,7 +21,6 @@ #include #include #include -#include #include #include @@ -34,7 +33,6 @@ #include #include -#include #include diff --git a/cpp/tests/partitioning/round_robin_test.cpp b/cpp/tests/partitioning/round_robin_test.cpp index 89d23c39dca..3693cfbcc72 100644 --- a/cpp/tests/partitioning/round_robin_test.cpp +++ b/cpp/tests/partitioning/round_robin_test.cpp @@ -17,10 +17,8 @@ #include #include #include -#include #include -#include #include #include #include @@ -30,12 +28,7 @@ #include -#include -#include -#include -#include #include -#include #include using cudf::test::fixed_width_column_wrapper; diff --git a/cpp/tests/quantiles/quantile_test.cpp b/cpp/tests/quantiles/quantile_test.cpp index 6e88365b6e8..23b58618fe1 100644 --- a/cpp/tests/quantiles/quantile_test.cpp +++ b/cpp/tests/quantiles/quantile_test.cpp @@ -22,7 +22,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/quantiles/quantiles_test.cpp b/cpp/tests/quantiles/quantiles_test.cpp index 44d4ec61852..c7e11af8c85 100644 --- a/cpp/tests/quantiles/quantiles_test.cpp +++ b/cpp/tests/quantiles/quantiles_test.cpp @@ -16,7 +16,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/reductions/ewm_tests.cpp b/cpp/tests/reductions/ewm_tests.cpp index 09cec688509..1117b0d1acf 100644 --- a/cpp/tests/reductions/ewm_tests.cpp +++ b/cpp/tests/reductions/ewm_tests.cpp @@ -18,9 +18,7 @@ #include #include -#include -#include #include template diff --git a/cpp/tests/reductions/list_rank_test.cpp b/cpp/tests/reductions/list_rank_test.cpp index f5470f7d881..736b5081d8f 100644 --- a/cpp/tests/reductions/list_rank_test.cpp +++ b/cpp/tests/reductions/list_rank_test.cpp @@ -14,14 +14,9 @@ * limitations under the License. */ -#include - #include #include -#include -#include -#include #include struct ListRankScanTest : public cudf::test::BaseFixture { diff --git a/cpp/tests/reductions/rank_tests.cpp b/cpp/tests/reductions/rank_tests.cpp index 3ab1fc01eaa..19633211192 100644 --- a/cpp/tests/reductions/rank_tests.cpp +++ b/cpp/tests/reductions/rank_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,7 +21,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index bdb98372836..c09cde8f9e4 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -22,9 +22,7 @@ #include #include -#include #include -#include #include #include #include @@ -33,11 +31,9 @@ #include #include -#include #include #include -#include #include #include diff --git a/cpp/tests/reductions/scan_tests.cpp b/cpp/tests/reductions/scan_tests.cpp index c4463d68a68..72d92c5ac53 100644 --- a/cpp/tests/reductions/scan_tests.cpp +++ b/cpp/tests/reductions/scan_tests.cpp @@ -20,13 +20,11 @@ #include #include -#include #include #include #include #include -#include #include #include diff --git a/cpp/tests/reductions/scan_tests.hpp b/cpp/tests/reductions/scan_tests.hpp index 858697d8ef5..c2cce4bbbfa 100644 --- a/cpp/tests/reductions/scan_tests.hpp +++ b/cpp/tests/reductions/scan_tests.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,9 +20,7 @@ #include #include -#include #include -#include #include #include @@ -30,7 +28,6 @@ #include #include -#include template struct TypeParam_to_host_type { diff --git a/cpp/tests/replace/clamp_test.cpp b/cpp/tests/replace/clamp_test.cpp index 239c9ce6ddd..e972ea35ed0 100644 --- a/cpp/tests/replace/clamp_test.cpp +++ b/cpp/tests/replace/clamp_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/replace/normalize_replace_tests.cpp b/cpp/tests/replace/normalize_replace_tests.cpp index 2de17388ee8..c35f385329a 100644 --- a/cpp/tests/replace/normalize_replace_tests.cpp +++ b/cpp/tests/replace/normalize_replace_tests.cpp @@ -19,7 +19,6 @@ #include #include -#include #include // This is the main test fixture diff --git a/cpp/tests/replace/replace_nans_tests.cpp b/cpp/tests/replace/replace_nans_tests.cpp index 35232204db7..1b9fe92066a 100644 --- a/cpp/tests/replace/replace_nans_tests.cpp +++ b/cpp/tests/replace/replace_nans_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/replace/replace_nulls_tests.cpp b/cpp/tests/replace/replace_nulls_tests.cpp index fcee27305f2..0c8ccea52a6 100644 --- a/cpp/tests/replace/replace_nulls_tests.cpp +++ b/cpp/tests/replace/replace_nulls_tests.cpp @@ -20,13 +20,11 @@ #include #include #include -#include #include #include #include #include -#include #include #include #include diff --git a/cpp/tests/replace/replace_tests.cpp b/cpp/tests/replace/replace_tests.cpp index b12bf08520f..ae4041bcfaf 100644 --- a/cpp/tests/replace/replace_tests.cpp +++ b/cpp/tests/replace/replace_tests.cpp @@ -20,20 +20,16 @@ #include #include #include -#include #include #include #include #include -#include -#include #include #include #include #include -#include #include diff --git a/cpp/tests/reshape/byte_cast_tests.cpp b/cpp/tests/reshape/byte_cast_tests.cpp index b3d9b2e2f5f..59585c0e947 100644 --- a/cpp/tests/reshape/byte_cast_tests.cpp +++ b/cpp/tests/reshape/byte_cast_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/reshape/tile_tests.cpp b/cpp/tests/reshape/tile_tests.cpp index ed76b9d2ea5..25cfc5c5108 100644 --- a/cpp/tests/reshape/tile_tests.cpp +++ b/cpp/tests/reshape/tile_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,7 +15,6 @@ */ #include -#include #include #include #include diff --git a/cpp/tests/rolling/collect_ops_test.cpp b/cpp/tests/rolling/collect_ops_test.cpp index 165e0347785..e8a36d9ab48 100644 --- a/cpp/tests/rolling/collect_ops_test.cpp +++ b/cpp/tests/rolling/collect_ops_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/rolling/empty_input_test.cpp b/cpp/tests/rolling/empty_input_test.cpp index e7d1e3f0b10..2e1815671a9 100644 --- a/cpp/tests/rolling/empty_input_test.cpp +++ b/cpp/tests/rolling/empty_input_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,9 +15,7 @@ */ #include -#include #include -#include #include #include diff --git a/cpp/tests/rolling/grouped_rolling_range_test.cpp b/cpp/tests/rolling/grouped_rolling_range_test.cpp index fcfbd0eee78..2cb9b60000b 100644 --- a/cpp/tests/rolling/grouped_rolling_range_test.cpp +++ b/cpp/tests/rolling/grouped_rolling_range_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,21 +17,16 @@ #include #include #include -#include #include #include #include #include -#include -#include #include #include #include #include -#include -#include #include #include diff --git a/cpp/tests/rolling/grouped_rolling_test.cpp b/cpp/tests/rolling/grouped_rolling_test.cpp index 78d5daf7e83..78b444bcd93 100644 --- a/cpp/tests/rolling/grouped_rolling_test.cpp +++ b/cpp/tests/rolling/grouped_rolling_test.cpp @@ -19,7 +19,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/rolling/lead_lag_test.cpp b/cpp/tests/rolling/lead_lag_test.cpp index de057e96320..6519b0ed4ee 100644 --- a/cpp/tests/rolling/lead_lag_test.cpp +++ b/cpp/tests/rolling/lead_lag_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,6 @@ #include #include #include -#include #include #include @@ -26,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/rolling/nth_element_test.cpp b/cpp/tests/rolling/nth_element_test.cpp index 2444992e68f..5f2b383ed55 100644 --- a/cpp/tests/rolling/nth_element_test.cpp +++ b/cpp/tests/rolling/nth_element_test.cpp @@ -17,22 +17,15 @@ #include #include #include -#include #include #include #include -#include -#include #include -#include - #include #include -#include - #include #include diff --git a/cpp/tests/rolling/offset_row_window_test.cpp b/cpp/tests/rolling/offset_row_window_test.cpp index 0eaab0c9f7a..dcaa47e722b 100644 --- a/cpp/tests/rolling/offset_row_window_test.cpp +++ b/cpp/tests/rolling/offset_row_window_test.cpp @@ -17,14 +17,10 @@ #include #include #include -#include #include #include -#include -#include #include -#include template using fwcw = cudf::test::fixed_width_column_wrapper; diff --git a/cpp/tests/rolling/range_rolling_window_test.cpp b/cpp/tests/rolling/range_rolling_window_test.cpp index 461c41025e9..daf5fcc1d96 100644 --- a/cpp/tests/rolling/range_rolling_window_test.cpp +++ b/cpp/tests/rolling/range_rolling_window_test.cpp @@ -17,22 +17,17 @@ #include #include #include -#include #include #include -#include #include #include -#include -#include #include #include #include #include -#include #include #include diff --git a/cpp/tests/rolling/range_window_bounds_test.cpp b/cpp/tests/rolling/range_window_bounds_test.cpp index b77451bf0bc..a67555280f4 100644 --- a/cpp/tests/rolling/range_window_bounds_test.cpp +++ b/cpp/tests/rolling/range_window_bounds_test.cpp @@ -15,9 +15,6 @@ */ #include -#include -#include -#include #include #include @@ -25,8 +22,6 @@ #include -#include - struct RangeWindowBoundsTest : public cudf::test::BaseFixture {}; template diff --git a/cpp/tests/rolling/rolling_test.cpp b/cpp/tests/rolling/rolling_test.cpp index 6e0dc16dca9..72a511fd5f1 100644 --- a/cpp/tests/rolling/rolling_test.cpp +++ b/cpp/tests/rolling/rolling_test.cpp @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -30,7 +29,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/scalar/factories_test.cpp b/cpp/tests/scalar/factories_test.cpp index 5f132f3ace9..26987ea1b7b 100644 --- a/cpp/tests/scalar/factories_test.cpp +++ b/cpp/tests/scalar/factories_test.cpp @@ -22,11 +22,8 @@ #include #include -#include #include -#include - class ScalarFactoryTest : public cudf::test::BaseFixture {}; template diff --git a/cpp/tests/search/search_dictionary_test.cpp b/cpp/tests/search/search_dictionary_test.cpp index 78f79ccc648..a3bb1dfda10 100644 --- a/cpp/tests/search/search_dictionary_test.cpp +++ b/cpp/tests/search/search_dictionary_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include diff --git a/cpp/tests/search/search_list_test.cpp b/cpp/tests/search/search_list_test.cpp index 7584003e800..fb5d0fcc889 100644 --- a/cpp/tests/search/search_list_test.cpp +++ b/cpp/tests/search/search_list_test.cpp @@ -20,7 +20,6 @@ #include #include -#include #include #include #include diff --git a/cpp/tests/search/search_struct_test.cpp b/cpp/tests/search/search_struct_test.cpp index c35d359e75c..05b9deb3463 100644 --- a/cpp/tests/search/search_struct_test.cpp +++ b/cpp/tests/search/search_struct_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,6 @@ #include #include -#include #include #include #include diff --git a/cpp/tests/search/search_test.cpp b/cpp/tests/search/search_test.cpp index 7550cc27161..8d750be5677 100644 --- a/cpp/tests/search/search_test.cpp +++ b/cpp/tests/search/search_test.cpp @@ -20,7 +20,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/sort/is_sorted_tests.cpp b/cpp/tests/sort/is_sorted_tests.cpp index 109095192f9..e3c9f8d349e 100644 --- a/cpp/tests/sort/is_sorted_tests.cpp +++ b/cpp/tests/sort/is_sorted_tests.cpp @@ -15,7 +15,6 @@ */ #include -#include #include #include #include diff --git a/cpp/tests/sort/rank_test.cpp b/cpp/tests/sort/rank_test.cpp index e08a2105aea..ded46cb1f31 100644 --- a/cpp/tests/sort/rank_test.cpp +++ b/cpp/tests/sort/rank_test.cpp @@ -18,10 +18,8 @@ #include #include #include -#include #include -#include #include #include #include diff --git a/cpp/tests/sort/sort_nested_types_tests.cpp b/cpp/tests/sort/sort_nested_types_tests.cpp index 8ab23936ceb..ce4148a941e 100644 --- a/cpp/tests/sort/sort_nested_types_tests.cpp +++ b/cpp/tests/sort/sort_nested_types_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/sort/sort_test.cpp b/cpp/tests/sort/sort_test.cpp index 6a35e977b46..e1505c7a474 100644 --- a/cpp/tests/sort/sort_test.cpp +++ b/cpp/tests/sort/sort_test.cpp @@ -28,7 +28,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/sort/stable_sort_tests.cpp b/cpp/tests/sort/stable_sort_tests.cpp index 655166e0d62..88de9d51523 100644 --- a/cpp/tests/sort/stable_sort_tests.cpp +++ b/cpp/tests/sort/stable_sort_tests.cpp @@ -25,9 +25,6 @@ #include #include -#include -#include - #include #include diff --git a/cpp/tests/stream_compaction/apply_boolean_mask_tests.cpp b/cpp/tests/stream_compaction/apply_boolean_mask_tests.cpp index 6c0582fb846..1204b019739 100644 --- a/cpp/tests/stream_compaction/apply_boolean_mask_tests.cpp +++ b/cpp/tests/stream_compaction/apply_boolean_mask_tests.cpp @@ -20,9 +20,7 @@ #include #include #include -#include -#include #include #include #include @@ -31,8 +29,6 @@ #include #include -#include -#include #include struct ApplyBooleanMask : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/stream_compaction/distinct_count_tests.cpp b/cpp/tests/stream_compaction/distinct_count_tests.cpp index a2dab649961..ee1bb3ead92 100644 --- a/cpp/tests/stream_compaction/distinct_count_tests.cpp +++ b/cpp/tests/stream_compaction/distinct_count_tests.cpp @@ -15,16 +15,11 @@ */ #include -#include #include #include -#include #include -#include -#include #include -#include #include #include diff --git a/cpp/tests/stream_compaction/distinct_tests.cpp b/cpp/tests/stream_compaction/distinct_tests.cpp index 14d7d8789ac..c618ff68cbb 100644 --- a/cpp/tests/stream_compaction/distinct_tests.cpp +++ b/cpp/tests/stream_compaction/distinct_tests.cpp @@ -15,7 +15,6 @@ */ #include -#include #include #include #include @@ -27,8 +26,6 @@ #include #include -#include - auto constexpr null{0}; // null at current level auto constexpr XXX{0}; // null pushed down from parent level auto constexpr NaN = std::numeric_limits::quiet_NaN(); diff --git a/cpp/tests/stream_compaction/drop_nans_tests.cpp b/cpp/tests/stream_compaction/drop_nans_tests.cpp index bf72da5c840..71321361564 100644 --- a/cpp/tests/stream_compaction/drop_nans_tests.cpp +++ b/cpp/tests/stream_compaction/drop_nans_tests.cpp @@ -15,12 +15,9 @@ */ #include -#include #include #include -#include -#include #include #include #include diff --git a/cpp/tests/stream_compaction/drop_nulls_tests.cpp b/cpp/tests/stream_compaction/drop_nulls_tests.cpp index dbac1d58195..d3b45c2323e 100644 --- a/cpp/tests/stream_compaction/drop_nulls_tests.cpp +++ b/cpp/tests/stream_compaction/drop_nulls_tests.cpp @@ -15,12 +15,10 @@ */ #include -#include #include #include #include -#include #include #include #include diff --git a/cpp/tests/stream_compaction/stable_distinct_tests.cpp b/cpp/tests/stream_compaction/stable_distinct_tests.cpp index 6c6c53331d4..cc847da6340 100644 --- a/cpp/tests/stream_compaction/stable_distinct_tests.cpp +++ b/cpp/tests/stream_compaction/stable_distinct_tests.cpp @@ -15,20 +15,16 @@ */ #include -#include #include #include #include #include -#include #include #include #include #include -#include - auto constexpr null{0}; // null at current level auto constexpr XXX{0}; // null pushed down from parent level auto constexpr NaN = std::numeric_limits::quiet_NaN(); diff --git a/cpp/tests/stream_compaction/unique_count_tests.cpp b/cpp/tests/stream_compaction/unique_count_tests.cpp index 640d159fc4f..bad93e92712 100644 --- a/cpp/tests/stream_compaction/unique_count_tests.cpp +++ b/cpp/tests/stream_compaction/unique_count_tests.cpp @@ -15,16 +15,11 @@ */ #include -#include #include #include -#include #include -#include -#include #include -#include #include #include diff --git a/cpp/tests/stream_compaction/unique_tests.cpp b/cpp/tests/stream_compaction/unique_tests.cpp index d5b6915b520..e2b32b898b3 100644 --- a/cpp/tests/stream_compaction/unique_tests.cpp +++ b/cpp/tests/stream_compaction/unique_tests.cpp @@ -15,22 +15,16 @@ */ #include -#include #include #include #include -#include #include -#include #include #include #include #include -#include -#include - using cudf::nan_policy; using cudf::null_equality; using cudf::null_policy; diff --git a/cpp/tests/streams/binaryop_test.cpp b/cpp/tests/streams/binaryop_test.cpp index 2a7b52b1b6b..3dcc6f9e632 100644 --- a/cpp/tests/streams/binaryop_test.cpp +++ b/cpp/tests/streams/binaryop_test.cpp @@ -21,7 +21,6 @@ #include #include -#include #include class BinaryopTest : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/streams/io/csv_test.cpp b/cpp/tests/streams/io/csv_test.cpp index 42894a0ebcb..a74ee64f8de 100644 --- a/cpp/tests/streams/io/csv_test.cpp +++ b/cpp/tests/streams/io/csv_test.cpp @@ -17,13 +17,9 @@ #include #include #include -#include #include -#include -#include #include -#include #include #include diff --git a/cpp/tests/streams/io/json_test.cpp b/cpp/tests/streams/io/json_test.cpp index f98e685ed0c..d352c6c3b2a 100644 --- a/cpp/tests/streams/io/json_test.cpp +++ b/cpp/tests/streams/io/json_test.cpp @@ -19,9 +19,7 @@ #include #include -#include #include -#include #include #include diff --git a/cpp/tests/streams/io/multibyte_split_test.cpp b/cpp/tests/streams/io/multibyte_split_test.cpp index b0eff1d3340..5bb17226029 100644 --- a/cpp/tests/streams/io/multibyte_split_test.cpp +++ b/cpp/tests/streams/io/multibyte_split_test.cpp @@ -17,7 +17,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/streams/io/orc_test.cpp b/cpp/tests/streams/io/orc_test.cpp index cc43bf15b5d..10722557e6a 100644 --- a/cpp/tests/streams/io/orc_test.cpp +++ b/cpp/tests/streams/io/orc_test.cpp @@ -17,19 +17,11 @@ #include #include #include -#include -#include #include #include -#include #include -#include -#include -#include -#include -#include #include #include diff --git a/cpp/tests/streams/io/parquet_test.cpp b/cpp/tests/streams/io/parquet_test.cpp index 9d2dec2d697..18bb80e64af 100644 --- a/cpp/tests/streams/io/parquet_test.cpp +++ b/cpp/tests/streams/io/parquet_test.cpp @@ -17,13 +17,9 @@ #include #include #include -#include -#include #include #include -#include -#include #include #include diff --git a/cpp/tests/streams/join_test.cpp b/cpp/tests/streams/join_test.cpp index 2811bb676fa..27bd7e080c9 100644 --- a/cpp/tests/streams/join_test.cpp +++ b/cpp/tests/streams/join_test.cpp @@ -19,11 +19,9 @@ #include #include -#include #include #include #include -#include #include #include diff --git a/cpp/tests/streams/null_mask_test.cpp b/cpp/tests/streams/null_mask_test.cpp index e96224003f4..ed37a72545f 100644 --- a/cpp/tests/streams/null_mask_test.cpp +++ b/cpp/tests/streams/null_mask_test.cpp @@ -14,15 +14,12 @@ * limitations under the License. */ -#include - #include #include #include #include #include -#include class NullMaskTest : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/streams/reduction_test.cpp b/cpp/tests/streams/reduction_test.cpp index b4f013fc960..9ab972302e4 100644 --- a/cpp/tests/streams/reduction_test.cpp +++ b/cpp/tests/streams/reduction_test.cpp @@ -17,11 +17,8 @@ #include #include #include -#include -#include #include -#include #include #include diff --git a/cpp/tests/streams/rolling_test.cpp b/cpp/tests/streams/rolling_test.cpp index b352ad2c0d2..4d9899870b4 100644 --- a/cpp/tests/streams/rolling_test.cpp +++ b/cpp/tests/streams/rolling_test.cpp @@ -17,12 +17,10 @@ #include #include #include -#include #include #include #include -#include class RollingTest : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/streams/stream_compaction_test.cpp b/cpp/tests/streams/stream_compaction_test.cpp index 07b2d77cc04..e7b282601e1 100644 --- a/cpp/tests/streams/stream_compaction_test.cpp +++ b/cpp/tests/streams/stream_compaction_test.cpp @@ -15,20 +15,16 @@ */ #include -#include #include #include #include -#include #include #include #include #include #include -#include - auto constexpr NaN = std::numeric_limits::quiet_NaN(); auto constexpr KEEP_ANY = cudf::duplicate_keep_option::KEEP_ANY; auto constexpr KEEP_FIRST = cudf::duplicate_keep_option::KEEP_FIRST; diff --git a/cpp/tests/streams/strings/factory_test.cpp b/cpp/tests/streams/strings/factory_test.cpp index 36e595ab9fa..449e0830b0c 100644 --- a/cpp/tests/streams/strings/factory_test.cpp +++ b/cpp/tests/streams/strings/factory_test.cpp @@ -15,7 +15,6 @@ */ #include -#include #include #include diff --git a/cpp/tests/streams/strings/reverse_test.cpp b/cpp/tests/streams/strings/reverse_test.cpp index 4b4d0a7aff5..154e1c1b715 100644 --- a/cpp/tests/streams/strings/reverse_test.cpp +++ b/cpp/tests/streams/strings/reverse_test.cpp @@ -21,7 +21,6 @@ #include #include -#include class StringsReverseTest : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/streams/transform_test.cpp b/cpp/tests/streams/transform_test.cpp index cf81dc6fb42..9f168abcb31 100644 --- a/cpp/tests/streams/transform_test.cpp +++ b/cpp/tests/streams/transform_test.cpp @@ -15,17 +15,11 @@ */ #include -#include #include #include -#include -#include #include -#include -#include #include -#include #include #include diff --git a/cpp/tests/strings/array_tests.cpp b/cpp/tests/strings/array_tests.cpp index 9c0ecaa52c0..06b9c2fa3c1 100644 --- a/cpp/tests/strings/array_tests.cpp +++ b/cpp/tests/strings/array_tests.cpp @@ -23,10 +23,8 @@ #include #include #include -#include #include #include -#include #include #include diff --git a/cpp/tests/strings/combine/concatenate_tests.cpp b/cpp/tests/strings/combine/concatenate_tests.cpp index bb57d6f5e8a..e53adcf373a 100644 --- a/cpp/tests/strings/combine/concatenate_tests.cpp +++ b/cpp/tests/strings/combine/concatenate_tests.cpp @@ -22,7 +22,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/strings/combine/join_list_elements_tests.cpp b/cpp/tests/strings/combine/join_list_elements_tests.cpp index 00317146088..c92f1cfc8f8 100644 --- a/cpp/tests/strings/combine/join_list_elements_tests.cpp +++ b/cpp/tests/strings/combine/join_list_elements_tests.cpp @@ -22,7 +22,6 @@ #include #include #include -#include using namespace cudf::test::iterators; diff --git a/cpp/tests/strings/concatenate_tests.cpp b/cpp/tests/strings/concatenate_tests.cpp index 5cf4015b9e9..51dcc60d95e 100644 --- a/cpp/tests/strings/concatenate_tests.cpp +++ b/cpp/tests/strings/concatenate_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,6 @@ #include #include -#include #include diff --git a/cpp/tests/strings/datetime_tests.cpp b/cpp/tests/strings/datetime_tests.cpp index b3dc3010c67..da0db0fc056 100644 --- a/cpp/tests/strings/datetime_tests.cpp +++ b/cpp/tests/strings/datetime_tests.cpp @@ -19,7 +19,6 @@ #include #include -#include #include #include #include diff --git a/cpp/tests/strings/extract_tests.cpp b/cpp/tests/strings/extract_tests.cpp index 7e0338f1bf4..37b25d9b287 100644 --- a/cpp/tests/strings/extract_tests.cpp +++ b/cpp/tests/strings/extract_tests.cpp @@ -21,7 +21,6 @@ #include #include -#include #include #include #include diff --git a/cpp/tests/strings/findall_tests.cpp b/cpp/tests/strings/findall_tests.cpp index 4821a7fa999..7eb4b32d078 100644 --- a/cpp/tests/strings/findall_tests.cpp +++ b/cpp/tests/strings/findall_tests.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -28,8 +27,6 @@ #include -#include - struct StringsFindallTests : public cudf::test::BaseFixture {}; TEST_F(StringsFindallTests, FindallTest) diff --git a/cpp/tests/strings/fixed_point_tests.cpp b/cpp/tests/strings/fixed_point_tests.cpp index 79054551498..b788c05c152 100644 --- a/cpp/tests/strings/fixed_point_tests.cpp +++ b/cpp/tests/strings/fixed_point_tests.cpp @@ -23,8 +23,6 @@ #include #include -#include - struct StringsConvertTest : public cudf::test::BaseFixture {}; template diff --git a/cpp/tests/strings/integers_tests.cpp b/cpp/tests/strings/integers_tests.cpp index 26bcfe8028d..c08effdb969 100644 --- a/cpp/tests/strings/integers_tests.cpp +++ b/cpp/tests/strings/integers_tests.cpp @@ -24,9 +24,6 @@ #include #include -#include -#include - #include #include diff --git a/cpp/tests/structs/structs_column_tests.cpp b/cpp/tests/structs/structs_column_tests.cpp index 219bd6d8b01..a34ff25cb69 100644 --- a/cpp/tests/structs/structs_column_tests.cpp +++ b/cpp/tests/structs/structs_column_tests.cpp @@ -17,28 +17,18 @@ #include #include #include -#include #include #include #include -#include #include -#include #include -#include -#include -#include -#include #include #include #include -#include #include -#include -#include #include #include diff --git a/cpp/tests/structs/utilities_tests.cpp b/cpp/tests/structs/utilities_tests.cpp index c33eedf9bd9..c0df2f01a63 100644 --- a/cpp/tests/structs/utilities_tests.cpp +++ b/cpp/tests/structs/utilities_tests.cpp @@ -14,21 +14,15 @@ * limitations under the License. */ -#include "cudf_test/default_stream.hpp" - #include #include #include -#include #include #include #include -#include -#include #include #include -#include #include #include diff --git a/cpp/tests/table/row_operators_tests.cpp b/cpp/tests/table/row_operators_tests.cpp index 5fa63c47cf0..216c4d7b6bb 100644 --- a/cpp/tests/table/row_operators_tests.cpp +++ b/cpp/tests/table/row_operators_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/table/table_tests.cpp b/cpp/tests/table/table_tests.cpp index 1637ba7d7d3..363f1a0ba5d 100644 --- a/cpp/tests/table/table_tests.cpp +++ b/cpp/tests/table/table_tests.cpp @@ -17,17 +17,14 @@ #include #include #include -#include #include #include #include -#include #include #include #include -#include template using column_wrapper = cudf::test::fixed_width_column_wrapper; diff --git a/cpp/tests/text/minhash_tests.cpp b/cpp/tests/text/minhash_tests.cpp index e23f3f6e7d8..ef35a4472cf 100644 --- a/cpp/tests/text/minhash_tests.cpp +++ b/cpp/tests/text/minhash_tests.cpp @@ -21,13 +21,9 @@ #include #include -#include #include -#include -#include - #include struct MinHashTest : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/text/ngrams_tests.cpp b/cpp/tests/text/ngrams_tests.cpp index 1acb4fc4265..c72c7cfc80e 100644 --- a/cpp/tests/text/ngrams_tests.cpp +++ b/cpp/tests/text/ngrams_tests.cpp @@ -28,8 +28,6 @@ #include -#include - struct TextGenerateNgramsTest : public cudf::test::BaseFixture {}; TEST_F(TextGenerateNgramsTest, Ngrams) diff --git a/cpp/tests/text/normalize_tests.cpp b/cpp/tests/text/normalize_tests.cpp index b0d41004e7e..2515cc917fa 100644 --- a/cpp/tests/text/normalize_tests.cpp +++ b/cpp/tests/text/normalize_tests.cpp @@ -20,7 +20,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/text/stemmer_tests.cpp b/cpp/tests/text/stemmer_tests.cpp index a343913411c..82c4bf53cfc 100644 --- a/cpp/tests/text/stemmer_tests.cpp +++ b/cpp/tests/text/stemmer_tests.cpp @@ -19,7 +19,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/text/subword_tests.cpp b/cpp/tests/text/subword_tests.cpp index a615780c02a..782551ad66e 100644 --- a/cpp/tests/text/subword_tests.cpp +++ b/cpp/tests/text/subword_tests.cpp @@ -19,13 +19,11 @@ #include #include -#include #include #include #include -#include #include // Global environment for temporary files diff --git a/cpp/tests/transform/bools_to_mask_test.cpp b/cpp/tests/transform/bools_to_mask_test.cpp index 2684123c08a..9437440f34d 100644 --- a/cpp/tests/transform/bools_to_mask_test.cpp +++ b/cpp/tests/transform/bools_to_mask_test.cpp @@ -20,10 +20,8 @@ #include #include -#include #include #include -#include #include diff --git a/cpp/tests/transform/nans_to_null_test.cpp b/cpp/tests/transform/nans_to_null_test.cpp index ba16c100e7a..42ca872a936 100644 --- a/cpp/tests/transform/nans_to_null_test.cpp +++ b/cpp/tests/transform/nans_to_null_test.cpp @@ -17,12 +17,10 @@ #include #include #include -#include #include #include #include -#include template struct NaNsToNullTest : public cudf::test::BaseFixture { diff --git a/cpp/tests/transpose/transpose_test.cpp b/cpp/tests/transpose/transpose_test.cpp index 5a88c402b8c..7797b2b2cf8 100644 --- a/cpp/tests/transpose/transpose_test.cpp +++ b/cpp/tests/transpose/transpose_test.cpp @@ -22,7 +22,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/types/traits_test.cpp b/cpp/tests/types/traits_test.cpp index 0d9092c33da..46468af515d 100644 --- a/cpp/tests/types/traits_test.cpp +++ b/cpp/tests/types/traits_test.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include diff --git a/cpp/tests/unary/cast_tests.cpp b/cpp/tests/unary/cast_tests.cpp index 45b89b76070..ed4c1340dbb 100644 --- a/cpp/tests/unary/cast_tests.cpp +++ b/cpp/tests/unary/cast_tests.cpp @@ -20,18 +20,15 @@ #include #include -#include #include #include #include #include #include -#include #include #include -#include #include static auto const test_timestamps_D = std::vector{ diff --git a/cpp/tests/unary/math_ops_test.cpp b/cpp/tests/unary/math_ops_test.cpp index 5bfbf70d5f9..663a919f3f4 100644 --- a/cpp/tests/unary/math_ops_test.cpp +++ b/cpp/tests/unary/math_ops_test.cpp @@ -22,10 +22,6 @@ #include #include #include -#include -#include - -#include #include diff --git a/cpp/tests/unary/unary_ops_test.cpp b/cpp/tests/unary/unary_ops_test.cpp index e7477c34642..3c616461c74 100644 --- a/cpp/tests/unary/unary_ops_test.cpp +++ b/cpp/tests/unary/unary_ops_test.cpp @@ -23,7 +23,6 @@ #include #include -#include #include template diff --git a/cpp/tests/utilities/random_seed.cpp b/cpp/tests/utilities/random_seed.cpp index ab5a31ce161..555d89b7dc5 100644 --- a/cpp/tests/utilities/random_seed.cpp +++ b/cpp/tests/utilities/random_seed.cpp @@ -13,8 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include -#include +#include namespace cudf { namespace test { diff --git a/cpp/tests/utilities_tests/column_debug_tests.cpp b/cpp/tests/utilities_tests/column_debug_tests.cpp index 7aa05af4591..2a57d678d07 100644 --- a/cpp/tests/utilities_tests/column_debug_tests.cpp +++ b/cpp/tests/utilities_tests/column_debug_tests.cpp @@ -16,12 +16,9 @@ #include #include -#include #include #include -#include - #include #include diff --git a/cpp/tests/utilities_tests/column_utilities_tests.cpp b/cpp/tests/utilities_tests/column_utilities_tests.cpp index 9d6d5ccb9b5..a13ce825d0b 100644 --- a/cpp/tests/utilities_tests/column_utilities_tests.cpp +++ b/cpp/tests/utilities_tests/column_utilities_tests.cpp @@ -17,20 +17,16 @@ #include #include #include -#include #include #include #include #include #include -#include #include #include -#include - template struct ColumnUtilitiesTest : public cudf::test::BaseFixture { cudf::test::UniformRandomGenerator random; diff --git a/cpp/tests/utilities_tests/column_wrapper_tests.cpp b/cpp/tests/utilities_tests/column_wrapper_tests.cpp index 479c6687e75..339678f3be8 100644 --- a/cpp/tests/utilities_tests/column_wrapper_tests.cpp +++ b/cpp/tests/utilities_tests/column_wrapper_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp b/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp index 5e3fda5e6f7..ff50dc39979 100644 --- a/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp +++ b/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/utilities_tests/type_check_tests.cpp b/cpp/tests/utilities_tests/type_check_tests.cpp index fecb896f95a..c1c5776be74 100644 --- a/cpp/tests/utilities_tests/type_check_tests.cpp +++ b/cpp/tests/utilities_tests/type_check_tests.cpp @@ -18,7 +18,6 @@ #include #include -#include #include #include #include diff --git a/cpp/tests/utilities_tests/type_list_tests.cpp b/cpp/tests/utilities_tests/type_list_tests.cpp index 849457056e4..6c3a84763a0 100644 --- a/cpp/tests/utilities_tests/type_list_tests.cpp +++ b/cpp/tests/utilities_tests/type_list_tests.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include using namespace cudf::test; // this will make reading code way easier @@ -23,6 +22,7 @@ namespace { // Work around to remove parentheses surrounding a type template struct argument_type; + template struct argument_type { using type = U; From bf5b778c265b3bfa712f509be0ba268216bcf3d0 Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Mon, 28 Oct 2024 23:51:03 -0500 Subject: [PATCH 42/76] Check `num_children() == 0` in `Column.from_column_view` (#17193) This fixes a bug where `Column.from_column_view` is not verifying the existence of a string column's offsets child column prior to accessing it, resulting in a segmentation fault when passing a `column_view` from `Column.view()` to `Column.from_column_view(...)`. The issue can be reproduced with: ``` import cudf from cudf.core.column.column import as_column df = cudf.DataFrame({'a': cudf.Series([[]], dtype=cudf.core.dtypes.ListDtype('string'))}) s = df['a'] col = as_column(s) col2 = cudf._lib.column.Column.back_and_forth(col) print(col) print(col2) ``` where `back_and_forth` is defined as: ``` @staticmethod def back_and_forth(Column input_column): cdef column_view input_column_view = input_column.view() return Column.from_column_view(input_column_view, input_column) ``` I don't have the expertise to write the appropriate tests for this without introducing the `back_and_forth` function as an API, which seems undesirable. Authors: - Christopher Harris (https://github.com/cwharris) Approvers: - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17193 --- python/cudf/cudf/_lib/column.pyx | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index 065655505b8..94dbdf5534d 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -688,15 +688,18 @@ cdef class Column: # special case for string column is_string_column = (cv.type().id() == libcudf_types.type_id.STRING) if is_string_column: - # get the size from offset child column (device to host copy) - offsets_column_index = 0 - offset_child_column = cv.child(offsets_column_index) - if offset_child_column.size() == 0: + if cv.num_children() == 0: base_nbytes = 0 else: - chars_size = get_element( - offset_child_column, offset_child_column.size()-1).value - base_nbytes = chars_size + # get the size from offset child column (device to host copy) + offsets_column_index = 0 + offset_child_column = cv.child(offsets_column_index) + if offset_child_column.size() == 0: + base_nbytes = 0 + else: + chars_size = get_element( + offset_child_column, offset_child_column.size()-1).value + base_nbytes = chars_size if data_ptr: if data_owner is None: From 4b0a634e51f64c68f107683d82ebfea87290efaf Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Tue, 29 Oct 2024 10:42:07 -0400 Subject: [PATCH 43/76] Auto assign PR to author (#16969) I think most PRs remain unassigned, so this PR auto assigns the PR to the PR author. I think this will help keep our project boards up-to-date. Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/16969 --- .github/workflows/auto-assign.yml | 17 +++++++++++++++++ .github/workflows/labeler.yml | 1 + 2 files changed, 18 insertions(+) create mode 100644 .github/workflows/auto-assign.yml diff --git a/.github/workflows/auto-assign.yml b/.github/workflows/auto-assign.yml new file mode 100644 index 00000000000..673bebd4ecc --- /dev/null +++ b/.github/workflows/auto-assign.yml @@ -0,0 +1,17 @@ +name: "Auto Assign PR" + +on: + pull_request_target: + types: + - opened + - reopened + - synchronize + +jobs: + add_assignees: + runs-on: ubuntu-latest + steps: + - uses: actions-ecosystem/action-add-assignees@v1 + with: + repo_token: "${{ secrets.GITHUB_TOKEN }}" + assignees: ${{ github.actor }} diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index 31e78f82a62..f5cb71bfc14 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -1,4 +1,5 @@ name: "Pull Request Labeler" + on: - pull_request_target From 3775f7b9f6509bd0f2f75c46edb60abf2522de86 Mon Sep 17 00:00:00 2001 From: Basit Ayantunde Date: Tue, 29 Oct 2024 14:49:52 +0000 Subject: [PATCH 44/76] Fixed unused attribute compilation error for GCC 13 (#17188) With `decltype(&pclose) ` for the destructor type of the `unique_ptr`, gcc makes the signature inherit the attributes of `pclose`. The compiler then ignores this attribute as it doesn't apply within the context with a warning, and since we have `-Werror` on for ignored attributes, the build fails. This happens on gcc 13.2.0. Authors: - Basit Ayantunde (https://github.com/lamarrr) Approvers: - David Wendt (https://github.com/davidwendt) - Paul Mattione (https://github.com/pmattione-nvidia) - Shruti Shivakumar (https://github.com/shrshi) URL: https://github.com/rapidsai/cudf/pull/17188 --- cpp/benchmarks/io/cuio_common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index fe24fb58728..45b46005c47 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -186,7 +186,7 @@ std::string exec_cmd(std::string_view cmd) std::fflush(nullptr); // Switch stderr and stdout to only capture stderr auto const redirected_cmd = std::string{"( "}.append(cmd).append(" 3>&2 2>&1 1>&3) 2>/dev/null"); - std::unique_ptr pipe(popen(redirected_cmd.c_str(), "r"), pclose); + std::unique_ptr pipe(popen(redirected_cmd.c_str(), "r"), pclose); CUDF_EXPECTS(pipe != nullptr, "popen() failed"); std::array buffer; From ddfb2848d6b7bb3cd03b8377f349f401030f558c Mon Sep 17 00:00:00 2001 From: Nghia Truong <7416935+ttnghia@users.noreply.github.com> Date: Tue, 29 Oct 2024 09:51:19 -0700 Subject: [PATCH 45/76] Support storing `precision` of decimal types in `Schema` class (#17176) In Spark, the `DecimalType` has a specific number of digits to represent the numbers. However, when creating a data Schema, only type and name of the column are stored, thus we lose that precision information. As such, it would be difficult to reconstruct the original decimal types from cudf's `Schema` instance. This PR adds a `precision` member variable to the `Schema` class in cudf Java, allowing it to store the precision number of the original decimal column. Partially contributes to https://github.com/NVIDIA/spark-rapids/issues/11560. Authors: - Nghia Truong (https://github.com/ttnghia) Approvers: - Robert (Bobby) Evans (https://github.com/revans2) URL: https://github.com/rapidsai/cudf/pull/17176 --- java/src/main/java/ai/rapids/cudf/Schema.java | 77 +++++++++++++++++-- 1 file changed, 70 insertions(+), 7 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/Schema.java b/java/src/main/java/ai/rapids/cudf/Schema.java index 76b2799aad6..6da591d659f 100644 --- a/java/src/main/java/ai/rapids/cudf/Schema.java +++ b/java/src/main/java/ai/rapids/cudf/Schema.java @@ -29,26 +29,52 @@ public class Schema { public static final Schema INFERRED = new Schema(); private final DType topLevelType; + + /** + * Default value for precision value, when it is not specified or the column type is not decimal. + */ + private static final int UNKNOWN_PRECISION = -1; + + /** + * Store precision for the top level column, only applicable if the column is a decimal type. + *

+ * This variable is not designed to be used by any libcudf's APIs since libcudf does not support + * precisions for fixed point numbers. + * Instead, it is used only to pass down the precision values from Spark's DecimalType to the + * JNI level, where some JNI functions require these values to perform their operations. + */ + private final int topLevelPrecision; + private final List childNames; private final List childSchemas; private boolean flattened = false; private String[] flattenedNames; private DType[] flattenedTypes; + private int[] flattenedPrecisions; private int[] flattenedCounts; private Schema(DType topLevelType, + int topLevelPrecision, List childNames, List childSchemas) { this.topLevelType = topLevelType; + this.topLevelPrecision = topLevelPrecision; this.childNames = childNames; this.childSchemas = childSchemas; } + private Schema(DType topLevelType, + List childNames, + List childSchemas) { + this(topLevelType, UNKNOWN_PRECISION, childNames, childSchemas); + } + /** * Inferred schema. */ private Schema() { topLevelType = null; + topLevelPrecision = UNKNOWN_PRECISION; childNames = null; childSchemas = null; } @@ -104,14 +130,17 @@ private void flattenIfNeeded() { if (flatLen == 0) { flattenedNames = null; flattenedTypes = null; + flattenedPrecisions = null; flattenedCounts = null; } else { String[] names = new String[flatLen]; DType[] types = new DType[flatLen]; + int[] precisions = new int[flatLen]; int[] counts = new int[flatLen]; - collectFlattened(names, types, counts, 0); + collectFlattened(names, types, precisions, counts, 0); flattenedNames = names; flattenedTypes = types; + flattenedPrecisions = precisions; flattenedCounts = counts; } flattened = true; @@ -128,19 +157,20 @@ private int flattenedLength(int startingLength) { return startingLength; } - private int collectFlattened(String[] names, DType[] types, int[] counts, int offset) { + private int collectFlattened(String[] names, DType[] types, int[] precisions, int[] counts, int offset) { if (childSchemas != null) { for (int i = 0; i < childSchemas.size(); i++) { Schema child = childSchemas.get(i); names[offset] = childNames.get(i); types[offset] = child.topLevelType; + precisions[offset] = child.topLevelPrecision; if (child.childNames != null) { counts[offset] = child.childNames.size(); } else { counts[offset] = 0; } offset++; - offset = this.childSchemas.get(i).collectFlattened(names, types, counts, offset); + offset = this.childSchemas.get(i).collectFlattened(names, types, precisions, counts, offset); } } return offset; @@ -226,6 +256,22 @@ public int[] getFlattenedTypeScales() { return ret; } + /** + * Get decimal precisions of the columns' types flattened from all levels in schema by + * depth-first traversal. + *

+ * This is used to pass down the decimal precisions from Spark to only the JNI layer, where + * some JNI functions require precision values to perform their operations. + * Decimal precisions should not be consumed by any libcudf's APIs since libcudf does not + * support precisions for fixed point numbers. + * + * @return An array containing decimal precision of all columns in schema. + */ + public int[] getFlattenedDecimalPrecisions() { + flattenIfNeeded(); + return flattenedPrecisions; + } + /** * Get the types of the columns in schema flattened from all levels by depth-first traversal. * @return An array containing types of all columns in schema. @@ -307,11 +353,13 @@ public HostColumnVector.DataType asHostDataType() { public static class Builder { private final DType topLevelType; + private final int topLevelPrecision; private final List names; private final List types; - private Builder(DType topLevelType) { + private Builder(DType topLevelType, int topLevelPrecision) { this.topLevelType = topLevelType; + this.topLevelPrecision = topLevelPrecision; if (topLevelType == DType.STRUCT || topLevelType == DType.LIST) { // There can be children names = new ArrayList<>(); @@ -322,14 +370,19 @@ private Builder(DType topLevelType) { } } + private Builder(DType topLevelType) { + this(topLevelType, UNKNOWN_PRECISION); + } + /** * Add a new column * @param type the type of column to add * @param name the name of the column to add (Ignored for list types) + * @param precision the decimal precision, only applicable for decimal types * @return the builder for the new column. This should really only be used when the type * passed in is a LIST or a STRUCT. */ - public Builder addColumn(DType type, String name) { + public Builder addColumn(DType type, String name, int precision) { if (names == null) { throw new IllegalStateException("A column of type " + topLevelType + " cannot have children"); @@ -340,21 +393,31 @@ public Builder addColumn(DType type, String name) { if (names.contains(name)) { throw new IllegalStateException("Cannot add duplicate names to a schema"); } - Builder ret = new Builder(type); + Builder ret = new Builder(type, precision); types.add(ret); names.add(name); return ret; } + public Builder addColumn(DType type, String name) { + return addColumn(type, name, UNKNOWN_PRECISION); + } + /** * Adds a single column to the current schema. addColumn is preferred as it can be used * to support nested types. * @param type the type of the column. * @param name the name of the column. + * @param precision the decimal precision, only applicable for decimal types. * @return this for chaining. */ + public Builder column(DType type, String name, int precision) { + addColumn(type, name, precision); + return this; + } + public Builder column(DType type, String name) { - addColumn(type, name); + addColumn(type, name, UNKNOWN_PRECISION); return this; } From 63b773e73a9f582e2cfa75ae04bcad8608e8f03a Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Tue, 29 Oct 2024 13:25:55 -0500 Subject: [PATCH 46/76] Add in new java API for raw host memory allocation (#17197) This is the first patch in a series of patches that should make it so that all java host memory allocations go through the DefaultHostMemoryAllocator unless another allocator is explicitly provided. This is to make it simpler to track/control host memory usage. Authors: - Robert (Bobby) Evans (https://github.com/revans2) Approvers: - Jason Lowe (https://github.com/jlowe) - Alessandro Bellina (https://github.com/abellina) URL: https://github.com/rapidsai/cudf/pull/17197 --- .../main/java/ai/rapids/cudf/HostMemoryBuffer.java | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java b/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java index e4106574a19..d792459901c 100644 --- a/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java +++ b/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -155,6 +155,16 @@ public static HostMemoryBuffer allocate(long bytes) { return allocate(bytes, defaultPreferPinned); } + /** + * Allocate host memory bypassing the default allocator. This is intended to only be used by other allocators. + * Pinned memory will not be used for these allocations. + * @param bytes size in bytes to allocate + * @return the newly created buffer + */ + public static HostMemoryBuffer allocateRaw(long bytes) { + return new HostMemoryBuffer(UnsafeMemoryAccessor.allocate(bytes), bytes); + } + /** * Create a host buffer that is memory-mapped to a file. * @param path path to the file to map into host memory From 52d7e638af366a2384868c41a7ece889d7ada30e Mon Sep 17 00:00:00 2001 From: Basit Ayantunde Date: Tue, 29 Oct 2024 19:59:13 +0000 Subject: [PATCH 47/76] Unified binary_ops and ast benchmarks parameter names (#17200) This merge request unifies the parameter names of the AST and BINARYOP benchmark suites and makes it easier to perform parameter sweeps and compare the outputs of both benchmarks. Authors: - Basit Ayantunde (https://github.com/lamarrr) Approvers: - Nghia Truong (https://github.com/ttnghia) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/17200 --- cpp/benchmarks/ast/transform.cpp | 26 +++++++++---------- cpp/benchmarks/binaryop/binaryop.cpp | 26 +++++++++---------- cpp/benchmarks/binaryop/compiled_binaryop.cpp | 12 ++++----- 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/cpp/benchmarks/ast/transform.cpp b/cpp/benchmarks/ast/transform.cpp index 7fe61054a26..2533ea9611c 100644 --- a/cpp/benchmarks/ast/transform.cpp +++ b/cpp/benchmarks/ast/transform.cpp @@ -52,14 +52,14 @@ enum class TreeType { template static void BM_ast_transform(nvbench::state& state) { - auto const table_size = static_cast(state.get_int64("table_size")); + auto const num_rows = static_cast(state.get_int64("num_rows")); auto const tree_levels = static_cast(state.get_int64("tree_levels")); // Create table data auto const n_cols = reuse_columns ? 1 : tree_levels + 1; auto const source_table = create_sequence_table(cycle_dtypes({cudf::type_to_id()}, n_cols), - row_count{table_size}, + row_count{num_rows}, Nullable ? std::optional{0.5} : std::nullopt); auto table = source_table->view(); @@ -99,8 +99,8 @@ static void BM_ast_transform(nvbench::state& state) auto const& expression_tree_root = expressions.back(); // Use the number of bytes read from global memory - state.add_global_memory_reads(static_cast(table_size) * (tree_levels + 1)); - state.add_global_memory_writes(table_size); + state.add_global_memory_reads(static_cast(num_rows) * (tree_levels + 1)); + state.add_global_memory_writes(num_rows); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) { cudf::compute_column(table, expression_tree_root); }); @@ -109,15 +109,15 @@ static void BM_ast_transform(nvbench::state& state) template static void BM_string_compare_ast_transform(nvbench::state& state) { - auto const string_width = static_cast(state.get_int64("string_width")); - auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const num_comparisons = static_cast(state.get_int64("num_comparisons")); - auto const hit_rate = static_cast(state.get_int64("hit_rate")); + auto const string_width = static_cast(state.get_int64("string_width")); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const tree_levels = static_cast(state.get_int64("tree_levels")); + auto const hit_rate = static_cast(state.get_int64("hit_rate")); - CUDF_EXPECTS(num_comparisons > 0, "benchmarks require 1 or more comparisons"); + CUDF_EXPECTS(tree_levels > 0, "benchmarks require 1 or more comparisons"); // Create table data - auto const num_cols = num_comparisons * 2; + auto const num_cols = tree_levels * 2; std::vector> columns; std::for_each( thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_cols), [&](size_t) { @@ -150,7 +150,7 @@ static void BM_string_compare_ast_transform(nvbench::state& state) expressions.emplace_back(cudf::ast::operation(cmp_op, column_refs[0], column_refs[1])); std::for_each(thrust::make_counting_iterator(1), - thrust::make_counting_iterator(num_comparisons), + thrust::make_counting_iterator(tree_levels), [&](size_t idx) { auto const& lhs = expressions.back(); auto const& rhs = expressions.emplace_back( @@ -177,7 +177,7 @@ static void BM_string_compare_ast_transform(nvbench::state& state) NVBENCH_BENCH(name) \ .set_name(#name) \ .add_int64_axis("tree_levels", {1, 5, 10}) \ - .add_int64_axis("table_size", {100'000, 1'000'000, 10'000'000, 100'000'000}) + .add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000, 100'000'000}) AST_TRANSFORM_BENCHMARK_DEFINE( ast_int32_imbalanced_unique, int32_t, TreeType::IMBALANCED_LEFT, false, false); @@ -202,7 +202,7 @@ AST_TRANSFORM_BENCHMARK_DEFINE( .set_name(#name) \ .add_int64_axis("string_width", {32, 64, 128, 256}) \ .add_int64_axis("num_rows", {32768, 262144, 2097152}) \ - .add_int64_axis("num_comparisons", {1, 2, 3, 4}) \ + .add_int64_axis("tree_levels", {1, 2, 3, 4}) \ .add_int64_axis("hit_rate", {50, 100}) AST_STRING_COMPARE_TRANSFORM_BENCHMARK_DEFINE(ast_string_equal_logical_and, diff --git a/cpp/benchmarks/binaryop/binaryop.cpp b/cpp/benchmarks/binaryop/binaryop.cpp index 35e41c6c2a4..75c91d270a7 100644 --- a/cpp/benchmarks/binaryop/binaryop.cpp +++ b/cpp/benchmarks/binaryop/binaryop.cpp @@ -40,18 +40,18 @@ enum class TreeType { template static void BM_binaryop_transform(nvbench::state& state) { - auto const table_size{static_cast(state.get_int64("table_size"))}; + auto const num_rows{static_cast(state.get_int64("num_rows"))}; auto const tree_levels{static_cast(state.get_int64("tree_levels"))}; // Create table data auto const n_cols = reuse_columns ? 1 : tree_levels + 1; auto const source_table = create_sequence_table( - cycle_dtypes({cudf::type_to_id()}, n_cols), row_count{table_size}); + cycle_dtypes({cudf::type_to_id()}, n_cols), row_count{num_rows}); cudf::table_view table{*source_table}; // Use the number of bytes read from global memory - state.add_global_memory_reads(static_cast(table_size) * (tree_levels + 1)); - state.add_global_memory_writes(table_size); + state.add_global_memory_reads(static_cast(num_rows) * (tree_levels + 1)); + state.add_global_memory_writes(num_rows); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) { // Execute tree that chains additions like (((a + b) + c) + d) @@ -74,15 +74,15 @@ static void BM_binaryop_transform(nvbench::state& state) template static void BM_string_compare_binaryop_transform(nvbench::state& state) { - auto const string_width = static_cast(state.get_int64("string_width")); - auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const num_comparisons = static_cast(state.get_int64("num_comparisons")); - auto const hit_rate = static_cast(state.get_int64("hit_rate")); + auto const string_width = static_cast(state.get_int64("string_width")); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const tree_levels = static_cast(state.get_int64("tree_levels")); + auto const hit_rate = static_cast(state.get_int64("hit_rate")); - CUDF_EXPECTS(num_comparisons > 0, "benchmarks require 1 or more comparisons"); + CUDF_EXPECTS(tree_levels > 0, "benchmarks require 1 or more comparisons"); // Create table data - auto const num_cols = num_comparisons * 2; + auto const num_cols = tree_levels * 2; std::vector> columns; std::for_each( thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_cols), [&](size_t) { @@ -113,7 +113,7 @@ static void BM_string_compare_binaryop_transform(nvbench::state& state) cudf::binary_operation(table.get_column(0), table.get_column(1), cmp_op, bool_type, stream); std::for_each( thrust::make_counting_iterator(1), - thrust::make_counting_iterator(num_comparisons), + thrust::make_counting_iterator(tree_levels), [&](size_t idx) { std::unique_ptr comparison = cudf::binary_operation( table.get_column(idx * 2), table.get_column(idx * 2 + 1), cmp_op, bool_type, stream); @@ -133,7 +133,7 @@ static void BM_string_compare_binaryop_transform(nvbench::state& state) } \ NVBENCH_BENCH(name) \ .add_int64_axis("tree_levels", {1, 2, 5, 10}) \ - .add_int64_axis("table_size", {100'000, 1'000'000, 10'000'000, 100'000'000}) + .add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000, 100'000'000}) BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_int32_imbalanced_unique, int32_t, @@ -158,7 +158,7 @@ BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_double_imbalanced_unique, .set_name(#name) \ .add_int64_axis("string_width", {32, 64, 128, 256}) \ .add_int64_axis("num_rows", {32768, 262144, 2097152}) \ - .add_int64_axis("num_comparisons", {1, 2, 3, 4}) \ + .add_int64_axis("tree_levels", {1, 2, 3, 4}) \ .add_int64_axis("hit_rate", {50, 100}) STRING_COMPARE_BINARYOP_TRANSFORM_BENCHMARK_DEFINE(string_compare_binaryop_transform, diff --git a/cpp/benchmarks/binaryop/compiled_binaryop.cpp b/cpp/benchmarks/binaryop/compiled_binaryop.cpp index cd3c3871a2e..426f44a4fa1 100644 --- a/cpp/benchmarks/binaryop/compiled_binaryop.cpp +++ b/cpp/benchmarks/binaryop/compiled_binaryop.cpp @@ -23,10 +23,10 @@ template void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop) { - auto const table_size = static_cast(state.get_int64("table_size")); + auto const num_rows = static_cast(state.get_int64("num_rows")); auto const source_table = create_random_table( - {cudf::type_to_id(), cudf::type_to_id()}, row_count{table_size}); + {cudf::type_to_id(), cudf::type_to_id()}, row_count{num_rows}); auto lhs = cudf::column_view(source_table->get_column(0)); auto rhs = cudf::column_view(source_table->get_column(1)); @@ -37,9 +37,9 @@ void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop) cudf::binary_operation(lhs, rhs, binop, output_dtype); // use number of bytes read and written to global memory - state.add_global_memory_reads(table_size); - state.add_global_memory_reads(table_size); - state.add_global_memory_writes(table_size); + state.add_global_memory_reads(num_rows); + state.add_global_memory_reads(num_rows); + state.add_global_memory_writes(num_rows); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) { cudf::binary_operation(lhs, rhs, binop, output_dtype); }); @@ -55,7 +55,7 @@ void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop) } \ NVBENCH_BENCH(name) \ .set_name("compiled_binary_op_" BM_STRINGIFY(name)) \ - .add_int64_axis("table_size", {10'000, 100'000, 1'000'000, 10'000'000, 100'000'000}) + .add_int64_axis("num_rows", {10'000, 100'000, 1'000'000, 10'000'000, 100'000'000}) #define build_name(a, b, c, d) a##_##b##_##c##_##d From 8d7b0d8bf0aebebde0a5036d2e51f5991ecbe63b Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Tue, 29 Oct 2024 16:31:27 -0400 Subject: [PATCH 48/76] [BUG] Replace `repo_token` with `github_token` in Auto Assign PR GHA (#17203) The Auto Assign GHA workflow fails with this [error](https://github.com/rapidsai/cudf/actions/runs/11580081781). This PR fixes this error. xref #16969 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17203 --- .github/workflows/auto-assign.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/auto-assign.yml b/.github/workflows/auto-assign.yml index 673bebd4ecc..1bf4ac08b69 100644 --- a/.github/workflows/auto-assign.yml +++ b/.github/workflows/auto-assign.yml @@ -13,5 +13,5 @@ jobs: steps: - uses: actions-ecosystem/action-add-assignees@v1 with: - repo_token: "${{ secrets.GITHUB_TOKEN }}" + github_token: "${{ secrets.GITHUB_TOKEN }}" assignees: ${{ github.actor }} From eeb4d2780163794f4b705062e49dbdc3283ebce0 Mon Sep 17 00:00:00 2001 From: Paul Mattione <156858817+pmattione-nvidia@users.noreply.github.com> Date: Tue, 29 Oct 2024 17:12:43 -0400 Subject: [PATCH 49/76] Parquet reader list microkernel (#16538) This PR refactors fixed-width parquet list reader decoding into its own set of micro-kernels, templatizing the existing fixed-width microkernels. When skipping rows for lists, this will skip ahead the decoding of the definition, repetition, and dictionary rle_streams as well. The list kernel uses 128 threads per block and 71 registers per thread, so I've changed the launch_bounds to enforce a minimum of 8 blocks per SM. This causes a small register spill but the benchmarks are still faster, as seen below: DEVICE_BUFFER list benchmarks (decompress + decode, not bound by IO): run_length 1, cardinality 0, no byte_limit: 24.7% faster run_length 32, cardinality 1000, no byte_limit: 18.3% faster run_length 1, cardinality 0, 500kb byte_limit: 57% faster run_length 32, cardinality 1000, 500kb byte_limit: 53% faster Compressed list of ints on hard drive: 5.5% faster Sample real data on hard drive (many columns not lists): 0.5% faster Authors: - Paul Mattione (https://github.com/pmattione-nvidia) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - https://github.com/nvdbaranec - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/16538 --- cpp/src/io/parquet/decode_fixed.cu | 585 ++++++++++++++++++++++++----- cpp/src/io/parquet/page_hdr.cu | 17 +- cpp/src/io/parquet/parquet_gpu.hpp | 10 + cpp/src/io/parquet/reader_impl.cpp | 45 +++ cpp/src/io/parquet/rle_stream.cuh | 81 ++-- 5 files changed, 615 insertions(+), 123 deletions(-) diff --git a/cpp/src/io/parquet/decode_fixed.cu b/cpp/src/io/parquet/decode_fixed.cu index 4522ea7fe56..45380e6ea20 100644 --- a/cpp/src/io/parquet/decode_fixed.cu +++ b/cpp/src/io/parquet/decode_fixed.cu @@ -37,7 +37,14 @@ struct block_scan_results { }; template -static __device__ void scan_block_exclusive_sum(int thread_bit, block_scan_results& results) +using block_scan_temp_storage = int[decode_block_size / cudf::detail::warp_size]; + +// Similar to CUB, must __syncthreads() after calling if reusing temp_storage +template +__device__ inline static void scan_block_exclusive_sum( + int thread_bit, + block_scan_results& results, + block_scan_temp_storage& temp_storage) { int const t = threadIdx.x; int const warp_index = t / cudf::detail::warp_size; @@ -45,15 +52,19 @@ static __device__ void scan_block_exclusive_sum(int thread_bit, block_scan_resul uint32_t const lane_mask = (uint32_t(1) << warp_lane) - 1; uint32_t warp_bits = ballot(thread_bit); - scan_block_exclusive_sum(warp_bits, warp_lane, warp_index, lane_mask, results); + scan_block_exclusive_sum( + warp_bits, warp_lane, warp_index, lane_mask, results, temp_storage); } +// Similar to CUB, must __syncthreads() after calling if reusing temp_storage template -__device__ static void scan_block_exclusive_sum(uint32_t warp_bits, - int warp_lane, - int warp_index, - uint32_t lane_mask, - block_scan_results& results) +__device__ static void scan_block_exclusive_sum( + uint32_t warp_bits, + int warp_lane, + int warp_index, + uint32_t lane_mask, + block_scan_results& results, + block_scan_temp_storage& temp_storage) { // Compute # warps constexpr int num_warps = decode_block_size / cudf::detail::warp_size; @@ -64,49 +75,64 @@ __device__ static void scan_block_exclusive_sum(uint32_t warp_bits, results.thread_count_within_warp = __popc(results.warp_bits & lane_mask); // Share the warp counts amongst the block threads - __shared__ int warp_counts[num_warps]; - if (warp_lane == 0) { warp_counts[warp_index] = results.warp_count; } - __syncthreads(); + if (warp_lane == 0) { temp_storage[warp_index] = results.warp_count; } + __syncthreads(); // Sync to share counts between threads/warps // Compute block-wide results results.block_count = 0; results.thread_count_within_block = results.thread_count_within_warp; for (int warp_idx = 0; warp_idx < num_warps; ++warp_idx) { - results.block_count += warp_counts[warp_idx]; - if (warp_idx < warp_index) { results.thread_count_within_block += warp_counts[warp_idx]; } + results.block_count += temp_storage[warp_idx]; + if (warp_idx < warp_index) { results.thread_count_within_block += temp_storage[warp_idx]; } } } -template -__device__ inline void gpuDecodeFixedWidthValues( +template +__device__ void gpuDecodeFixedWidthValues( page_state_s* s, state_buf* const sb, int start, int end, int t) { constexpr int num_warps = block_size / cudf::detail::warp_size; constexpr int max_batch_size = num_warps * cudf::detail::warp_size; - PageNestingDecodeInfo* nesting_info_base = s->nesting_info; - int const dtype = s->col.physical_type; + // nesting level that is storing actual leaf values + int const leaf_level_index = s->col.max_nesting_depth - 1; + auto const data_out = s->nesting_info[leaf_level_index].data_out; + + int const dtype = s->col.physical_type; + uint32_t const dtype_len = s->dtype_len; + + int const skipped_leaf_values = s->page.skipped_leaf_values; // decode values int pos = start; while (pos < end) { int const batch_size = min(max_batch_size, end - pos); - int const target_pos = pos + batch_size; - int const src_pos = pos + t; + int const thread_pos = pos + t; - // the position in the output column/buffer - int dst_pos = sb->nz_idx[rolling_index(src_pos)] - s->first_row; + // Index from value buffer (doesn't include nulls) to final array (has gaps for nulls) + int const dst_pos = [&]() { + int dst_pos = sb->nz_idx[rolling_index(thread_pos)]; + if constexpr (!has_lists_t) { dst_pos -= s->first_row; } + return dst_pos; + }(); // target_pos will always be properly bounded by num_rows, but dst_pos may be negative (values // before first_row) in the flat hierarchy case. - if (src_pos < target_pos && dst_pos >= 0) { + if (thread_pos < target_pos && dst_pos >= 0) { // nesting level that is storing actual leaf values - int const leaf_level_index = s->col.max_nesting_depth - 1; - uint32_t dtype_len = s->dtype_len; - void* dst = - nesting_info_base[leaf_level_index].data_out + static_cast(dst_pos) * dtype_len; + // src_pos represents the logical row position we want to read from. But in the case of + // nested hierarchies (lists), there is no 1:1 mapping of rows to values. So src_pos + // has to take into account the # of values we have to skip in the page to get to the + // desired logical row. For flat hierarchies, skipped_leaf_values will always be 0. + int const src_pos = [&]() { + if constexpr (has_lists_t) { return thread_pos + skipped_leaf_values; } + return thread_pos; + }(); + + void* const dst = data_out + (static_cast(dst_pos) * dtype_len); + if (s->col.logical_type.has_value() && s->col.logical_type->type == LogicalType::DECIMAL) { switch (dtype) { case INT32: gpuOutputFast(s, sb, src_pos, static_cast(dst)); break; @@ -145,15 +171,15 @@ __device__ inline void gpuDecodeFixedWidthValues( } } -template +template struct decode_fixed_width_values_func { __device__ inline void operator()(page_state_s* s, state_buf* const sb, int start, int end, int t) { - gpuDecodeFixedWidthValues(s, sb, start, end, t); + gpuDecodeFixedWidthValues(s, sb, start, end, t); } }; -template +template __device__ inline void gpuDecodeFixedWidthSplitValues( page_state_s* s, state_buf* const sb, int start, int end, int t) { @@ -161,10 +187,15 @@ __device__ inline void gpuDecodeFixedWidthSplitValues( constexpr int num_warps = block_size / warp_size; constexpr int max_batch_size = num_warps * warp_size; - PageNestingDecodeInfo* nesting_info_base = s->nesting_info; - int const dtype = s->col.physical_type; - auto const data_len = thrust::distance(s->data_start, s->data_end); - auto const num_values = data_len / s->dtype_len_in; + // nesting level that is storing actual leaf values + int const leaf_level_index = s->col.max_nesting_depth - 1; + auto const data_out = s->nesting_info[leaf_level_index].data_out; + + int const dtype = s->col.physical_type; + auto const data_len = thrust::distance(s->data_start, s->data_end); + auto const num_values = data_len / s->dtype_len_in; + + int const skipped_leaf_values = s->page.skipped_leaf_values; // decode values int pos = start; @@ -172,21 +203,34 @@ __device__ inline void gpuDecodeFixedWidthSplitValues( int const batch_size = min(max_batch_size, end - pos); int const target_pos = pos + batch_size; - int const src_pos = pos + t; + int const thread_pos = pos + t; // the position in the output column/buffer - int dst_pos = sb->nz_idx[rolling_index(src_pos)] - s->first_row; + // Index from value buffer (doesn't include nulls) to final array (has gaps for nulls) + int const dst_pos = [&]() { + int dst_pos = sb->nz_idx[rolling_index(thread_pos)]; + if constexpr (!has_lists_t) { dst_pos -= s->first_row; } + return dst_pos; + }(); // target_pos will always be properly bounded by num_rows, but dst_pos may be negative (values // before first_row) in the flat hierarchy case. - if (src_pos < target_pos && dst_pos >= 0) { - // nesting level that is storing actual leaf values - int const leaf_level_index = s->col.max_nesting_depth - 1; + if (thread_pos < target_pos && dst_pos >= 0) { + // src_pos represents the logical row position we want to read from. But in the case of + // nested hierarchies (lists), there is no 1:1 mapping of rows to values. So src_pos + // has to take into account the # of values we have to skip in the page to get to the + // desired logical row. For flat hierarchies, skipped_leaf_values will always be 0. + int const src_pos = [&]() { + if constexpr (has_lists_t) { + return thread_pos + skipped_leaf_values; + } else { + return thread_pos; + } + }(); - uint32_t dtype_len = s->dtype_len; - uint8_t const* src = s->data_start + src_pos; - uint8_t* dst = - nesting_info_base[leaf_level_index].data_out + static_cast(dst_pos) * dtype_len; + uint32_t const dtype_len = s->dtype_len; + uint8_t const* const src = s->data_start + src_pos; + uint8_t* const dst = data_out + static_cast(dst_pos) * dtype_len; auto const is_decimal = s->col.logical_type.has_value() and s->col.logical_type->type == LogicalType::DECIMAL; @@ -239,11 +283,11 @@ __device__ inline void gpuDecodeFixedWidthSplitValues( } } -template +template struct decode_fixed_width_split_values_func { __device__ inline void operator()(page_state_s* s, state_buf* const sb, int start, int end, int t) { - gpuDecodeFixedWidthSplitValues(s, sb, start, end, t); + gpuDecodeFixedWidthSplitValues(s, sb, start, end, t); } }; @@ -274,12 +318,14 @@ static __device__ int gpuUpdateValidityAndRowIndicesNested( int const batch_size = min(max_batch_size, capped_target_value_count - value_count); // definition level - int d = 1; - if (t >= batch_size) { - d = -1; - } else if (def) { - d = static_cast(def[rolling_index(value_count + t)]); - } + int const d = [&]() { + if (t >= batch_size) { + return -1; + } else if (def) { + return static_cast(def[rolling_index(value_count + t)]); + } + return 1; + }(); int const thread_value_count = t; int const block_value_count = batch_size; @@ -340,6 +386,7 @@ static __device__ int gpuUpdateValidityAndRowIndicesNested( if (is_valid) { int const dst_pos = value_count + thread_value_count; int const src_pos = max_depth_valid_count + thread_valid_count; + sb->nz_idx[rolling_index(src_pos)] = dst_pos; } // update stuff @@ -396,16 +443,16 @@ static __device__ int gpuUpdateValidityAndRowIndicesFlat( int const in_row_bounds = (row_index >= row_index_lower_bound) && (row_index < last_row); // use definition level & row bounds to determine if is valid - int is_valid; - if (t >= batch_size) { - is_valid = 0; - } else if (def) { - int const def_level = - static_cast(def[rolling_index(value_count + t)]); - is_valid = ((def_level > 0) && in_row_bounds) ? 1 : 0; - } else { - is_valid = in_row_bounds; - } + int const is_valid = [&]() { + if (t >= batch_size) { + return 0; + } else if (def) { + int const def_level = + static_cast(def[rolling_index(value_count + t)]); + return ((def_level > 0) && in_row_bounds) ? 1 : 0; + } + return in_row_bounds; + }(); // thread and block validity count using block_scan = cub::BlockScan; @@ -447,8 +494,9 @@ static __device__ int gpuUpdateValidityAndRowIndicesFlat( // output offset if (is_valid) { - int const dst_pos = value_count + thread_value_count; - int const src_pos = valid_count + thread_valid_count; + int const dst_pos = value_count + thread_value_count; + int const src_pos = valid_count + thread_valid_count; + sb->nz_idx[rolling_index(src_pos)] = dst_pos; } @@ -460,7 +508,7 @@ static __device__ int gpuUpdateValidityAndRowIndicesFlat( if (t == 0) { // update valid value count for decoding and total # of values we've processed ni.valid_count = valid_count; - ni.value_count = value_count; // TODO: remove? this is unused in the non-list path + ni.value_count = value_count; s->nz_count = valid_count; s->input_value_count = value_count; s->input_row_count = value_count; @@ -533,6 +581,239 @@ static __device__ int gpuUpdateValidityAndRowIndicesNonNullable(int32_t target_v return valid_count; } +template +static __device__ int gpuUpdateValidityAndRowIndicesLists(int32_t target_value_count, + page_state_s* s, + state_buf* sb, + level_t const* const def, + level_t const* const rep, + int t) +{ + constexpr int num_warps = decode_block_size / cudf::detail::warp_size; + constexpr int max_batch_size = num_warps * cudf::detail::warp_size; + + // how many (input) values we've processed in the page so far, prior to this loop iteration + int value_count = s->input_value_count; + + // how many rows we've processed in the page so far + int input_row_count = s->input_row_count; + + // cap by last row so that we don't process any rows past what we want to output. + int const first_row = s->first_row; + int const last_row = first_row + s->num_rows; + + int const row_index_lower_bound = s->row_index_lower_bound; + int const max_depth = s->col.max_nesting_depth - 1; + int max_depth_valid_count = s->nesting_info[max_depth].valid_count; + + int const warp_index = t / cudf::detail::warp_size; + int const warp_lane = t % cudf::detail::warp_size; + bool const is_first_lane = (warp_lane == 0); + + __syncthreads(); + __shared__ block_scan_temp_storage temp_storage; + + while (value_count < target_value_count) { + bool const within_batch = value_count + t < target_value_count; + + // get definition level, use repetition level to get start/end depth + // different for each thread, as each thread has a different r/d + auto const [def_level, start_depth, end_depth] = [&]() { + if (!within_batch) { return cuda::std::make_tuple(-1, -1, -1); } + + int const level_index = rolling_index(value_count + t); + int const rep_level = static_cast(rep[level_index]); + int const start_depth = s->nesting_info[rep_level].start_depth; + + if constexpr (!nullable) { + return cuda::std::make_tuple(-1, start_depth, max_depth); + } else { + if (def != nullptr) { + int const def_level = static_cast(def[level_index]); + return cuda::std::make_tuple( + def_level, start_depth, s->nesting_info[def_level].end_depth); + } else { + return cuda::std::make_tuple(1, start_depth, max_depth); + } + } + }(); + + // Determine value count & row index + // track (page-relative) row index for the thread so we can compare against input bounds + // keep track of overall # of rows we've read. + int const is_new_row = start_depth == 0 ? 1 : 0; + int num_prior_new_rows, total_num_new_rows; + { + block_scan_results new_row_scan_results; + scan_block_exclusive_sum(is_new_row, new_row_scan_results, temp_storage); + __syncthreads(); + num_prior_new_rows = new_row_scan_results.thread_count_within_block; + total_num_new_rows = new_row_scan_results.block_count; + } + + int const row_index = input_row_count + ((num_prior_new_rows + is_new_row) - 1); + input_row_count += total_num_new_rows; + int const in_row_bounds = (row_index >= row_index_lower_bound) && (row_index < last_row); + + // VALUE COUNT: + // in_nesting_bounds: if at a nesting level where we need to add value indices + // the bounds: from current rep to the rep AT the def depth + int in_nesting_bounds = ((0 >= start_depth && 0 <= end_depth) && in_row_bounds) ? 1 : 0; + int thread_value_count_within_warp, warp_value_count, thread_value_count, block_value_count; + { + block_scan_results value_count_scan_results; + scan_block_exclusive_sum( + in_nesting_bounds, value_count_scan_results, temp_storage); + __syncthreads(); + + thread_value_count_within_warp = value_count_scan_results.thread_count_within_warp; + warp_value_count = value_count_scan_results.warp_count; + thread_value_count = value_count_scan_results.thread_count_within_block; + block_value_count = value_count_scan_results.block_count; + } + + // iterate by depth + for (int d_idx = 0; d_idx <= max_depth; d_idx++) { + auto& ni = s->nesting_info[d_idx]; + + // everything up to the max_def_level is a non-null value + int const is_valid = [&](int input_def_level) { + if constexpr (nullable) { + return ((input_def_level >= ni.max_def_level) && in_nesting_bounds) ? 1 : 0; + } else { + return in_nesting_bounds; + } + }(def_level); + + // VALID COUNT: + // Not all values visited by this block will represent a value at this nesting level. + // the validity bit for thread t might actually represent output value t-6. + // the correct position for thread t's bit is thread_value_count. + uint32_t const warp_valid_mask = + WarpReduceOr32((uint32_t)is_valid << thread_value_count_within_warp); + int thread_valid_count, block_valid_count; + { + auto thread_mask = (uint32_t(1) << thread_value_count_within_warp) - 1; + + block_scan_results valid_count_scan_results; + scan_block_exclusive_sum(warp_valid_mask, + warp_lane, + warp_index, + thread_mask, + valid_count_scan_results, + temp_storage); + __syncthreads(); + thread_valid_count = valid_count_scan_results.thread_count_within_block; + block_valid_count = valid_count_scan_results.block_count; + } + + // compute warp and thread value counts for the -next- nesting level. we need to + // do this for lists so that we can emit an offset for the -current- nesting level. + // the offset for the current nesting level == current length of the next nesting level + int next_thread_value_count_within_warp = 0, next_warp_value_count = 0; + int next_thread_value_count = 0, next_block_value_count = 0; + int next_in_nesting_bounds = 0; + if (d_idx < max_depth) { + // NEXT DEPTH VALUE COUNT: + next_in_nesting_bounds = + ((d_idx + 1 >= start_depth) && (d_idx + 1 <= end_depth) && in_row_bounds) ? 1 : 0; + { + block_scan_results next_value_count_scan_results; + scan_block_exclusive_sum( + next_in_nesting_bounds, next_value_count_scan_results, temp_storage); + __syncthreads(); + + next_thread_value_count_within_warp = + next_value_count_scan_results.thread_count_within_warp; + next_warp_value_count = next_value_count_scan_results.warp_count; + next_thread_value_count = next_value_count_scan_results.thread_count_within_block; + next_block_value_count = next_value_count_scan_results.block_count; + } + + // STORE OFFSET TO THE LIST LOCATION + // if we're -not- at a leaf column and we're within nesting/row bounds + // and we have a valid data_out pointer, it implies this is a list column, so + // emit an offset. + if (in_nesting_bounds && ni.data_out != nullptr) { + const auto& next_ni = s->nesting_info[d_idx + 1]; + int const idx = ni.value_count + thread_value_count; + cudf::size_type const ofs = + next_ni.value_count + next_thread_value_count + next_ni.page_start_value; + + (reinterpret_cast(ni.data_out))[idx] = ofs; + } + } + + // validity is processed per-warp (on lane 0's) + // thi is because when atomic writes are needed, they are 32-bit operations + // + // lists always read and write to the same bounds + // (that is, read and write positions are already pre-bounded by first_row/num_rows). + // since we are about to write the validity vector + // here we need to adjust our computed mask to take into account the write row bounds. + if constexpr (nullable) { + if (is_first_lane && (ni.valid_map != nullptr) && (warp_value_count > 0)) { + // absolute bit offset into the output validity map + // is cumulative sum of warp_value_count at the given nesting depth + // DON'T subtract by first_row: since it's lists it's not 1-row-per-value + int const bit_offset = ni.valid_map_offset + thread_value_count; + + store_validity(bit_offset, ni.valid_map, warp_valid_mask, warp_value_count); + } + + if (t == 0) { ni.null_count += block_value_count - block_valid_count; } + } + + // if this is valid and we're at the leaf, output dst_pos + // Read value_count before the sync, so that when thread 0 modifies it we've already read its + // value + int const current_value_count = ni.value_count; + __syncthreads(); // guard against modification of ni.value_count below + if (d_idx == max_depth) { + if (is_valid) { + int const dst_pos = current_value_count + thread_value_count; + int const src_pos = max_depth_valid_count + thread_valid_count; + int const output_index = rolling_index(src_pos); + + // Index from rolling buffer of values (which doesn't include nulls) to final array (which + // includes gaps for nulls) + sb->nz_idx[output_index] = dst_pos; + } + max_depth_valid_count += block_valid_count; + } + + // update stuff + if (t == 0) { + ni.value_count += block_value_count; + ni.valid_map_offset += block_value_count; + } + __syncthreads(); // sync modification of ni.value_count + + // propagate value counts for the next depth level + block_value_count = next_block_value_count; + thread_value_count = next_thread_value_count; + in_nesting_bounds = next_in_nesting_bounds; + warp_value_count = next_warp_value_count; + thread_value_count_within_warp = next_thread_value_count_within_warp; + } // END OF DEPTH LOOP + + int const batch_size = min(max_batch_size, target_value_count - value_count); + value_count += batch_size; + } + + if (t == 0) { + // update valid value count for decoding and total # of values we've processed + s->nesting_info[max_depth].valid_count = max_depth_valid_count; + s->nz_count = max_depth_valid_count; + s->input_value_count = value_count; + + // If we have lists # rows != # values + s->input_row_count = input_row_count; + } + + return max_depth_valid_count; +} + // is the page marked nullable or not __device__ inline bool is_nullable(page_state_s* s) { @@ -560,6 +841,23 @@ __device__ inline bool maybe_has_nulls(page_state_s* s) return run_val != s->col.max_level[lvl]; } +template +__device__ int skip_decode(stream_type& parquet_stream, int num_to_skip, int t) +{ + // it could be that (e.g.) we skip 5000 but starting at row 4000 we have a run of length 2000: + // in that case skip_decode() only skips 4000, and we have to process the remaining 1000 up front + // modulo 2 * block_size of course, since that's as many as we process at once + int num_skipped = parquet_stream.skip_decode(t, num_to_skip); + while (num_skipped < num_to_skip) { + // TODO: Instead of decoding, skip within the run to the appropriate location + auto const to_decode = min(rolling_buf_size, num_to_skip - num_skipped); + num_skipped += parquet_stream.decode_next(t, to_decode); + __syncthreads(); + } + + return num_skipped; +} + /** * @brief Kernel for computing fixed width non dictionary column data stored in the pages * @@ -579,9 +877,10 @@ template + bool has_lists_t, + template typename DecodeValuesFunc> -CUDF_KERNEL void __launch_bounds__(decode_block_size_t) +CUDF_KERNEL void __launch_bounds__(decode_block_size_t, 8) gpuDecodePageDataGeneric(PageInfo* pages, device_span chunks, size_t min_row, @@ -621,31 +920,29 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size_t) // if we have no work to do (eg, in a skip_rows/num_rows case) in this page. if (s->num_rows == 0) { return; } - DecodeValuesFunc decode_values; + DecodeValuesFunc decode_values; - bool const nullable = is_nullable(s); - bool const should_process_nulls = nullable && maybe_has_nulls(s); + bool const should_process_nulls = is_nullable(s) && maybe_has_nulls(s); // shared buffer. all shared memory is suballocated out of here - // constexpr int shared_rep_size = has_lists_t ? cudf::util::round_up_unsafe(rle_run_buffer_size * - // sizeof(rle_run), size_t{16}) : 0; + constexpr int shared_rep_size = + has_lists_t + ? cudf::util::round_up_unsafe(rle_run_buffer_size * sizeof(rle_run), size_t{16}) + : 0; constexpr int shared_dict_size = has_dict_t ? cudf::util::round_up_unsafe(rle_run_buffer_size * sizeof(rle_run), size_t{16}) : 0; constexpr int shared_def_size = cudf::util::round_up_unsafe(rle_run_buffer_size * sizeof(rle_run), size_t{16}); - constexpr int shared_buf_size = /*shared_rep_size +*/ shared_dict_size + shared_def_size; + constexpr int shared_buf_size = shared_rep_size + shared_dict_size + shared_def_size; __shared__ __align__(16) uint8_t shared_buf[shared_buf_size]; // setup all shared memory buffers - int shared_offset = 0; - /* - rle_run *rep_runs = reinterpret_cast*>(shared_buf + shared_offset); - if constexpr (has_lists_t){ - shared_offset += shared_rep_size; - } - */ + int shared_offset = 0; + rle_run* rep_runs = reinterpret_cast*>(shared_buf + shared_offset); + if constexpr (has_lists_t) { shared_offset += shared_rep_size; } + rle_run* dict_runs = reinterpret_cast*>(shared_buf + shared_offset); if constexpr (has_dict_t) { shared_offset += shared_dict_size; } rle_run* def_runs = reinterpret_cast*>(shared_buf + shared_offset); @@ -660,38 +957,51 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size_t) def, s->page.num_input_values); } - /* + rle_stream rep_decoder{rep_runs}; level_t* const rep = reinterpret_cast(pp->lvl_decode_buf[level_type::REPETITION]); - if constexpr(has_lists_t){ + if constexpr (has_lists_t) { rep_decoder.init(s->col.level_bits[level_type::REPETITION], s->abs_lvl_start[level_type::REPETITION], s->abs_lvl_end[level_type::REPETITION], rep, s->page.num_input_values); } - */ rle_stream dict_stream{dict_runs}; if constexpr (has_dict_t) { dict_stream.init( s->dict_bits, s->data_start, s->data_end, sb->dict_idx, s->page.num_input_values); } - __syncthreads(); // We use two counters in the loop below: processed_count and valid_count. - // - processed_count: number of rows out of num_input_values that we have decoded so far. + // - processed_count: number of values out of num_input_values that we have decoded so far. // the definition stream returns the number of total rows it has processed in each call // to decode_next and we accumulate in process_count. - // - valid_count: number of non-null rows we have decoded so far. In each iteration of the + // - valid_count: number of non-null values we have decoded so far. In each iteration of the // loop below, we look at the number of valid items (which could be all for non-nullable), // and valid_count is that running count. int processed_count = 0; int valid_count = 0; + + // Skip ahead in the decoding so that we don't repeat work (skipped_leaf_values = 0 for non-lists) + if constexpr (has_lists_t) { + auto const skipped_leaf_values = s->page.skipped_leaf_values; + if (skipped_leaf_values > 0) { + if (should_process_nulls) { + skip_decode(def_decoder, skipped_leaf_values, t); + } + processed_count = skip_decode(rep_decoder, skipped_leaf_values, t); + if constexpr (has_dict_t) { + skip_decode(dict_stream, skipped_leaf_values, t); + } + } + } + // the core loop. decode batches of level stream data using rle_stream objects // and pass the results to gpuDecodeValues // For chunked reads we may not process all of the rows on the page; if not stop early - int last_row = s->first_row + s->num_rows; + int const last_row = s->first_row + s->num_rows; while ((s->error == 0) && (processed_count < s->page.num_input_values) && (s->input_row_count <= last_row)) { int next_valid_count; @@ -701,7 +1011,12 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size_t) processed_count += def_decoder.decode_next(t); __syncthreads(); - if constexpr (has_nesting_t) { + if constexpr (has_lists_t) { + rep_decoder.decode_next(t); + __syncthreads(); + next_valid_count = gpuUpdateValidityAndRowIndicesLists( + processed_count, s, sb, def, rep, t); + } else if constexpr (has_nesting_t) { next_valid_count = gpuUpdateValidityAndRowIndicesNested( processed_count, s, sb, def, t); } else { @@ -713,9 +1028,16 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size_t) // this function call entirely since all it will ever generate is a mapping of (i -> i) for // nz_idx. gpuDecodeFixedWidthValues would be the only work that happens. else { - processed_count += min(rolling_buf_size, s->page.num_input_values - processed_count); - next_valid_count = - gpuUpdateValidityAndRowIndicesNonNullable(processed_count, s, sb, t); + if constexpr (has_lists_t) { + processed_count += rep_decoder.decode_next(t); + __syncthreads(); + next_valid_count = gpuUpdateValidityAndRowIndicesLists( + processed_count, s, sb, nullptr, rep, t); + } else { + processed_count += min(rolling_buf_size, s->page.num_input_values - processed_count); + next_valid_count = + gpuUpdateValidityAndRowIndicesNonNullable(processed_count, s, sb, t); + } } __syncthreads(); @@ -745,6 +1067,7 @@ void __host__ DecodePageDataFixed(cudf::detail::hostdevice_span pages, size_t min_row, int level_type_size, bool has_nesting, + bool is_list, kernel_error::pointer error_code, rmm::cuda_stream_view stream) { @@ -754,12 +1077,23 @@ void __host__ DecodePageDataFixed(cudf::detail::hostdevice_span pages, dim3 dim_grid(pages.size(), 1); // 1 threadblock per page if (level_type_size == 1) { - if (has_nesting) { + if (is_list) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else if (has_nesting) { gpuDecodePageDataGeneric <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); @@ -769,17 +1103,29 @@ void __host__ DecodePageDataFixed(cudf::detail::hostdevice_span pages, decode_kernel_mask::FIXED_WIDTH_NO_DICT, false, false, + false, decode_fixed_width_values_func> <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); } } else { - if (has_nesting) { + if (is_list) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else if (has_nesting) { gpuDecodePageDataGeneric <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); @@ -789,6 +1135,7 @@ void __host__ DecodePageDataFixed(cudf::detail::hostdevice_span pages, decode_kernel_mask::FIXED_WIDTH_NO_DICT, false, false, + false, decode_fixed_width_values_func> <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); @@ -802,6 +1149,7 @@ void __host__ DecodePageDataFixedDict(cudf::detail::hostdevice_span pa size_t min_row, int level_type_size, bool has_nesting, + bool is_list, kernel_error::pointer error_code, rmm::cuda_stream_view stream) { @@ -811,12 +1159,23 @@ void __host__ DecodePageDataFixedDict(cudf::detail::hostdevice_span pa dim3 dim_grid(pages.size(), 1); // 1 thread block per page => # blocks if (level_type_size == 1) { - if (has_nesting) { + if (is_list) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else if (has_nesting) { gpuDecodePageDataGeneric <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); @@ -826,17 +1185,29 @@ void __host__ DecodePageDataFixedDict(cudf::detail::hostdevice_span pa decode_kernel_mask::FIXED_WIDTH_DICT, true, false, + false, decode_fixed_width_values_func> <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); } } else { - if (has_nesting) { + if (is_list) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else if (has_nesting) { gpuDecodePageDataGeneric <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); @@ -846,6 +1217,7 @@ void __host__ DecodePageDataFixedDict(cudf::detail::hostdevice_span pa decode_kernel_mask::FIXED_WIDTH_DICT, true, false, + true, decode_fixed_width_values_func> <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); @@ -860,6 +1232,7 @@ DecodeSplitPageFixedWidthData(cudf::detail::hostdevice_span pages, size_t min_row, int level_type_size, bool has_nesting, + bool is_list, kernel_error::pointer error_code, rmm::cuda_stream_view stream) { @@ -869,12 +1242,23 @@ DecodeSplitPageFixedWidthData(cudf::detail::hostdevice_span pages, dim3 dim_grid(pages.size(), 1); // 1 thread block per page => # blocks if (level_type_size == 1) { - if (has_nesting) { + if (is_list) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else if (has_nesting) { gpuDecodePageDataGeneric <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); @@ -884,17 +1268,29 @@ DecodeSplitPageFixedWidthData(cudf::detail::hostdevice_span pages, decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_FLAT, false, false, + false, decode_fixed_width_split_values_func> <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); } } else { - if (has_nesting) { + if (is_list) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else if (has_nesting) { gpuDecodePageDataGeneric <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); @@ -904,6 +1300,7 @@ DecodeSplitPageFixedWidthData(cudf::detail::hostdevice_span pages, decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_FLAT, false, false, + false, decode_fixed_width_split_values_func> <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu index d604642be54..52d53cb8225 100644 --- a/cpp/src/io/parquet/page_hdr.cu +++ b/cpp/src/io/parquet/page_hdr.cu @@ -183,17 +183,20 @@ __device__ decode_kernel_mask kernel_mask_for_page(PageInfo const& page, return decode_kernel_mask::STRING; } - if (!is_list(chunk) && !is_byte_array(chunk) && !is_boolean(chunk)) { + if (!is_byte_array(chunk) && !is_boolean(chunk)) { if (page.encoding == Encoding::PLAIN) { - return is_nested(chunk) ? decode_kernel_mask::FIXED_WIDTH_NO_DICT_NESTED - : decode_kernel_mask::FIXED_WIDTH_NO_DICT; + return is_list(chunk) ? decode_kernel_mask::FIXED_WIDTH_NO_DICT_LIST + : is_nested(chunk) ? decode_kernel_mask::FIXED_WIDTH_NO_DICT_NESTED + : decode_kernel_mask::FIXED_WIDTH_NO_DICT; } else if (page.encoding == Encoding::PLAIN_DICTIONARY || page.encoding == Encoding::RLE_DICTIONARY) { - return is_nested(chunk) ? decode_kernel_mask::FIXED_WIDTH_DICT_NESTED - : decode_kernel_mask::FIXED_WIDTH_DICT; + return is_list(chunk) ? decode_kernel_mask::FIXED_WIDTH_DICT_LIST + : is_nested(chunk) ? decode_kernel_mask::FIXED_WIDTH_DICT_NESTED + : decode_kernel_mask::FIXED_WIDTH_DICT; } else if (page.encoding == Encoding::BYTE_STREAM_SPLIT) { - return is_nested(chunk) ? decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_NESTED - : decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_FLAT; + return is_list(chunk) ? decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_LIST + : is_nested(chunk) ? decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_NESTED + : decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_FLAT; } } diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index be502b581af..dba24b553e6 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -220,6 +220,10 @@ enum class decode_kernel_mask { (1 << 9), // Same as above but for nested, fixed-width data FIXED_WIDTH_NO_DICT_NESTED = (1 << 10), // Run decode kernel for fixed width non-dictionary pages FIXED_WIDTH_DICT_NESTED = (1 << 11), // Run decode kernel for fixed width dictionary pages + FIXED_WIDTH_DICT_LIST = (1 << 12), // Run decode kernel for fixed width dictionary pages + FIXED_WIDTH_NO_DICT_LIST = (1 << 13), // Run decode kernel for fixed width non-dictionary pages + BYTE_STREAM_SPLIT_FIXED_WIDTH_LIST = + (1 << 14), // Run decode kernel for BYTE_STREAM_SPLIT encoded data for fixed width lists }; // mask representing all the ways in which a string can be encoded @@ -908,6 +912,7 @@ void DecodeDeltaLengthByteArray(cudf::detail::hostdevice_span pages, * @param[in] min_row Minimum number of rows to read * @param[in] level_type_size Size in bytes of the type for level decoding * @param[in] has_nesting Whether or not the data contains nested (but not list) data. + * @param[in] is_list Whether or not the data contains list data. * @param[out] error_code Error code for kernel failures * @param[in] stream CUDA stream to use */ @@ -917,6 +922,7 @@ void DecodePageDataFixed(cudf::detail::hostdevice_span pages, size_t min_row, int level_type_size, bool has_nesting, + bool is_list, kernel_error::pointer error_code, rmm::cuda_stream_view stream); @@ -932,6 +938,7 @@ void DecodePageDataFixed(cudf::detail::hostdevice_span pages, * @param[in] min_row Minimum number of rows to read * @param[in] level_type_size Size in bytes of the type for level decoding * @param[in] has_nesting Whether or not the data contains nested (but not list) data. + * @param[in] is_list Whether or not the data contains list data. * @param[out] error_code Error code for kernel failures * @param[in] stream CUDA stream to use */ @@ -941,6 +948,7 @@ void DecodePageDataFixedDict(cudf::detail::hostdevice_span pages, size_t min_row, int level_type_size, bool has_nesting, + bool is_list, kernel_error::pointer error_code, rmm::cuda_stream_view stream); @@ -956,6 +964,7 @@ void DecodePageDataFixedDict(cudf::detail::hostdevice_span pages, * @param[in] min_row Minimum number of rows to read * @param[in] level_type_size Size in bytes of the type for level decoding * @param[in] has_nesting Whether or not the data contains nested (but not list) data. + * @param[in] is_list Whether or not the data contains list data. * @param[out] error_code Error code for kernel failures * @param[in] stream CUDA stream to use */ @@ -965,6 +974,7 @@ void DecodeSplitPageFixedWidthData(cudf::detail::hostdevice_span pages size_t min_row, int level_type_size, bool has_nesting, + bool is_list, kernel_error::pointer error_code, rmm::cuda_stream_view stream); diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index fed1a309064..689386b8957 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -272,6 +272,7 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num skip_rows, level_type_size, false, + false, error_code.data(), streams[s_idx++]); } @@ -284,6 +285,20 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num skip_rows, level_type_size, true, + false, + error_code.data(), + streams[s_idx++]); + } + + // launch byte stream split decoder, for list columns + if (BitAnd(kernel_mask, decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_LIST) != 0) { + DecodeSplitPageFixedWidthData(subpass.pages, + pass.chunks, + num_rows, + skip_rows, + level_type_size, + true, + true, error_code.data(), streams[s_idx++]); } @@ -307,6 +322,20 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num skip_rows, level_type_size, false, + false, + error_code.data(), + streams[s_idx++]); + } + + // launch fixed width type decoder for lists + if (BitAnd(kernel_mask, decode_kernel_mask::FIXED_WIDTH_NO_DICT_LIST) != 0) { + DecodePageDataFixed(subpass.pages, + pass.chunks, + num_rows, + skip_rows, + level_type_size, + true, + true, error_code.data(), streams[s_idx++]); } @@ -319,6 +348,7 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num skip_rows, level_type_size, true, + false, error_code.data(), streams[s_idx++]); } @@ -331,6 +361,20 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num skip_rows, level_type_size, false, + false, + error_code.data(), + streams[s_idx++]); + } + + // launch fixed width type decoder with dictionaries for lists + if (BitAnd(kernel_mask, decode_kernel_mask::FIXED_WIDTH_DICT_LIST) != 0) { + DecodePageDataFixedDict(subpass.pages, + pass.chunks, + num_rows, + skip_rows, + level_type_size, + true, + true, error_code.data(), streams[s_idx++]); } @@ -343,6 +387,7 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num skip_rows, level_type_size, true, + false, error_code.data(), streams[s_idx++]); } diff --git a/cpp/src/io/parquet/rle_stream.cuh b/cpp/src/io/parquet/rle_stream.cuh index 4a0791d5c54..69e783a89d0 100644 --- a/cpp/src/io/parquet/rle_stream.cuh +++ b/cpp/src/io/parquet/rle_stream.cuh @@ -19,6 +19,7 @@ #include "parquet_gpu.hpp" #include +#include namespace cudf::io::parquet::detail { @@ -216,6 +217,26 @@ struct rle_stream { decode_index = -1; // signals the first iteration. Nothing to decode. } + __device__ inline int get_rle_run_info(rle_run& run) + { + run.start = cur; + run.level_run = get_vlq32(run.start, end); + + // run_bytes includes the header size + int run_bytes = run.start - cur; + if (is_literal_run(run.level_run)) { + // from the parquet spec: literal runs always come in multiples of 8 values. + run.size = (run.level_run >> 1) * 8; + run_bytes += util::div_rounding_up_unsafe(run.size * level_bits, 8); + } else { + // repeated value run + run.size = (run.level_run >> 1); + run_bytes += util::div_rounding_up_unsafe(level_bits, 8); + } + + return run_bytes; + } + __device__ inline void fill_run_batch() { // decode_index == -1 means we are on the very first decode iteration for this stream. @@ -226,31 +247,14 @@ struct rle_stream { while (((decode_index == -1 && fill_index < num_rle_stream_decode_warps) || fill_index < decode_index + run_buffer_size) && cur < end) { - auto& run = runs[rolling_index(fill_index)]; - // Encoding::RLE + // Pass by reference to fill the runs shared memory with the run data + auto& run = runs[rolling_index(fill_index)]; + int const run_bytes = get_rle_run_info(run); - // bytes for the varint header - uint8_t const* _cur = cur; - int const level_run = get_vlq32(_cur, end); - // run_bytes includes the header size - int run_bytes = _cur - cur; - - // literal run - if (is_literal_run(level_run)) { - // from the parquet spec: literal runs always come in multiples of 8 values. - run.size = (level_run >> 1) * 8; - run_bytes += ((run.size * level_bits) + 7) >> 3; - } - // repeated value run - else { - run.size = (level_run >> 1); - run_bytes += ((level_bits) + 7) >> 3; - } - run.output_pos = output_pos; - run.start = _cur; - run.level_run = level_run; run.remaining = run.size; + run.output_pos = output_pos; + cur += run_bytes; output_pos += run.size; fill_index++; @@ -372,6 +376,39 @@ struct rle_stream { return values_processed_shared; } + __device__ inline int skip_runs(int target_count) + { + // we want to process all runs UP TO BUT NOT INCLUDING the run that overlaps with the skip + // amount so threads spin like crazy on fill_run_batch(), skipping writing unnecessary run info. + // then when it hits the one that matters, we don't process it at all and bail as if we never + // started basically we're setting up the rle_stream vars necessary to start fill_run_batch for + // the first time + while (cur < end) { + rle_run run; + int run_bytes = get_rle_run_info(run); + + if ((output_pos + run.size) > target_count) { + return output_pos; // bail! we've reached the starting run + } + + // skip this run + output_pos += run.size; + cur += run_bytes; + } + + return output_pos; // we skipped everything + } + + __device__ inline int skip_decode(int t, int count) + { + int const output_count = min(count, total_values - cur_values); + + // if level_bits == 0, there's nothing to do + // a very common case: columns with no nulls, especially if they are non-nested + cur_values = (level_bits == 0) ? output_count : skip_runs(output_count); + return cur_values; + } + __device__ inline int decode_next(int t) { return decode_next(t, max_output_values); } }; From 6328ad679947eb5cbc352c345a28f079aa6b8005 Mon Sep 17 00:00:00 2001 From: Renjie Liu Date: Wed, 30 Oct 2024 17:17:47 +0800 Subject: [PATCH 50/76] Make ai.rapids.cudf.HostMemoryBuffer#copyFromStream public. (#17179) This is the first pr of [a larger one](https://github.com/NVIDIA/spark-rapids-jni/pull/2532) to introduce a new serialization format. It make `ai.rapids.cudf.HostMemoryBuffer#copyFromStream` public. For more background, see https://github.com/NVIDIA/spark-rapids-jni/issues/2496 Authors: - Renjie Liu (https://github.com/liurenjie1024) - Jason Lowe (https://github.com/jlowe) Approvers: - Jason Lowe (https://github.com/jlowe) - Alessandro Bellina (https://github.com/abellina) URL: https://github.com/rapidsai/cudf/pull/17179 --- java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java b/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java index d792459901c..bfb959b12c1 100644 --- a/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java +++ b/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java @@ -255,8 +255,10 @@ public final void copyFromHostBuffer(long destOffset, HostMemoryBuffer srcData, * @param destOffset offset in bytes in this buffer to start copying to * @param in input stream to copy bytes from * @param byteLength number of bytes to copy + * @throws EOFException If there are not enough bytes in the stream to copy. + * @throws IOException If there is an error reading from the stream. */ - final void copyFromStream(long destOffset, InputStream in, long byteLength) throws IOException { + public final void copyFromStream(long destOffset, InputStream in, long byteLength) throws IOException { addressOutOfBoundsCheck(address + destOffset, byteLength, "copy from stream"); byte[] arrayBuffer = new byte[(int) Math.min(1024 * 128, byteLength)]; long left = byteLength; @@ -264,7 +266,7 @@ final void copyFromStream(long destOffset, InputStream in, long byteLength) thro int amountToCopy = (int) Math.min(arrayBuffer.length, left); int amountRead = in.read(arrayBuffer, 0, amountToCopy); if (amountRead < 0) { - throw new EOFException(); + throw new EOFException("Unexpected end of stream, expected " + left + " more bytes"); } setBytes(destOffset, arrayBuffer, 0, amountRead); destOffset += amountRead; From 5ee7d7caaf459e7d30597e6ea2dd1904c50d12fc Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 30 Oct 2024 10:13:28 -0400 Subject: [PATCH 51/76] [no ci] Add empty-columns section to the libcudf developer guide (#17183) Adds a section on `Empty Columns` to the libcudf DEVELOPER_GUIDE Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Basit Ayantunde (https://github.com/lamarrr) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/17183 --- cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md index 311539efbfc..1c1052487f2 100644 --- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md +++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md @@ -1483,6 +1483,17 @@ struct, and therefore `cudf::struct_view` is the data type of a `cudf::column` o `cudf::type_dispatcher` dispatches to the `struct_view` data type when invoked on a `STRUCT` column. +# Empty Columns + +The libcudf columns support empty, typed content. These columns have no data and no validity mask. +Empty strings or lists columns may or may not contain a child offsets column. +It is undefined behavior (UB) to access the offsets child of an empty strings or lists column. +Nested columns like lists and structs may require other children columns to provide the +nested structure of the empty types. + +Use `cudf::make_empty_column()` to create fixed-width and strings columns. +Use `cudf::empty_like()` to create an empty column from an existing `cudf::column_view`. + # cuIO: file reading and writing cuIO is a component of libcudf that provides GPU-accelerated reading and writing of data file From 6c2eb4ef03c56413000b3d28574868b68c86181f Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 30 Oct 2024 10:38:38 -0500 Subject: [PATCH 52/76] Upgrade nvcomp to 4.1.0.6 (#17201) This updates cudf to use nvcomp 4.1.0.6. The version is updated in rapids-cmake in https://github.com/rapidsai/rapids-cmake/pull/709. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - James Lamb (https://github.com/jameslamb) - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/cudf/pull/17201 --- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- conda/recipes/libcudf/conda_build_config.yaml | 2 +- dependencies.yaml | 8 ++++---- python/libcudf/pyproject.toml | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index c3716c4759a..f3bbaaa8779 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -58,7 +58,7 @@ dependencies: - numpy>=1.23,<3.0a0 - numpydoc - nvcc_linux-64=11.8 -- nvcomp==4.0.1 +- nvcomp==4.1.0.6 - nvtx>=0.2.1 - openpyxl - packaging diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 38e131e79cb..38c5b361f70 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -56,7 +56,7 @@ dependencies: - numba-cuda>=0.0.13 - numpy>=1.23,<3.0a0 - numpydoc -- nvcomp==4.0.1 +- nvcomp==4.1.0.6 - nvtx>=0.2.1 - openpyxl - packaging diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml index dc75eb4b252..c78ca326005 100644 --- a/conda/recipes/libcudf/conda_build_config.yaml +++ b/conda/recipes/libcudf/conda_build_config.yaml @@ -35,7 +35,7 @@ spdlog_version: - ">=1.14.1,<1.15" nvcomp_version: - - "=4.0.1" + - "=4.1.0.6" zlib_version: - ">=1.2.13" diff --git a/dependencies.yaml b/dependencies.yaml index 7c7aa43fa41..bd1a5deb878 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -399,21 +399,21 @@ dependencies: - output_types: conda packages: # Align nvcomp version with rapids-cmake - - nvcomp==4.0.1 + - nvcomp==4.1.0.6 specific: - output_types: [requirements, pyproject] matrices: - matrix: cuda: "12.*" packages: - - nvidia-nvcomp-cu12==4.0.1 + - nvidia-nvcomp-cu12==4.1.0.6 - matrix: cuda: "11.*" packages: - - nvidia-nvcomp-cu11==4.0.1 + - nvidia-nvcomp-cu11==4.1.0.6 - matrix: packages: - - nvidia-nvcomp==4.0.1 + - nvidia-nvcomp==4.1.0.6 rapids_build_skbuild: common: - output_types: [conda, requirements, pyproject] diff --git a/python/libcudf/pyproject.toml b/python/libcudf/pyproject.toml index 84660cbc276..c6d9ae56467 100644 --- a/python/libcudf/pyproject.toml +++ b/python/libcudf/pyproject.toml @@ -38,7 +38,7 @@ classifiers = [ "Environment :: GPU :: NVIDIA CUDA", ] dependencies = [ - "nvidia-nvcomp==4.0.1", + "nvidia-nvcomp==4.1.0.6", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] From 0b9277b3abe014b9ab1cf7f849c36b21c2422bbe Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Wed, 30 Oct 2024 13:52:56 -0400 Subject: [PATCH 53/76] Fix bug in recovering invalid lines in JSONL inputs (#17098) Addresses #16999 Authors: - Shruti Shivakumar (https://github.com/shrshi) - Karthikeyan (https://github.com/karthikeyann) - Nghia Truong (https://github.com/ttnghia) Approvers: - Basit Ayantunde (https://github.com/lamarrr) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/17098 --- cpp/src/io/json/read_json.cu | 44 +++++++++++++++++++++++--------- cpp/src/io/json/read_json.hpp | 1 + cpp/tests/io/json/json_test.cpp | 18 +++++++++++++ cpp/tests/io/json/json_utils.cuh | 1 + 4 files changed, 52 insertions(+), 12 deletions(-) diff --git a/cpp/src/io/json/read_json.cu b/cpp/src/io/json/read_json.cu index 8a740ae17ef..2bc15ea19cb 100644 --- a/cpp/src/io/json/read_json.cu +++ b/cpp/src/io/json/read_json.cu @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -127,7 +128,8 @@ datasource::owning_buffer get_record_range_raw_input( std::size_t const total_source_size = sources_size(sources, 0, 0); auto constexpr num_delimiter_chars = 1; - auto const num_extra_delimiters = num_delimiter_chars * (sources.size() - 1); + auto const delimiter = reader_opts.get_delimiter(); + auto const num_extra_delimiters = num_delimiter_chars * sources.size(); compression_type const reader_compression = reader_opts.get_compression(); std::size_t const chunk_offset = reader_opts.get_byte_range_offset(); std::size_t chunk_size = reader_opts.get_byte_range_size(); @@ -135,10 +137,10 @@ datasource::owning_buffer get_record_range_raw_input( CUDF_EXPECTS(total_source_size ? chunk_offset < total_source_size : !chunk_offset, "Invalid offsetting", std::invalid_argument); - auto should_load_all_sources = !chunk_size || chunk_size >= total_source_size - chunk_offset; - chunk_size = should_load_all_sources ? total_source_size - chunk_offset : chunk_size; + auto should_load_till_last_source = !chunk_size || chunk_size >= total_source_size - chunk_offset; + chunk_size = should_load_till_last_source ? total_source_size - chunk_offset : chunk_size; - int num_subchunks_prealloced = should_load_all_sources ? 0 : max_subchunks_prealloced; + int num_subchunks_prealloced = should_load_till_last_source ? 0 : max_subchunks_prealloced; std::size_t const size_per_subchunk = estimate_size_per_subchunk(chunk_size); // The allocation for single source compressed input is estimated by assuming a ~4:1 @@ -155,17 +157,17 @@ datasource::owning_buffer get_record_range_raw_input( // Offset within buffer indicating first read position std::int64_t buffer_offset = 0; - auto readbufspan = - ingest_raw_input(bufspan, sources, reader_compression, chunk_offset, chunk_size, stream); + auto readbufspan = ingest_raw_input( + bufspan, sources, reader_compression, chunk_offset, chunk_size, delimiter, stream); auto const shift_for_nonzero_offset = std::min(chunk_offset, 1); auto const first_delim_pos = - chunk_offset == 0 ? 0 : find_first_delimiter(readbufspan, '\n', stream); + chunk_offset == 0 ? 0 : find_first_delimiter(readbufspan, delimiter, stream); if (first_delim_pos == -1) { // return empty owning datasource buffer auto empty_buf = rmm::device_buffer(0, stream); return datasource::owning_buffer(std::move(empty_buf)); - } else if (!should_load_all_sources) { + } else if (!should_load_till_last_source) { // Find next delimiter std::int64_t next_delim_pos = -1; std::size_t next_subchunk_start = chunk_offset + chunk_size; @@ -180,14 +182,15 @@ datasource::owning_buffer get_record_range_raw_input( reader_compression, next_subchunk_start, size_per_subchunk, + delimiter, stream); - next_delim_pos = find_first_delimiter(readbufspan, '\n', stream) + buffer_offset; + next_delim_pos = find_first_delimiter(readbufspan, delimiter, stream) + buffer_offset; next_subchunk_start += size_per_subchunk; } if (next_delim_pos < buffer_offset) { if (next_subchunk_start >= total_source_size) { // If we have reached the end of source list but the source does not terminate with a - // newline character + // delimiter character next_delim_pos = buffer_offset + readbufspan.size(); } else { // Our buffer_size estimate is insufficient to read until the end of the line! We need to @@ -209,10 +212,26 @@ datasource::owning_buffer get_record_range_raw_input( reinterpret_cast(buffer.data()) + first_delim_pos + shift_for_nonzero_offset, next_delim_pos - first_delim_pos - shift_for_nonzero_offset); } + + // Add delimiter to end of buffer - possibly adding an empty line to the input buffer - iff we are + // reading till the end of the last source i.e. should_load_till_last_source is true Note that the + // table generated from the JSONL input remains unchanged since empty lines are ignored by the + // parser. + size_t num_chars = readbufspan.size() - first_delim_pos - shift_for_nonzero_offset; + if (num_chars) { + auto last_char = delimiter; + cudf::detail::cuda_memcpy_async( + device_span(reinterpret_cast(buffer.data()), buffer.size()) + .subspan(readbufspan.size(), 1), + host_span(&last_char, 1, false), + stream); + num_chars++; + } + return datasource::owning_buffer( std::move(buffer), reinterpret_cast(buffer.data()) + first_delim_pos + shift_for_nonzero_offset, - readbufspan.size() - first_delim_pos - shift_for_nonzero_offset); + num_chars); } // Helper function to read the current batch using byte range offsets and size @@ -245,6 +264,7 @@ device_span ingest_raw_input(device_span buffer, compression_type compression, std::size_t range_offset, std::size_t range_size, + char delimiter, rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); @@ -296,7 +316,7 @@ device_span ingest_raw_input(device_span buffer, if (sources.size() > 1) { static_assert(num_delimiter_chars == 1, "Currently only single-character delimiters are supported"); - auto const delimiter_source = thrust::make_constant_iterator('\n'); + auto const delimiter_source = thrust::make_constant_iterator(delimiter); auto const d_delimiter_map = cudf::detail::make_device_uvector_async( delimiter_map, stream, cudf::get_current_device_resource_ref()); thrust::scatter(rmm::exec_policy_nosync(stream), diff --git a/cpp/src/io/json/read_json.hpp b/cpp/src/io/json/read_json.hpp index 982190eecb5..4def69cc629 100644 --- a/cpp/src/io/json/read_json.hpp +++ b/cpp/src/io/json/read_json.hpp @@ -56,6 +56,7 @@ device_span ingest_raw_input(device_span buffer, compression_type compression, size_t range_offset, size_t range_size, + char delimiter, rmm::cuda_stream_view stream); /** diff --git a/cpp/tests/io/json/json_test.cpp b/cpp/tests/io/json/json_test.cpp index 5f070bd53b9..b58ca56e066 100644 --- a/cpp/tests/io/json/json_test.cpp +++ b/cpp/tests/io/json/json_test.cpp @@ -2973,4 +2973,22 @@ TEST_F(JsonReaderTest, JsonDtypeSchema) cudf::test::debug_output_level::ALL_ERRORS); } +TEST_F(JsonReaderTest, LastRecordInvalid) +{ + std::string data = R"({"key": "1"} + {"key": "})"; + std::map schema{{"key", {dtype()}}}; + auto opts = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) + .dtypes(schema) + .lines(true) + .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL) + .build(); + auto const result = cudf::io::read_json(opts); + + EXPECT_EQ(result.metadata.schema_info[0].name, "key"); + cudf::test::strings_column_wrapper expected{{"1", ""}, cudf::test::iterators::nulls_at({1})}; + CUDF_TEST_EXPECT_TABLES_EQUAL(result.tbl->view(), cudf::table_view{{expected}}); +} + CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/io/json/json_utils.cuh b/cpp/tests/io/json/json_utils.cuh index 9383797d91b..c31bb2d24e0 100644 --- a/cpp/tests/io/json/json_utils.cuh +++ b/cpp/tests/io/json/json_utils.cuh @@ -52,6 +52,7 @@ std::vector split_byte_range_reading( reader_opts.get_compression(), reader_opts.get_byte_range_offset(), reader_opts.get_byte_range_size(), + reader_opts.get_delimiter(), stream); // Note: we cannot reuse cudf::io::json::detail::find_first_delimiter since the // return type of that function is size_type. However, when the chunk_size is From 7157de71f25a1b4f6da0374a4f268c249a80ae1b Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Wed, 30 Oct 2024 19:15:54 +0000 Subject: [PATCH 54/76] Add conversion from cudf-polars expressions to libcudf ast for parquet filters (#17141) Previously, we always applied parquet filters by post-filtering. This negates much of the potential gain from having filters available at read time, namely discarding row groups. To fix this, implement, with the new visitor system of #17016, conversion to pylibcudf expressions. We must distinguish two types of expressions, ones that we can evaluate via `cudf::compute_column`, and the more restricted set of expressions that the parquet reader understands, this is handled by having a state that tracks the usage. The former style will be useful when we implement inequality joins. While here, extend the support in pylibcudf expressions to handle all supported literal types and expose `compute_column` so we can test the correctness of the broader (non-parquet) implementation. Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17141 --- python/cudf/cudf/_lib/transform.pyx | 24 +- python/cudf_polars/cudf_polars/dsl/ir.py | 9 + python/cudf_polars/cudf_polars/dsl/to_ast.py | 265 ++++++++++++++++++ .../cudf_polars/testing/asserts.py | 2 +- .../cudf_polars/cudf_polars/testing/plugin.py | 6 +- python/cudf_polars/tests/dsl/test_to_ast.py | 78 ++++++ .../cudf_polars/tests/test_parquet_filters.py | 60 ++++ python/pylibcudf/pylibcudf/expressions.pyx | 50 +++- .../pylibcudf/pylibcudf/libcudf/transform.pxd | 5 + python/pylibcudf/pylibcudf/libcudf/types.pxd | 11 +- .../pylibcudf/libcudf/wrappers/durations.pxd | 5 +- .../pylibcudf/libcudf/wrappers/timestamps.pxd | 5 +- .../pylibcudf/tests/test_expressions.py | 39 ++- python/pylibcudf/pylibcudf/transform.pxd | 3 + python/pylibcudf/pylibcudf/transform.pyx | 27 ++ 15 files changed, 552 insertions(+), 37 deletions(-) create mode 100644 python/cudf_polars/cudf_polars/dsl/to_ast.py create mode 100644 python/cudf_polars/tests/dsl/test_to_ast.py create mode 100644 python/cudf_polars/tests/test_parquet_filters.py diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index 40d0c9eac3a..1589e23f716 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -7,20 +7,11 @@ from cudf.core._internals.expressions import parse_expression from cudf.core.buffer import acquire_spill_lock, as_buffer from cudf.utils import cudautils -from cython.operator cimport dereference -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - -cimport pylibcudf.libcudf.transform as libcudf_transform from pylibcudf cimport transform as plc_transform from pylibcudf.expressions cimport Expression -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.expressions cimport expression -from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport size_type from cudf._lib.column cimport Column -from cudf._lib.utils cimport table_view_from_columns import pylibcudf as plc @@ -121,13 +112,8 @@ def compute_column(list columns, tuple column_names, expr: str): # At the end, all the stack contains is the expression to evaluate. cdef Expression cudf_expr = visitor.expression - cdef table_view tbl = table_view_from_columns(columns) - cdef unique_ptr[column] col - with nogil: - col = move( - libcudf_transform.compute_column( - tbl, - dereference(cudf_expr.c_obj.get()) - ) - ) - return Column.from_unique_ptr(move(col)) + result = plc_transform.compute_column( + plc.Table([col.to_pylibcudf(mode="read") for col in columns]), + cudf_expr, + ) + return Column.from_pylibcudf(result) diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index f79e229d3f3..1aa6741d417 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -28,6 +28,7 @@ import cudf_polars.dsl.expr as expr from cudf_polars.containers import Column, DataFrame from cudf_polars.dsl.nodebase import Node +from cudf_polars.dsl.to_ast import to_parquet_filter from cudf_polars.utils import dtypes if TYPE_CHECKING: @@ -418,9 +419,14 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: colnames[0], ) elif self.typ == "parquet": + filters = None + if self.predicate is not None and self.row_index is None: + # Can't apply filters during read if we have a row index. + filters = to_parquet_filter(self.predicate.value) tbl_w_meta = plc.io.parquet.read_parquet( plc.io.SourceInfo(self.paths), columns=with_columns, + filters=filters, nrows=n_rows, skip_rows=self.skip_rows, ) @@ -429,6 +435,9 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: # TODO: consider nested column names? tbl_w_meta.column_names(include_children=False), ) + if filters is not None: + # Mask must have been applied. + return df elif self.typ == "ndjson": json_schema: list[tuple[str, str, list]] = [ (name, typ, []) for name, typ in self.schema.items() diff --git a/python/cudf_polars/cudf_polars/dsl/to_ast.py b/python/cudf_polars/cudf_polars/dsl/to_ast.py new file mode 100644 index 00000000000..ffdae81de55 --- /dev/null +++ b/python/cudf_polars/cudf_polars/dsl/to_ast.py @@ -0,0 +1,265 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-License-Identifier: Apache-2.0 + +"""Conversion of expression nodes to libcudf AST nodes.""" + +from __future__ import annotations + +from functools import partial, reduce, singledispatch +from typing import TYPE_CHECKING, TypeAlias + +import pylibcudf as plc +from pylibcudf import expressions as plc_expr + +from polars.polars import _expr_nodes as pl_expr + +from cudf_polars.dsl import expr +from cudf_polars.dsl.traversal import CachingVisitor +from cudf_polars.typing import GenericTransformer + +if TYPE_CHECKING: + from collections.abc import Mapping + +# Can't merge these op-mapping dictionaries because scoped enum values +# are exposed by cython with equality/hash based one their underlying +# representation type. So in a dict they are just treated as integers. +BINOP_TO_ASTOP = { + plc.binaryop.BinaryOperator.EQUAL: plc_expr.ASTOperator.EQUAL, + plc.binaryop.BinaryOperator.NULL_EQUALS: plc_expr.ASTOperator.NULL_EQUAL, + plc.binaryop.BinaryOperator.NOT_EQUAL: plc_expr.ASTOperator.NOT_EQUAL, + plc.binaryop.BinaryOperator.LESS: plc_expr.ASTOperator.LESS, + plc.binaryop.BinaryOperator.LESS_EQUAL: plc_expr.ASTOperator.LESS_EQUAL, + plc.binaryop.BinaryOperator.GREATER: plc_expr.ASTOperator.GREATER, + plc.binaryop.BinaryOperator.GREATER_EQUAL: plc_expr.ASTOperator.GREATER_EQUAL, + plc.binaryop.BinaryOperator.ADD: plc_expr.ASTOperator.ADD, + plc.binaryop.BinaryOperator.SUB: plc_expr.ASTOperator.SUB, + plc.binaryop.BinaryOperator.MUL: plc_expr.ASTOperator.MUL, + plc.binaryop.BinaryOperator.DIV: plc_expr.ASTOperator.DIV, + plc.binaryop.BinaryOperator.TRUE_DIV: plc_expr.ASTOperator.TRUE_DIV, + plc.binaryop.BinaryOperator.FLOOR_DIV: plc_expr.ASTOperator.FLOOR_DIV, + plc.binaryop.BinaryOperator.PYMOD: plc_expr.ASTOperator.PYMOD, + plc.binaryop.BinaryOperator.BITWISE_AND: plc_expr.ASTOperator.BITWISE_AND, + plc.binaryop.BinaryOperator.BITWISE_OR: plc_expr.ASTOperator.BITWISE_OR, + plc.binaryop.BinaryOperator.BITWISE_XOR: plc_expr.ASTOperator.BITWISE_XOR, + plc.binaryop.BinaryOperator.LOGICAL_AND: plc_expr.ASTOperator.LOGICAL_AND, + plc.binaryop.BinaryOperator.LOGICAL_OR: plc_expr.ASTOperator.LOGICAL_OR, + plc.binaryop.BinaryOperator.NULL_LOGICAL_AND: plc_expr.ASTOperator.NULL_LOGICAL_AND, + plc.binaryop.BinaryOperator.NULL_LOGICAL_OR: plc_expr.ASTOperator.NULL_LOGICAL_OR, +} + +UOP_TO_ASTOP = { + plc.unary.UnaryOperator.SIN: plc_expr.ASTOperator.SIN, + plc.unary.UnaryOperator.COS: plc_expr.ASTOperator.COS, + plc.unary.UnaryOperator.TAN: plc_expr.ASTOperator.TAN, + plc.unary.UnaryOperator.ARCSIN: plc_expr.ASTOperator.ARCSIN, + plc.unary.UnaryOperator.ARCCOS: plc_expr.ASTOperator.ARCCOS, + plc.unary.UnaryOperator.ARCTAN: plc_expr.ASTOperator.ARCTAN, + plc.unary.UnaryOperator.SINH: plc_expr.ASTOperator.SINH, + plc.unary.UnaryOperator.COSH: plc_expr.ASTOperator.COSH, + plc.unary.UnaryOperator.TANH: plc_expr.ASTOperator.TANH, + plc.unary.UnaryOperator.ARCSINH: plc_expr.ASTOperator.ARCSINH, + plc.unary.UnaryOperator.ARCCOSH: plc_expr.ASTOperator.ARCCOSH, + plc.unary.UnaryOperator.ARCTANH: plc_expr.ASTOperator.ARCTANH, + plc.unary.UnaryOperator.EXP: plc_expr.ASTOperator.EXP, + plc.unary.UnaryOperator.LOG: plc_expr.ASTOperator.LOG, + plc.unary.UnaryOperator.SQRT: plc_expr.ASTOperator.SQRT, + plc.unary.UnaryOperator.CBRT: plc_expr.ASTOperator.CBRT, + plc.unary.UnaryOperator.CEIL: plc_expr.ASTOperator.CEIL, + plc.unary.UnaryOperator.FLOOR: plc_expr.ASTOperator.FLOOR, + plc.unary.UnaryOperator.ABS: plc_expr.ASTOperator.ABS, + plc.unary.UnaryOperator.RINT: plc_expr.ASTOperator.RINT, + plc.unary.UnaryOperator.BIT_INVERT: plc_expr.ASTOperator.BIT_INVERT, + plc.unary.UnaryOperator.NOT: plc_expr.ASTOperator.NOT, +} + +SUPPORTED_STATISTICS_BINOPS = { + plc.binaryop.BinaryOperator.EQUAL, + plc.binaryop.BinaryOperator.NOT_EQUAL, + plc.binaryop.BinaryOperator.LESS, + plc.binaryop.BinaryOperator.LESS_EQUAL, + plc.binaryop.BinaryOperator.GREATER, + plc.binaryop.BinaryOperator.GREATER_EQUAL, +} + +REVERSED_COMPARISON = { + plc.binaryop.BinaryOperator.EQUAL: plc.binaryop.BinaryOperator.EQUAL, + plc.binaryop.BinaryOperator.NOT_EQUAL: plc.binaryop.BinaryOperator.NOT_EQUAL, + plc.binaryop.BinaryOperator.LESS: plc.binaryop.BinaryOperator.GREATER, + plc.binaryop.BinaryOperator.LESS_EQUAL: plc.binaryop.BinaryOperator.GREATER_EQUAL, + plc.binaryop.BinaryOperator.GREATER: plc.binaryop.BinaryOperator.LESS, + plc.binaryop.BinaryOperator.GREATER_EQUAL: plc.binaryop.BinaryOperator.LESS_EQUAL, +} + + +Transformer: TypeAlias = GenericTransformer[expr.Expr, plc_expr.Expression] + + +@singledispatch +def _to_ast(node: expr.Expr, self: Transformer) -> plc_expr.Expression: + """ + Translate an expression to a pylibcudf Expression. + + Parameters + ---------- + node + Expression to translate. + self + Recursive transformer. The state dictionary should contain a + `for_parquet` key indicating if this transformation should + provide an expression suitable for use in parquet filters. + + If `for_parquet` is `False`, the dictionary should contain a + `name_to_index` mapping that maps column names to their + integer index in the table that will be used for evaluation of + the expression. + + Returns + ------- + pylibcudf Expression. + + Raises + ------ + NotImplementedError or KeyError if the expression cannot be translated. + """ + raise NotImplementedError(f"Unhandled expression type {type(node)}") + + +@_to_ast.register +def _(node: expr.Col, self: Transformer) -> plc_expr.Expression: + if self.state["for_parquet"]: + return plc_expr.ColumnNameReference(node.name) + return plc_expr.ColumnReference(self.state["name_to_index"][node.name]) + + +@_to_ast.register +def _(node: expr.Literal, self: Transformer) -> plc_expr.Expression: + return plc_expr.Literal(plc.interop.from_arrow(node.value)) + + +@_to_ast.register +def _(node: expr.BinOp, self: Transformer) -> plc_expr.Expression: + if node.op == plc.binaryop.BinaryOperator.NULL_NOT_EQUALS: + return plc_expr.Operation( + plc_expr.ASTOperator.NOT, + self( + # Reconstruct and apply, rather than directly + # constructing the right expression so we get the + # handling of parquet special cases for free. + expr.BinOp( + node.dtype, plc.binaryop.BinaryOperator.NULL_EQUALS, *node.children + ) + ), + ) + if self.state["for_parquet"]: + op1_col, op2_col = (isinstance(op, expr.Col) for op in node.children) + if op1_col ^ op2_col: + op = node.op + if op not in SUPPORTED_STATISTICS_BINOPS: + raise NotImplementedError( + f"Parquet filter binop with column doesn't support {node.op!r}" + ) + op1, op2 = node.children + if op2_col: + (op1, op2) = (op2, op1) + op = REVERSED_COMPARISON[op] + if not isinstance(op2, expr.Literal): + raise NotImplementedError( + "Parquet filter binops must have form 'col binop literal'" + ) + return plc_expr.Operation(BINOP_TO_ASTOP[op], self(op1), self(op2)) + elif op1_col and op2_col: + raise NotImplementedError( + "Parquet filter binops must have one column reference not two" + ) + return plc_expr.Operation(BINOP_TO_ASTOP[node.op], *map(self, node.children)) + + +@_to_ast.register +def _(node: expr.BooleanFunction, self: Transformer) -> plc_expr.Expression: + if node.name == pl_expr.BooleanFunction.IsIn: + needles, haystack = node.children + if isinstance(haystack, expr.LiteralColumn) and len(haystack.value) < 16: + # 16 is an arbitrary limit + needle_ref = self(needles) + values = [ + plc_expr.Literal(plc.interop.from_arrow(v)) for v in haystack.value + ] + return reduce( + partial(plc_expr.Operation, plc_expr.ASTOperator.LOGICAL_OR), + ( + plc_expr.Operation(plc_expr.ASTOperator.EQUAL, needle_ref, value) + for value in values + ), + ) + if self.state["for_parquet"] and isinstance(node.children[0], expr.Col): + raise NotImplementedError( + f"Parquet filters don't support {node.name} on columns" + ) + if node.name == pl_expr.BooleanFunction.IsNull: + return plc_expr.Operation(plc_expr.ASTOperator.IS_NULL, self(node.children[0])) + elif node.name == pl_expr.BooleanFunction.IsNotNull: + return plc_expr.Operation( + plc_expr.ASTOperator.NOT, + plc_expr.Operation(plc_expr.ASTOperator.IS_NULL, self(node.children[0])), + ) + elif node.name == pl_expr.BooleanFunction.Not: + return plc_expr.Operation(plc_expr.ASTOperator.NOT, self(node.children[0])) + raise NotImplementedError(f"AST conversion does not support {node.name}") + + +@_to_ast.register +def _(node: expr.UnaryFunction, self: Transformer) -> plc_expr.Expression: + if isinstance(node.children[0], expr.Col) and self.state["for_parquet"]: + raise NotImplementedError( + "Parquet filters don't support {node.name} on columns" + ) + return plc_expr.Operation( + UOP_TO_ASTOP[node._OP_MAPPING[node.name]], self(node.children[0]) + ) + + +def to_parquet_filter(node: expr.Expr) -> plc_expr.Expression | None: + """ + Convert an expression to libcudf AST nodes suitable for parquet filtering. + + Parameters + ---------- + node + Expression to convert. + + Returns + ------- + pylibcudf Expression if conversion is possible, otherwise None. + """ + mapper = CachingVisitor(_to_ast, state={"for_parquet": True}) + try: + return mapper(node) + except (KeyError, NotImplementedError): + return None + + +def to_ast( + node: expr.Expr, *, name_to_index: Mapping[str, int] +) -> plc_expr.Expression | None: + """ + Convert an expression to libcudf AST nodes suitable for compute_column. + + Parameters + ---------- + node + Expression to convert. + name_to_index + Mapping from column names to their index in the table that + will be used for expression evaluation. + + Returns + ------- + pylibcudf Expressoin if conversion is possible, otherwise None. + """ + mapper = CachingVisitor( + _to_ast, state={"for_parquet": False, "name_to_index": name_to_index} + ) + try: + return mapper(node) + except (KeyError, NotImplementedError): + return None diff --git a/python/cudf_polars/cudf_polars/testing/asserts.py b/python/cudf_polars/cudf_polars/testing/asserts.py index 7b6f3848fc4..7b45c1eaa06 100644 --- a/python/cudf_polars/cudf_polars/testing/asserts.py +++ b/python/cudf_polars/cudf_polars/testing/asserts.py @@ -151,7 +151,7 @@ def assert_collect_raises( collect_kwargs: dict[OptimizationArgs, bool] | None = None, polars_collect_kwargs: dict[OptimizationArgs, bool] | None = None, cudf_collect_kwargs: dict[OptimizationArgs, bool] | None = None, -): +) -> None: """ Assert that collecting the result of a query raises the expected exceptions. diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index a3607159e01..e01ccd05527 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -16,7 +16,7 @@ from collections.abc import Mapping -def pytest_addoption(parser: pytest.Parser): +def pytest_addoption(parser: pytest.Parser) -> None: """Add plugin-specific options.""" group = parser.getgroup( "cudf-polars", "Plugin to set GPU as default engine for polars tests" @@ -28,7 +28,7 @@ def pytest_addoption(parser: pytest.Parser): ) -def pytest_configure(config: pytest.Config): +def pytest_configure(config: pytest.Config) -> None: """Enable use of this module as a pytest plugin to enable GPU collection.""" no_fallback = config.getoption("--cudf-polars-no-fallback") collect = polars.LazyFrame.collect @@ -172,7 +172,7 @@ def pytest_configure(config: pytest.Config): def pytest_collection_modifyitems( session: pytest.Session, config: pytest.Config, items: list[pytest.Item] -): +) -> None: """Mark known failing tests.""" if config.getoption("--cudf-polars-no-fallback"): # Don't xfail tests if running without fallback diff --git a/python/cudf_polars/tests/dsl/test_to_ast.py b/python/cudf_polars/tests/dsl/test_to_ast.py new file mode 100644 index 00000000000..a7b779a6ec9 --- /dev/null +++ b/python/cudf_polars/tests/dsl/test_to_ast.py @@ -0,0 +1,78 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import pylibcudf as plc +import pytest + +import polars as pl +from polars.testing import assert_frame_equal + +import cudf_polars.dsl.ir as ir_nodes +from cudf_polars import translate_ir +from cudf_polars.containers.dataframe import DataFrame, NamedColumn +from cudf_polars.dsl.to_ast import to_ast + + +@pytest.fixture(scope="module") +def df(): + return pl.LazyFrame( + { + "c": ["a", "b", "c", "d", "e", "f"], + "a": [1, 2, 3, None, 4, 5], + "b": pl.Series([None, None, 3, float("inf"), 4, 0], dtype=pl.Float64), + "d": [False, True, True, None, False, False], + } + ) + + +@pytest.mark.parametrize( + "expr", + [ + pl.col("a").is_in([0, 1]), + pl.col("a").is_between(0, 2), + (pl.col("a") < pl.col("b")).not_(), + pl.lit(2) > pl.col("a"), + pl.lit(2) >= pl.col("a"), + pl.lit(2) < pl.col("a"), + pl.lit(2) <= pl.col("a"), + pl.lit(0) == pl.col("a"), + pl.lit(1) != pl.col("a"), + (pl.col("b") < pl.lit(2, dtype=pl.Float64).sqrt()), + (pl.col("a") >= pl.lit(2)) & (pl.col("b") > 0), + pl.col("a").is_null(), + pl.col("a").is_not_null(), + pl.col("b").is_finite(), + pytest.param( + pl.col("a").sin(), + marks=pytest.mark.xfail(reason="Need to insert explicit casts"), + ), + pl.col("b").cos(), + pl.col("a").abs().is_between(0, 2), + pl.col("a").ne_missing(pl.lit(None, dtype=pl.Int64)), + [pl.col("a") * 2, pl.col("b") + pl.col("a")], + pl.col("d").not_(), + ], +) +def test_compute_column(expr, df): + q = df.select(expr) + ir = translate_ir(q._ldf.visit()) + + assert isinstance(ir, ir_nodes.Select) + table = ir.children[0].evaluate(cache={}) + name_to_index = {c.name: i for i, c in enumerate(table.columns)} + + def compute_column(e): + ast = to_ast(e.value, name_to_index=name_to_index) + if ast is not None: + return NamedColumn( + plc.transform.compute_column(table.table, ast), name=e.name + ) + return e.evaluate(table) + + got = DataFrame(map(compute_column, ir.exprs)).to_polars() + + expect = q.collect() + + assert_frame_equal(expect, got) diff --git a/python/cudf_polars/tests/test_parquet_filters.py b/python/cudf_polars/tests/test_parquet_filters.py new file mode 100644 index 00000000000..545a89250fc --- /dev/null +++ b/python/cudf_polars/tests/test_parquet_filters.py @@ -0,0 +1,60 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +import pytest + +import polars as pl +from polars.testing import assert_frame_equal + + +@pytest.fixture(scope="module") +def df(): + return pl.DataFrame( + { + "c": ["a", "b", "c", "d", "e", "f"], + "a": [1, 2, 3, None, 4, 5], + "b": pl.Series([None, None, 3, float("inf"), 4, 0], dtype=pl.Float64), + "d": [-1, 2, -3, None, 4, -5], + } + ) + + +@pytest.fixture(scope="module") +def pq_file(tmp_path_factory, df): + tmp_path = tmp_path_factory.mktemp("parquet_filter") + df.write_parquet(tmp_path / "tmp.pq", row_group_size=3) + return pl.scan_parquet(tmp_path / "tmp.pq") + + +@pytest.mark.parametrize( + "expr", + [ + pl.col("a").is_in([0, 1]), + pl.col("a").is_between(0, 2), + (pl.col("a") < 2).not_(), + pl.lit(2) > pl.col("a"), + pl.lit(2) >= pl.col("a"), + pl.lit(2) < pl.col("a"), + pl.lit(2) <= pl.col("a"), + pl.lit(0) == pl.col("a"), + pl.lit(1) != pl.col("a"), + pl.col("a") == pl.col("d"), + (pl.col("b") < pl.lit(2, dtype=pl.Float64).sqrt()), + (pl.col("a") >= pl.lit(2)) & (pl.col("b") > 0), + pl.col("b").is_finite(), + pl.col("a").is_null(), + pl.col("a").is_not_null(), + pl.col("a").abs().is_between(0, 2), + pl.col("a").ne_missing(pl.lit(None, dtype=pl.Int64)), + ], +) +@pytest.mark.parametrize("selection", [["c", "b"], ["a"], ["a", "c"], ["b"], "c"]) +def test_scan_by_hand(expr, selection, pq_file): + df = pq_file.collect() + q = pq_file.filter(expr).select(*selection) + # Not using assert_gpu_result_equal because + # https://github.com/pola-rs/polars/issues/19238 + got = q.collect(engine=pl.GPUEngine(raise_on_fail=True)) + expect = df.filter(expr).select(*selection) + assert_frame_equal(got, expect) diff --git a/python/pylibcudf/pylibcudf/expressions.pyx b/python/pylibcudf/pylibcudf/expressions.pyx index a44c9e25987..1535f68366b 100644 --- a/python/pylibcudf/pylibcudf/expressions.pyx +++ b/python/pylibcudf/pylibcudf/expressions.pyx @@ -5,7 +5,17 @@ from pylibcudf.libcudf.expressions import \ table_reference as TableReference # no-cython-lint from cython.operator cimport dereference -from libc.stdint cimport int32_t, int64_t +from libc.stdint cimport ( + int8_t, + int16_t, + int32_t, + int64_t, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) +from libcpp cimport bool from libcpp.memory cimport make_unique, unique_ptr from libcpp.string cimport string from libcpp.utility cimport move @@ -18,12 +28,14 @@ from pylibcudf.libcudf.scalar.scalar cimport ( ) from pylibcudf.libcudf.types cimport size_type, type_id from pylibcudf.libcudf.wrappers.durations cimport ( + duration_D, duration_ms, duration_ns, duration_s, duration_us, ) from pylibcudf.libcudf.wrappers.timestamps cimport ( + timestamp_D, timestamp_ms, timestamp_ns, timestamp_s, @@ -78,6 +90,34 @@ cdef class Literal(Expression): self.c_obj = move(make_unique[libcudf_exp.literal]( dereference(self.scalar.c_obj) )) + elif tid == type_id.INT16: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.INT8: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.UINT64: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.UINT32: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.UINT16: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.UINT8: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.BOOL8: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) elif tid == type_id.FLOAT64: self.c_obj = move(make_unique[libcudf_exp.literal]( dereference(self.scalar.c_obj) @@ -110,6 +150,10 @@ cdef class Literal(Expression): self.c_obj = move(make_unique[libcudf_exp.literal]( dereference(self.scalar.c_obj) )) + elif tid == type_id.TIMESTAMP_DAYS: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) elif tid == type_id.DURATION_NANOSECONDS: self.c_obj = move(make_unique[libcudf_exp.literal]( dereference(self.scalar.c_obj) @@ -130,6 +174,10 @@ cdef class Literal(Expression): self.c_obj = move(make_unique[libcudf_exp.literal]( dereference(self.scalar.c_obj) )) + elif tid == type_id.DURATION_DAYS: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) else: raise NotImplementedError( f"Don't know how to make literal with type id {tid}" diff --git a/python/pylibcudf/pylibcudf/libcudf/transform.pxd b/python/pylibcudf/pylibcudf/libcudf/transform.pxd index d21510bd731..47d79083b66 100644 --- a/python/pylibcudf/pylibcudf/libcudf/transform.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/transform.pxd @@ -27,6 +27,11 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: column_view input ) except + + cdef unique_ptr[column] compute_column( + table_view table, + expression expr + ) except + + cdef unique_ptr[column] transform( column_view input, string unary_udf, diff --git a/python/pylibcudf/pylibcudf/libcudf/types.pxd b/python/pylibcudf/pylibcudf/libcudf/types.pxd index eabae68bc90..60e293e5cdb 100644 --- a/python/pylibcudf/pylibcudf/libcudf/types.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/types.pxd @@ -70,18 +70,19 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil: TIMESTAMP_MILLISECONDS TIMESTAMP_MICROSECONDS TIMESTAMP_NANOSECONDS - DICTIONARY32 - STRING - LIST - STRUCT - NUM_TYPE_IDS + DURATION_DAYS DURATION_SECONDS DURATION_MILLISECONDS DURATION_MICROSECONDS DURATION_NANOSECONDS + DICTIONARY32 + STRING + LIST DECIMAL32 DECIMAL64 DECIMAL128 + STRUCT + NUM_TYPE_IDS cdef cppclass data_type: data_type() except + diff --git a/python/pylibcudf/pylibcudf/libcudf/wrappers/durations.pxd b/python/pylibcudf/pylibcudf/libcudf/wrappers/durations.pxd index 7c648425eb5..c9c960d0a79 100644 --- a/python/pylibcudf/pylibcudf/libcudf/wrappers/durations.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/wrappers/durations.pxd @@ -1,9 +1,10 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. -from libc.stdint cimport int64_t +from libc.stdint cimport int32_t, int64_t cdef extern from "cudf/wrappers/durations.hpp" namespace "cudf" nogil: + ctypedef int32_t duration_D ctypedef int64_t duration_s ctypedef int64_t duration_ms ctypedef int64_t duration_us diff --git a/python/pylibcudf/pylibcudf/libcudf/wrappers/timestamps.pxd b/python/pylibcudf/pylibcudf/libcudf/wrappers/timestamps.pxd index 50d37fd0a68..5dcd144529d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/wrappers/timestamps.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/wrappers/timestamps.pxd @@ -1,9 +1,10 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. -from libc.stdint cimport int64_t +from libc.stdint cimport int32_t, int64_t cdef extern from "cudf/wrappers/timestamps.hpp" namespace "cudf" nogil: + ctypedef int32_t timestamp_D ctypedef int64_t timestamp_s ctypedef int64_t timestamp_ms ctypedef int64_t timestamp_us diff --git a/python/pylibcudf/pylibcudf/tests/test_expressions.py b/python/pylibcudf/pylibcudf/tests/test_expressions.py index 6eabd6db617..52c81c49b9d 100644 --- a/python/pylibcudf/pylibcudf/tests/test_expressions.py +++ b/python/pylibcudf/pylibcudf/tests/test_expressions.py @@ -1,12 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa +import pyarrow.compute as pc import pytest +from utils import assert_column_eq import pylibcudf as plc -# We can't really evaluate these expressions, so just make sure -# construction works properly - def test_literal_construction_invalid(): with pytest.raises(ValueError): @@ -23,7 +22,7 @@ def test_literal_construction_invalid(): ], ) def test_columnref_construction(tableref): - plc.expressions.ColumnReference(1.0, tableref) + plc.expressions.ColumnReference(1, tableref) def test_columnnameref_construction(): @@ -48,3 +47,35 @@ def test_columnnameref_construction(): ) def test_astoperation_construction(kwargs): plc.expressions.Operation(**kwargs) + + +def test_evaluation(): + table_h = pa.table({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + lit = pa.scalar(42, type=pa.int64()) + table = plc.interop.from_arrow(table_h) + # expr = abs(b * c - (a + 42)) + expr = plc.expressions.Operation( + plc.expressions.ASTOperator.ABS, + plc.expressions.Operation( + plc.expressions.ASTOperator.SUB, + plc.expressions.Operation( + plc.expressions.ASTOperator.MUL, + plc.expressions.ColumnReference(1), + plc.expressions.ColumnReference(2), + ), + plc.expressions.Operation( + plc.expressions.ASTOperator.ADD, + plc.expressions.ColumnReference(0), + plc.expressions.Literal(plc.interop.from_arrow(lit)), + ), + ), + ) + + expect = pc.abs( + pc.subtract( + pc.multiply(table_h["b"], table_h["c"]), pc.add(table_h["a"], lit) + ) + ) + got = plc.transform.compute_column(table, expr) + + assert_column_eq(expect, got) diff --git a/python/pylibcudf/pylibcudf/transform.pxd b/python/pylibcudf/pylibcudf/transform.pxd index b530f433c97..4fb623158f0 100644 --- a/python/pylibcudf/pylibcudf/transform.pxd +++ b/python/pylibcudf/pylibcudf/transform.pxd @@ -3,6 +3,7 @@ from libcpp cimport bool from pylibcudf.libcudf.types cimport bitmask_type, data_type from .column cimport Column +from .expressions cimport Expression from .gpumemoryview cimport gpumemoryview from .table cimport Table from .types cimport DataType @@ -10,6 +11,8 @@ from .types cimport DataType cpdef tuple[gpumemoryview, int] nans_to_nulls(Column input) +cpdef Column compute_column(Table input, Expression expr) + cpdef tuple[gpumemoryview, int] bools_to_mask(Column input) cpdef Column mask_to_bools(Py_ssize_t bitmask, int begin_bit, int end_bit) diff --git a/python/pylibcudf/pylibcudf/transform.pyx b/python/pylibcudf/pylibcudf/transform.pyx index bce9702752a..e8d95cadb0c 100644 --- a/python/pylibcudf/pylibcudf/transform.pyx +++ b/python/pylibcudf/pylibcudf/transform.pyx @@ -1,5 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from cython.operator cimport dereference from libcpp.memory cimport unique_ptr from libcpp.string cimport string from libcpp.utility cimport move, pair @@ -43,6 +44,32 @@ cpdef tuple[gpumemoryview, int] nans_to_nulls(Column input): ) +cpdef Column compute_column(Table input, Expression expr): + """Create a column by evaluating an expression on a table. + + For details see :cpp:func:`compute_column`. + + Parameters + ---------- + input : Table + Table used for expression evaluation + expr : Expression + Expression to evaluate + + Returns + ------- + Column of the evaluated expression + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = cpp_transform.compute_column( + input.view(), dereference(expr.c_obj.get()) + ) + + return Column.from_libcudf(move(c_result)) + + cpdef tuple[gpumemoryview, int] bools_to_mask(Column input): """Create a bitmask from a column of boolean elements From 5a6d177b259bcda647a393bc1df63f06bb26b56f Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Wed, 30 Oct 2024 14:49:50 -0500 Subject: [PATCH 55/76] Fix ``to_parquet`` append behavior with global metadata file (#17198) Closes https://github.com/rapidsai/cudf/issues/17177 When appending to a parquet dataset with Dask cuDF, the original metadata must be converted from `pq.FileMetaData` to `bytes` before it can be passed down to `cudf.io.merge_parquet_filemetadata`. Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) Approvers: - Mads R. B. Kristensen (https://github.com/madsbk) URL: https://github.com/rapidsai/cudf/pull/17198 --- python/dask_cudf/dask_cudf/io/parquet.py | 6 ++++++ .../dask_cudf/io/tests/test_parquet.py | 20 +++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index a781b8242fe..39ac6474958 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -383,6 +383,12 @@ def write_metadata(parts, fmd, fs, path, append=False, **kwargs): metadata_path = fs.sep.join([path, "_metadata"]) _meta = [] if append and fmd is not None: + # Convert to bytes: + if isinstance(fmd, pq.FileMetaData): + with BytesIO() as myio: + fmd.write_metadata_file(myio) + myio.seek(0) + fmd = np.frombuffer(myio.read(), dtype="uint8") _meta = [fmd] _meta.extend([parts[i][0]["meta"] for i in range(len(parts))]) _meta = ( diff --git a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py index ae5ca480e31..a29cf9a342a 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py @@ -644,3 +644,23 @@ def test_read_parquet_arrow_filesystem(tmpdir, min_part_size): dd.assert_eq(df, ddf, check_index=False) assert isinstance(ddf._meta, cudf.DataFrame) assert isinstance(ddf.compute(), cudf.DataFrame) + + +@pytest.mark.parametrize("write_metadata_file", [True, False]) +def test_to_parquet_append(tmpdir, write_metadata_file): + df = cudf.DataFrame({"a": [1, 2, 3]}) + ddf = dask_cudf.from_cudf(df, npartitions=1) + ddf.to_parquet( + tmpdir, + append=True, + write_metadata_file=write_metadata_file, + write_index=False, + ) + ddf.to_parquet( + tmpdir, + append=True, + write_metadata_file=write_metadata_file, + write_index=False, + ) + ddf2 = dask_cudf.read_parquet(tmpdir) + dd.assert_eq(cudf.concat([df, df]), ddf2) From 3cf186c5c31b658f0cb7b5f180de0e72f533d413 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 30 Oct 2024 20:14:44 -0400 Subject: [PATCH 56/76] Add remaining datetime APIs to pylibcudf (#17143) Apart of #15162 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - https://github.com/brandon-b-miller URL: https://github.com/rapidsai/cudf/pull/17143 --- python/pylibcudf/pylibcudf/datetime.pxd | 51 ++- python/pylibcudf/pylibcudf/datetime.pyx | 319 +++++++++++++++++- .../pylibcudf/pylibcudf/libcudf/datetime.pxd | 22 +- .../pylibcudf/tests/test_datetime.py | 152 +++++++++ 4 files changed, 519 insertions(+), 25 deletions(-) diff --git a/python/pylibcudf/pylibcudf/datetime.pxd b/python/pylibcudf/pylibcudf/datetime.pxd index 72ce680ba7a..335ef435f9b 100644 --- a/python/pylibcudf/pylibcudf/datetime.pxd +++ b/python/pylibcudf/pylibcudf/datetime.pxd @@ -1,15 +1,56 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from pylibcudf.libcudf.datetime cimport datetime_component +from pylibcudf.column cimport Column +from pylibcudf.libcudf.datetime cimport datetime_component, rounding_frequency +from pylibcudf.scalar cimport Scalar -from .column cimport Column +ctypedef fused ColumnOrScalar: + Column + Scalar +cpdef Column extract_millisecond_fraction( + Column input +) + +cpdef Column extract_microsecond_fraction( + Column input +) -cpdef Column extract_year( - Column col +cpdef Column extract_nanosecond_fraction( + Column input ) cpdef Column extract_datetime_component( - Column col, + Column input, datetime_component component ) + +cpdef Column ceil_datetimes( + Column input, + rounding_frequency freq +) + +cpdef Column floor_datetimes( + Column input, + rounding_frequency freq +) + +cpdef Column round_datetimes( + Column input, + rounding_frequency freq +) + +cpdef Column add_calendrical_months( + Column timestamps, + ColumnOrScalar months, +) + +cpdef Column day_of_year(Column input) + +cpdef Column is_leap_year(Column input) + +cpdef Column last_day_of_month(Column input) + +cpdef Column extract_quarter(Column input) + +cpdef Column days_in_month(Column input) diff --git a/python/pylibcudf/pylibcudf/datetime.pyx b/python/pylibcudf/pylibcudf/datetime.pyx index ac4335cca56..9e5e709d81d 100644 --- a/python/pylibcudf/pylibcudf/datetime.pyx +++ b/python/pylibcudf/pylibcudf/datetime.pyx @@ -3,41 +3,106 @@ from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.datetime cimport ( + add_calendrical_months as cpp_add_calendrical_months, + ceil_datetimes as cpp_ceil_datetimes, datetime_component, + day_of_year as cpp_day_of_year, + days_in_month as cpp_days_in_month, extract_datetime_component as cpp_extract_datetime_component, - extract_year as cpp_extract_year, + extract_microsecond_fraction as cpp_extract_microsecond_fraction, + extract_millisecond_fraction as cpp_extract_millisecond_fraction, + extract_nanosecond_fraction as cpp_extract_nanosecond_fraction, + extract_quarter as cpp_extract_quarter, + floor_datetimes as cpp_floor_datetimes, + is_leap_year as cpp_is_leap_year, + last_day_of_month as cpp_last_day_of_month, + round_datetimes as cpp_round_datetimes, + rounding_frequency, ) from pylibcudf.libcudf.datetime import \ datetime_component as DatetimeComponent # no-cython-lint +from pylibcudf.libcudf.datetime import \ + rounding_frequency as RoundingFrequency # no-cython-lint + +from cython.operator cimport dereference from .column cimport Column +cpdef Column extract_millisecond_fraction( + Column input +): + """ + Extract the millisecond from a datetime column. + + For details, see :cpp:func:`extract_millisecond_fraction`. + + Parameters + ---------- + input : Column + The column to extract the millisecond from. + + Returns + ------- + Column + Column with the extracted milliseconds. + """ + cdef unique_ptr[column] result + + with nogil: + result = cpp_extract_millisecond_fraction(input.view()) + return Column.from_libcudf(move(result)) + +cpdef Column extract_microsecond_fraction( + Column input +): + """ + Extract the microsecond fraction from a datetime column. + + For details, see :cpp:func:`extract_microsecond_fraction`. + + Parameters + ---------- + input : Column + The column to extract the microsecond fraction from. + + Returns + ------- + Column + Column with the extracted microsecond fractions. + """ + cdef unique_ptr[column] result + + with nogil: + result = cpp_extract_microsecond_fraction(input.view()) + return Column.from_libcudf(move(result)) -cpdef Column extract_year( - Column values +cpdef Column extract_nanosecond_fraction( + Column input ): """ - Extract the year from a datetime column. + Extract the nanosecond fraction from a datetime column. + + For details, see :cpp:func:`extract_nanosecond_fraction`. Parameters ---------- - values : Column - The column to extract the year from. + input : Column + The column to extract the nanosecond fraction from. Returns ------- Column - Column with the extracted years. + Column with the extracted nanosecond fractions. """ cdef unique_ptr[column] result with nogil: - result = cpp_extract_year(values.view()) + result = cpp_extract_nanosecond_fraction(input.view()) return Column.from_libcudf(move(result)) cpdef Column extract_datetime_component( - Column values, + Column input, datetime_component component ): """ @@ -47,7 +112,7 @@ cpdef Column extract_datetime_component( Parameters ---------- - values : Column + input : Column The column to extract the component from. component : DatetimeComponent The datetime component to extract. @@ -60,5 +125,237 @@ cpdef Column extract_datetime_component( cdef unique_ptr[column] result with nogil: - result = cpp_extract_datetime_component(values.view(), component) + result = cpp_extract_datetime_component(input.view(), component) + return Column.from_libcudf(move(result)) + +cpdef Column ceil_datetimes( + Column input, + rounding_frequency freq +): + """ + Round datetimes up to the nearest multiple of the given frequency. + + For details, see :cpp:func:`ceil_datetimes`. + + Parameters + ---------- + input : Column + The column of input datetime values. + freq : rounding_frequency + The frequency to round up to. + + Returns + ------- + Column + Column of the same datetime resolution as the input column. + """ + cdef unique_ptr[column] result + + with nogil: + result = cpp_ceil_datetimes(input.view(), freq) + return Column.from_libcudf(move(result)) + +cpdef Column floor_datetimes( + Column input, + rounding_frequency freq +): + """ + Round datetimes down to the nearest multiple of the given frequency. + + For details, see :cpp:func:`floor_datetimes`. + + Parameters + ---------- + input : Column + The column of input datetime values. + freq : rounding_frequency + The frequency to round down to. + + Returns + ------- + Column + Column of the same datetime resolution as the input column. + """ + cdef unique_ptr[column] result + + with nogil: + result = cpp_floor_datetimes(input.view(), freq) + return Column.from_libcudf(move(result)) + +cpdef Column round_datetimes( + Column input, + rounding_frequency freq +): + """ + Round datetimes to the nearest multiple of the given frequency. + + For details, see :cpp:func:`round_datetimes`. + + Parameters + ---------- + input : Column + The column of input datetime values. + freq : rounding_frequency + The frequency to round to. + + Returns + ------- + Column + Column of the same datetime resolution as the input column. + """ + cdef unique_ptr[column] result + + with nogil: + result = cpp_round_datetimes(input.view(), freq) + return Column.from_libcudf(move(result)) + +cpdef Column add_calendrical_months( + Column input, + ColumnOrScalar months, +): + """ + Adds or subtracts a number of months from the datetime + type and returns a timestamp column that is of the same + type as the input timestamps column. + + For details, see :cpp:func:`add_calendrical_months`. + + Parameters + ---------- + input : Column + The column of input timestamp values. + months : ColumnOrScalar + The number of months to add. + + Returns + ------- + Column + Column of computed timestamps. + """ + if not isinstance(months, (Column, Scalar)): + raise TypeError("Must pass a Column or Scalar") + + cdef unique_ptr[column] result + + with nogil: + result = cpp_add_calendrical_months( + input.view(), + months.view() if ColumnOrScalar is Column else + dereference(months.get()) + ) + return Column.from_libcudf(move(result)) + +cpdef Column day_of_year(Column input): + """ + Computes the day number since the start of + the year from the datetime. The value is between + [1, {365-366}]. + + For details, see :cpp:func:`day_of_year`. + + Parameters + ---------- + input : Column + The column of input datetime values. + + Returns + ------- + Column + Column of day numbers. + """ + cdef unique_ptr[column] result + + with nogil: + result = cpp_day_of_year(input.view()) + return Column.from_libcudf(move(result)) + +cpdef Column is_leap_year(Column input): + """ + Check if the year of the given date is a leap year. + + For details, see :cpp:func:`is_leap_year`. + + Parameters + ---------- + input : Column + The column of input datetime values. + + Returns + ------- + Column + Column of bools indicating whether the given year + is a leap year. + """ + cdef unique_ptr[column] result + + with nogil: + result = cpp_is_leap_year(input.view()) + return Column.from_libcudf(move(result)) + +cpdef Column last_day_of_month(Column input): + """ + Computes the last day of the month. + + For details, see :cpp:func:`last_day_of_month`. + + Parameters + ---------- + input : Column + The column of input datetime values. + + Returns + ------- + Column + Column of ``TIMESTAMP_DAYS`` representing the last day + of the month. + """ + cdef unique_ptr[column] result + + with nogil: + result = cpp_last_day_of_month(input.view()) + return Column.from_libcudf(move(result)) + +cpdef Column extract_quarter(Column input): + """ + Returns the quarter (ie. a value from {1, 2, 3, 4}) + that the date is in. + + For details, see :cpp:func:`extract_quarter`. + + Parameters + ---------- + input : Column + The column of input datetime values. + + Returns + ------- + Column + Column indicating which quarter the date is in. + """ + cdef unique_ptr[column] result + + with nogil: + result = cpp_extract_quarter(input.view()) + return Column.from_libcudf(move(result)) + +cpdef Column days_in_month(Column input): + """ + Extract the number of days in the month. + + For details, see :cpp:func:`days_in_month`. + + Parameters + ---------- + input : Column + The column of input datetime values. + + Returns + ------- + Column + Column of the number of days in the given month. + """ + cdef unique_ptr[column] result + + with nogil: + result = cpp_days_in_month(input.view()) return Column.from_libcudf(move(result)) diff --git a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd index 73cdfb96af5..8bbc120cff8 100644 --- a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. -from libc.stdint cimport uint8_t +from libc.stdint cimport int32_t, uint8_t from libcpp.memory cimport unique_ptr from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -41,14 +41,14 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: datetime_component component ) except + - ctypedef enum rounding_frequency "cudf::datetime::rounding_frequency": - DAY "cudf::datetime::rounding_frequency::DAY" - HOUR "cudf::datetime::rounding_frequency::HOUR" - MINUTE "cudf::datetime::rounding_frequency::MINUTE" - SECOND "cudf::datetime::rounding_frequency::SECOND" - MILLISECOND "cudf::datetime::rounding_frequency::MILLISECOND" - MICROSECOND "cudf::datetime::rounding_frequency::MICROSECOND" - NANOSECOND "cudf::datetime::rounding_frequency::NANOSECOND" + cpdef enum class rounding_frequency(int32_t): + DAY + HOUR + MINUTE + SECOND + MILLISECOND + MICROSECOND + NANOSECOND cdef unique_ptr[column] ceil_datetimes( const column_view& column, rounding_frequency freq @@ -64,6 +64,10 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: const column_view& timestamps, const column_view& months ) except + + cdef unique_ptr[column] add_calendrical_months( + const column_view& timestamps, + const scalar& months + ) except + cdef unique_ptr[column] day_of_year(const column_view& column) except + cdef unique_ptr[column] is_leap_year(const column_view& column) except + cdef unique_ptr[column] last_day_of_month( diff --git a/python/pylibcudf/pylibcudf/tests/test_datetime.py b/python/pylibcudf/pylibcudf/tests/test_datetime.py index a80ab8d9f65..f5f24ef28e2 100644 --- a/python/pylibcudf/pylibcudf/tests/test_datetime.py +++ b/python/pylibcudf/pylibcudf/tests/test_datetime.py @@ -1,5 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +import calendar import datetime import pyarrow as pa @@ -46,6 +47,21 @@ def component(request): return request.param +@pytest.fixture( + params=[ + ("day", plc.datetime.RoundingFrequency.DAY), + ("hour", plc.datetime.RoundingFrequency.HOUR), + ("minute", plc.datetime.RoundingFrequency.MINUTE), + ("second", plc.datetime.RoundingFrequency.SECOND), + ("millisecond", plc.datetime.RoundingFrequency.MILLISECOND), + ("microsecond", plc.datetime.RoundingFrequency.MICROSECOND), + ("nanosecond", plc.datetime.RoundingFrequency.NANOSECOND), + ] +) +def rounding_frequency(request): + return request.param + + def test_extract_datetime_component(datetime_column, component): attr, component = component kwargs = {} @@ -59,3 +75,139 @@ def test_extract_datetime_component(datetime_column, component): ).cast(pa.int16()) assert_column_eq(expect, got) + + +@pytest.mark.parametrize( + "datetime_func", + [ + "extract_millisecond_fraction", + "extract_microsecond_fraction", + "extract_nanosecond_fraction", + ], +) +def test_datetime_extracting_functions(datetime_column, datetime_func): + pa_col = plc.interop.to_arrow(datetime_column) + got = getattr(plc.datetime, datetime_func)(datetime_column) + kwargs = {} + attr = datetime_func.split("_")[1] + if attr == "weekday": + kwargs = {"count_from_zero": False} + attr = "day_of_week" + expect = getattr(pc, attr)(pa_col, **kwargs).cast(pa.int16()) + assert_column_eq(expect, got) + + +@pytest.mark.parametrize( + "op", + [ + ("ceil_temporal", "ceil_datetimes"), + ("floor_temporal", "floor_datetimes"), + ("round_temporal", "round_datetimes"), + ], +) +def test_rounding_operations(datetime_column, op, rounding_frequency): + got = getattr(plc.datetime, op[1])(datetime_column, rounding_frequency[1]) + pa_col = plc.interop.to_arrow(datetime_column) + pa_got = plc.interop.to_arrow(got) + expect = getattr(pc, op[0])( + pa_col, + unit=rounding_frequency[0], + ).cast(pa_got.type) + assert_column_eq(expect, got) + + +@pytest.mark.parametrize( + "months", + [ + pa.scalar(-3, pa.int32()), + pa.scalar(1, pa.int16()), + pa.array([1, -3, 2, 4, -1, 5], pa.int32()), + ], +) +def test_calendrical_months(datetime_column, months): + def add_calendrical_months(timestamps, months): + result = [] + if isinstance(months, pa.Array): + months_list = months.to_pylist() + else: + months_list = [months.as_py()] * len(timestamps) + for i, dt in enumerate(timestamps): + if dt.as_py() is not None: + year, month = dt.as_py().year, dt.as_py().month + new_month = month + months_list[i] + new_year = year + (new_month - 1) // 12 + result.append( + dt.as_py().replace( + year=new_year, month=(new_month - 1) % 12 + 1 + ) + ) + else: + result.append(None) + return pa.array(result) + + pa_col = plc.interop.to_arrow(datetime_column) + got = plc.datetime.add_calendrical_months( + datetime_column, plc.interop.from_arrow(months) + ) + pa_got = plc.interop.to_arrow(got) + expect = add_calendrical_months(pa_col, months).cast(pa_got.type) + assert_column_eq(expect, got) + + +def test_day_of_year(datetime_column): + got = plc.datetime.day_of_year(datetime_column) + pa_got = plc.interop.to_arrow(got) + pa_col = plc.interop.to_arrow(datetime_column) + expect = pa.array( + [ + d.as_py().timetuple().tm_yday if d.as_py() is not None else None + for d in pa_col + ], + type=pa_got.type, + ) + assert_column_eq(expect, got) + + +def test_is_leap_year(datetime_column): + got = plc.datetime.is_leap_year(datetime_column) + pa_col = plc.interop.to_arrow(datetime_column) + expect = pc.is_leap_year(pa_col) + assert_column_eq(expect, got) + + +def test_last_day_of_month(datetime_column): + def last_day_of_month(dates): + return [ + d.replace(day=calendar.monthrange(d.year, d.month)[1]) + if d is not None + else d + for d in dates.to_pylist() + ] + + got = plc.datetime.last_day_of_month(datetime_column) + pa_got = plc.interop.to_arrow(got) + pa_col = plc.interop.to_arrow(datetime_column) + expect = pa.array(last_day_of_month(pa_col), type=pa_got.type) + assert_column_eq(expect, got) + + +def test_extract_quarter(datetime_column): + got = plc.datetime.extract_quarter(datetime_column) + pa_col = plc.interop.to_arrow(datetime_column) + pa_got = plc.interop.to_arrow(got) + expect = pc.quarter(pa_col).cast(pa_got.type) + assert_column_eq(expect, got) + + +def test_days_in_month(datetime_column): + def days_in_month(dates): + return [ + calendar.monthrange(d.year, d.month)[1] if d is not None else None + for d in dates.to_pylist() + ] + + got = plc.datetime.days_in_month(datetime_column) + pa_col = plc.interop.to_arrow(datetime_column) + pa_got = plc.interop.to_arrow(got) + expect = pa.array(days_in_month(pa_col), type=pa_got.type) + assert_column_eq(expect, got) From 0e294b1580612d4106afb5044b33ed33dd6fe0ec Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 30 Oct 2024 17:32:55 -0700 Subject: [PATCH 57/76] Add compute_shared_memory_aggs used by shared memory groupby (#17162) This work is part of splitting the original bulk shared memory groupby PR https://github.com/rapidsai/cudf/pull/16619. This PR introduces the `compute_shared_memory_aggs` API, which is utilized by the shared memory groupby. The shared memory groupby process consists of two main steps. The first step was introduced in #17147, and this PR implements the second step, where the actual aggregations are performed based on the offsets from the first step. Each thread block is designed to handle up to 128 unique keys. If this limit is exceeded, there won't be enough space to store temporary aggregation results in shared memory, so a flag is set to indicate that follow-up global memory aggregations are needed to complete the remaining aggregation requests. Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - David Wendt (https://github.com/davidwendt) - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17162 --- cpp/CMakeLists.txt | 1 + .../hash/compute_shared_memory_aggs.cu | 323 ++++++++++++++++++ .../hash/compute_shared_memory_aggs.hpp | 41 +++ cpp/src/groupby/hash/helpers.cuh | 9 - cpp/src/groupby/hash/single_pass_functors.cuh | 81 +++++ 5 files changed, 446 insertions(+), 9 deletions(-) create mode 100644 cpp/src/groupby/hash/compute_shared_memory_aggs.cu create mode 100644 cpp/src/groupby/hash/compute_shared_memory_aggs.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 60132f651d2..bfa4bf80724 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -371,6 +371,7 @@ add_library( src/groupby/hash/compute_groupby.cu src/groupby/hash/compute_mapping_indices.cu src/groupby/hash/compute_mapping_indices_null.cu + src/groupby/hash/compute_shared_memory_aggs.cu src/groupby/hash/compute_single_pass_aggs.cu src/groupby/hash/create_sparse_results_table.cu src/groupby/hash/flatten_single_pass_aggs.cpp diff --git a/cpp/src/groupby/hash/compute_shared_memory_aggs.cu b/cpp/src/groupby/hash/compute_shared_memory_aggs.cu new file mode 100644 index 00000000000..12c02a1865e --- /dev/null +++ b/cpp/src/groupby/hash/compute_shared_memory_aggs.cu @@ -0,0 +1,323 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "compute_shared_memory_aggs.hpp" +#include "global_memory_aggregator.cuh" +#include "helpers.cuh" +#include "shared_memory_aggregator.cuh" +#include "single_pass_functors.cuh" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +namespace cudf::groupby::detail::hash { +namespace { +/// Functor used by type dispatcher returning the size of the underlying C++ type +struct size_of_functor { + template + __device__ constexpr cudf::size_type operator()() + { + return sizeof(T); + } +}; + +/// Shared memory data alignment +CUDF_HOST_DEVICE cudf::size_type constexpr ALIGNMENT = 8; + +// Prepares shared memory data required by each output column, exits if +// no enough memory space to perform the shared memory aggregation for the +// current output column +__device__ void calculate_columns_to_aggregate(cudf::size_type& col_start, + cudf::size_type& col_end, + cudf::mutable_table_device_view output_values, + cudf::size_type output_size, + cudf::size_type* shmem_agg_res_offsets, + cudf::size_type* shmem_agg_mask_offsets, + cudf::size_type cardinality, + cudf::size_type total_agg_size) +{ + col_start = col_end; + cudf::size_type bytes_allocated = 0; + + auto const valid_col_size = + cudf::util::round_up_safe(static_cast(sizeof(bool) * cardinality), ALIGNMENT); + + while (bytes_allocated < total_agg_size && col_end < output_size) { + auto const col_idx = col_end; + auto const next_col_size = + cudf::util::round_up_safe(cudf::type_dispatcher( + output_values.column(col_idx).type(), size_of_functor{}) * + cardinality, + ALIGNMENT); + auto const next_col_total_size = next_col_size + valid_col_size; + + if (bytes_allocated + next_col_total_size > total_agg_size) { + CUDF_UNREACHABLE("Not enough memory for shared memory aggregations"); + } + + shmem_agg_res_offsets[col_end] = bytes_allocated; + shmem_agg_mask_offsets[col_end] = bytes_allocated + next_col_size; + + bytes_allocated += next_col_total_size; + ++col_end; + } +} + +// Each block initialize its own shared memory aggregation results +__device__ void initialize_shmem_aggregations(cooperative_groups::thread_block const& block, + cudf::size_type col_start, + cudf::size_type col_end, + cudf::mutable_table_device_view output_values, + cuda::std::byte* shmem_agg_storage, + cudf::size_type* shmem_agg_res_offsets, + cudf::size_type* shmem_agg_mask_offsets, + cudf::size_type cardinality, + cudf::aggregation::Kind const* d_agg_kinds) +{ + for (auto col_idx = col_start; col_idx < col_end; col_idx++) { + for (auto idx = block.thread_rank(); idx < cardinality; idx += block.num_threads()) { + auto target = + reinterpret_cast(shmem_agg_storage + shmem_agg_res_offsets[col_idx]); + auto target_mask = + reinterpret_cast(shmem_agg_storage + shmem_agg_mask_offsets[col_idx]); + cudf::detail::dispatch_type_and_aggregation(output_values.column(col_idx).type(), + d_agg_kinds[col_idx], + initialize_shmem{}, + target, + target_mask, + idx); + } + } + block.sync(); +} + +__device__ void compute_pre_aggregrations(cudf::size_type col_start, + cudf::size_type col_end, + bitmask_type const* row_bitmask, + bool skip_rows_with_nulls, + cudf::table_device_view source, + cudf::size_type num_input_rows, + cudf::size_type* local_mapping_index, + cuda::std::byte* shmem_agg_storage, + cudf::size_type* shmem_agg_res_offsets, + cudf::size_type* shmem_agg_mask_offsets, + cudf::aggregation::Kind const* d_agg_kinds) +{ + // Aggregates global memory sources to shared memory targets + for (auto source_idx = cudf::detail::grid_1d::global_thread_id(); source_idx < num_input_rows; + source_idx += cudf::detail::grid_1d::grid_stride()) { + if (not skip_rows_with_nulls or cudf::bit_is_set(row_bitmask, source_idx)) { + auto const target_idx = local_mapping_index[source_idx]; + for (auto col_idx = col_start; col_idx < col_end; col_idx++) { + auto const source_col = source.column(col_idx); + + cuda::std::byte* target = + reinterpret_cast(shmem_agg_storage + shmem_agg_res_offsets[col_idx]); + bool* target_mask = + reinterpret_cast(shmem_agg_storage + shmem_agg_mask_offsets[col_idx]); + + cudf::detail::dispatch_type_and_aggregation(source_col.type(), + d_agg_kinds[col_idx], + shmem_element_aggregator{}, + target, + target_mask, + target_idx, + source_col, + source_idx); + } + } + } +} + +__device__ void compute_final_aggregations(cooperative_groups::thread_block const& block, + cudf::size_type col_start, + cudf::size_type col_end, + cudf::table_device_view input_values, + cudf::mutable_table_device_view target, + cudf::size_type cardinality, + cudf::size_type* global_mapping_index, + cuda::std::byte* shmem_agg_storage, + cudf::size_type* agg_res_offsets, + cudf::size_type* agg_mask_offsets, + cudf::aggregation::Kind const* d_agg_kinds) +{ + // Aggregates shared memory sources to global memory targets + for (auto idx = block.thread_rank(); idx < cardinality; idx += block.num_threads()) { + auto const target_idx = + global_mapping_index[block.group_index().x * GROUPBY_SHM_MAX_ELEMENTS + idx]; + for (auto col_idx = col_start; col_idx < col_end; col_idx++) { + auto target_col = target.column(col_idx); + + cuda::std::byte* source = + reinterpret_cast(shmem_agg_storage + agg_res_offsets[col_idx]); + bool* source_mask = reinterpret_cast(shmem_agg_storage + agg_mask_offsets[col_idx]); + + cudf::detail::dispatch_type_and_aggregation(input_values.column(col_idx).type(), + d_agg_kinds[col_idx], + gmem_element_aggregator{}, + target_col, + target_idx, + input_values.column(col_idx), + source, + source_mask, + idx); + } + } + block.sync(); +} + +/* Takes the local_mapping_index and global_mapping_index to compute + * pre (shared) and final (global) aggregates*/ +CUDF_KERNEL void single_pass_shmem_aggs_kernel(cudf::size_type num_rows, + bitmask_type const* row_bitmask, + bool skip_rows_with_nulls, + cudf::size_type* local_mapping_index, + cudf::size_type* global_mapping_index, + cudf::size_type* block_cardinality, + cudf::table_device_view input_values, + cudf::mutable_table_device_view output_values, + cudf::aggregation::Kind const* d_agg_kinds, + cudf::size_type total_agg_size, + cudf::size_type offsets_size) +{ + auto const block = cooperative_groups::this_thread_block(); + auto const cardinality = block_cardinality[block.group_index().x]; + if (cardinality >= GROUPBY_CARDINALITY_THRESHOLD) { return; } + + auto const num_cols = output_values.num_columns(); + + __shared__ cudf::size_type col_start; + __shared__ cudf::size_type col_end; + extern __shared__ cuda::std::byte shmem_agg_storage[]; + + cudf::size_type* shmem_agg_res_offsets = + reinterpret_cast(shmem_agg_storage + total_agg_size); + cudf::size_type* shmem_agg_mask_offsets = + reinterpret_cast(shmem_agg_storage + total_agg_size + offsets_size); + + if (block.thread_rank() == 0) { + col_start = 0; + col_end = 0; + } + block.sync(); + + while (col_end < num_cols) { + if (block.thread_rank() == 0) { + calculate_columns_to_aggregate(col_start, + col_end, + output_values, + num_cols, + shmem_agg_res_offsets, + shmem_agg_mask_offsets, + cardinality, + total_agg_size); + } + block.sync(); + + initialize_shmem_aggregations(block, + col_start, + col_end, + output_values, + shmem_agg_storage, + shmem_agg_res_offsets, + shmem_agg_mask_offsets, + cardinality, + d_agg_kinds); + + compute_pre_aggregrations(col_start, + col_end, + row_bitmask, + skip_rows_with_nulls, + input_values, + num_rows, + local_mapping_index, + shmem_agg_storage, + shmem_agg_res_offsets, + shmem_agg_mask_offsets, + d_agg_kinds); + block.sync(); + + compute_final_aggregations(block, + col_start, + col_end, + input_values, + output_values, + cardinality, + global_mapping_index, + shmem_agg_storage, + shmem_agg_res_offsets, + shmem_agg_mask_offsets, + d_agg_kinds); + } +} +} // namespace + +std::size_t available_shared_memory_size(cudf::size_type grid_size) +{ + auto const active_blocks_per_sm = + cudf::util::div_rounding_up_safe(grid_size, cudf::detail::num_multiprocessors()); + + size_t dynamic_shmem_size = 0; + CUDF_CUDA_TRY(cudaOccupancyAvailableDynamicSMemPerBlock( + &dynamic_shmem_size, single_pass_shmem_aggs_kernel, active_blocks_per_sm, GROUPBY_BLOCK_SIZE)); + return cudf::util::round_down_safe(static_cast(0.5 * dynamic_shmem_size), + ALIGNMENT); +} + +void compute_shared_memory_aggs(cudf::size_type grid_size, + std::size_t available_shmem_size, + cudf::size_type num_input_rows, + bitmask_type const* row_bitmask, + bool skip_rows_with_nulls, + cudf::size_type* local_mapping_index, + cudf::size_type* global_mapping_index, + cudf::size_type* block_cardinality, + cudf::table_device_view input_values, + cudf::mutable_table_device_view output_values, + cudf::aggregation::Kind const* d_agg_kinds, + rmm::cuda_stream_view stream) +{ + // For each aggregation, need one offset determining where the aggregation is + // performed, another indicating the validity of the aggregation + auto const shmem_offsets_size = output_values.num_columns() * sizeof(cudf::size_type); + // The rest of shmem is utilized for the actual arrays in shmem + CUDF_EXPECTS(available_shmem_size > shmem_offsets_size * 2, + "No enough space for shared memory aggregations"); + auto const shmem_agg_size = available_shmem_size - shmem_offsets_size * 2; + single_pass_shmem_aggs_kernel<<>>( + num_input_rows, + row_bitmask, + skip_rows_with_nulls, + local_mapping_index, + global_mapping_index, + block_cardinality, + input_values, + output_values, + d_agg_kinds, + shmem_agg_size, + shmem_offsets_size); +} +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/compute_shared_memory_aggs.hpp b/cpp/src/groupby/hash/compute_shared_memory_aggs.hpp new file mode 100644 index 00000000000..653821fd53b --- /dev/null +++ b/cpp/src/groupby/hash/compute_shared_memory_aggs.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +#include + +namespace cudf::groupby::detail::hash { + +std::size_t available_shared_memory_size(cudf::size_type grid_size); + +void compute_shared_memory_aggs(cudf::size_type grid_size, + std::size_t available_shmem_size, + cudf::size_type num_input_rows, + bitmask_type const* row_bitmask, + bool skip_rows_with_nulls, + cudf::size_type* local_mapping_index, + cudf::size_type* global_mapping_index, + cudf::size_type* block_cardinality, + cudf::table_device_view input_values, + cudf::mutable_table_device_view output_values, + cudf::aggregation::Kind const* d_agg_kinds, + rmm::cuda_stream_view stream); + +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/helpers.cuh b/cpp/src/groupby/hash/helpers.cuh index 0d117ca35b3..00836567b4f 100644 --- a/cpp/src/groupby/hash/helpers.cuh +++ b/cpp/src/groupby/hash/helpers.cuh @@ -54,15 +54,6 @@ using shmem_extent_t = CUDF_HOST_DEVICE auto constexpr window_extent = cuco::make_window_extent(shmem_extent_t{}); -/** - * @brief Returns the smallest multiple of 8 that is greater than or equal to the given integer. - */ -CUDF_HOST_DEVICE constexpr std::size_t round_to_multiple_of_8(std::size_t num) -{ - std::size_t constexpr base = 8; - return cudf::util::div_rounding_up_safe(num, base) * base; -} - using row_hash_t = cudf::experimental::row::hash::device_row_hasher; diff --git a/cpp/src/groupby/hash/single_pass_functors.cuh b/cpp/src/groupby/hash/single_pass_functors.cuh index 73791b3aa71..28a5b578e00 100644 --- a/cpp/src/groupby/hash/single_pass_functors.cuh +++ b/cpp/src/groupby/hash/single_pass_functors.cuh @@ -23,6 +23,87 @@ #include namespace cudf::groupby::detail::hash { +// TODO: TO BE REMOVED issue tracked via #17171 +template +__device__ constexpr bool is_supported() +{ + return cudf::is_fixed_width() and + ((k == cudf::aggregation::SUM) or (k == cudf::aggregation::SUM_OF_SQUARES) or + (k == cudf::aggregation::MIN) or (k == cudf::aggregation::MAX) or + (k == cudf::aggregation::COUNT_VALID) or (k == cudf::aggregation::COUNT_ALL) or + (k == cudf::aggregation::ARGMIN) or (k == cudf::aggregation::ARGMAX) or + (k == cudf::aggregation::STD) or (k == cudf::aggregation::VARIANCE) or + (k == cudf::aggregation::PRODUCT) and cudf::detail::is_product_supported()); +} + +template +__device__ std::enable_if_t, void>, T> +identity_from_operator() +{ + using DeviceType = cudf::device_storage_type_t; + return cudf::detail::corresponding_operator_t::template identity(); +} + +template +__device__ std::enable_if_t, void>, T> +identity_from_operator() +{ + CUDF_UNREACHABLE("Unable to get identity/sentinel from device operator"); +} + +template +__device__ T get_identity() +{ + if ((k == cudf::aggregation::ARGMAX) or (k == cudf::aggregation::ARGMIN)) { + if constexpr (cudf::is_timestamp()) { + return k == cudf::aggregation::ARGMAX + ? T{typename T::duration(cudf::detail::ARGMAX_SENTINEL)} + : T{typename T::duration(cudf::detail::ARGMIN_SENTINEL)}; + } else { + using DeviceType = cudf::device_storage_type_t; + return k == cudf::aggregation::ARGMAX + ? static_cast(cudf::detail::ARGMAX_SENTINEL) + : static_cast(cudf::detail::ARGMIN_SENTINEL); + } + } + return identity_from_operator(); +} + +template +struct initialize_target_element { + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type idx) const noexcept + { + CUDF_UNREACHABLE("Invalid source type and aggregation combination."); + } +}; + +template +struct initialize_target_element()>> { + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type idx) const noexcept + { + using DeviceType = cudf::device_storage_type_t; + DeviceType* target_casted = reinterpret_cast(target); + + target_casted[idx] = get_identity(); + + target_mask[idx] = (k == cudf::aggregation::COUNT_ALL) or (k == cudf::aggregation::COUNT_VALID); + } +}; + +struct initialize_shmem { + template + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type idx) const noexcept + { + initialize_target_element{}(target, target_mask, idx); + } +}; + /** * @brief Computes single-pass aggregations and store results into a sparse `output_values` table, * and populate `set` with indices of unique keys From 893d0fde7c17a7f8126baddd2f1cf34600f9420e Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 30 Oct 2024 20:46:27 -0400 Subject: [PATCH 58/76] Migrate NVText Tokenizing APIs to pylibcudf (#17100) Apart of #15162 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Yunsong Wang (https://github.com/PointKernel) - Muhammad Haseeb (https://github.com/mhaseeb123) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17100 --- cpp/include/nvtext/tokenize.hpp | 2 +- .../api_docs/pylibcudf/nvtext/index.rst | 1 + .../api_docs/pylibcudf/nvtext/tokenize.rst | 6 + python/cudf/cudf/_lib/nvtext/tokenize.pyx | 161 +++------- python/cudf/cudf/core/tokenize_vocabulary.py | 4 +- .../pylibcudf/pylibcudf/nvtext/CMakeLists.txt | 2 +- .../pylibcudf/pylibcudf/nvtext/__init__.pxd | 2 + python/pylibcudf/pylibcudf/nvtext/__init__.py | 2 + .../pylibcudf/pylibcudf/nvtext/tokenize.pxd | 31 ++ .../pylibcudf/pylibcudf/nvtext/tokenize.pyx | 286 ++++++++++++++++++ .../pylibcudf/tests/test_nvtext_tokenize.py | 101 +++++++ 11 files changed, 476 insertions(+), 122 deletions(-) create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/tokenize.rst create mode 100644 python/pylibcudf/pylibcudf/nvtext/tokenize.pxd create mode 100644 python/pylibcudf/pylibcudf/nvtext/tokenize.pyx create mode 100644 python/pylibcudf/pylibcudf/tests/test_nvtext_tokenize.py diff --git a/cpp/include/nvtext/tokenize.hpp b/cpp/include/nvtext/tokenize.hpp index e61601c6fea..e345587f88b 100644 --- a/cpp/include/nvtext/tokenize.hpp +++ b/cpp/include/nvtext/tokenize.hpp @@ -292,7 +292,7 @@ std::unique_ptr load_vocabulary( * @throw cudf::logic_error if `delimiter` is invalid * * @param input Strings column to tokenize - * @param vocabulary Used to lookup tokens within + * @param vocabulary Used to lookup tokens within `input` * @param delimiter Used to identify tokens within `input` * @param default_id The token id to be used for tokens not found in the `vocabulary`; * Default is -1 diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst index e0735a197fd..8c45942ed47 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst @@ -12,3 +12,4 @@ nvtext normalize replace stemmer + tokenize diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/tokenize.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/tokenize.rst new file mode 100644 index 00000000000..85c5a27b09d --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/tokenize.rst @@ -0,0 +1,6 @@ +======== +tokenize +======== + +.. automodule:: pylibcudf.nvtext.tokenize + :members: diff --git a/python/cudf/cudf/_lib/nvtext/tokenize.pyx b/python/cudf/cudf/_lib/nvtext/tokenize.pyx index a7e63f1e9ae..f473c48e2f7 100644 --- a/python/cudf/cudf/_lib/nvtext/tokenize.pyx +++ b/python/cudf/cudf/_lib/nvtext/tokenize.pyx @@ -2,162 +2,85 @@ from cudf.core.buffer import acquire_spill_lock -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.nvtext.tokenize cimport ( - character_tokenize as cpp_character_tokenize, - count_tokens as cpp_count_tokens, - detokenize as cpp_detokenize, - load_vocabulary as cpp_load_vocabulary, - tokenize as cpp_tokenize, - tokenize_vocabulary as cpp_tokenize_vocabulary, - tokenize_with_vocabulary as cpp_tokenize_with_vocabulary, -) -from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.types cimport size_type +from pylibcudf.nvtext.tokenize import TokenizeVocabulary # no-cython-lint + from cudf._lib.column cimport Column -from cudf._lib.scalar cimport DeviceScalar + +from pylibcudf import nvtext @acquire_spill_lock() def _tokenize_scalar(Column strings, object py_delimiter): - - cdef DeviceScalar delimiter = py_delimiter.device_value - - cdef column_view c_strings = strings.view() - cdef const string_scalar* c_delimiter = delimiter\ - .get_raw_ptr() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_tokenize( - c_strings, - c_delimiter[0], - ) + return Column.from_pylibcudf( + nvtext.tokenize.tokenize_scalar( + strings.to_pylibcudf(mode="read"), + py_delimiter.device_value.c_value ) - - return Column.from_unique_ptr(move(c_result)) + ) @acquire_spill_lock() def _tokenize_column(Column strings, Column delimiters): - cdef column_view c_strings = strings.view() - cdef column_view c_delimiters = delimiters.view() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_tokenize( - c_strings, - c_delimiters - ) + return Column.from_pylibcudf( + nvtext.tokenize.tokenize_column( + strings.to_pylibcudf(mode="read"), + delimiters.to_pylibcudf(mode="read"), ) - - return Column.from_unique_ptr(move(c_result)) + ) @acquire_spill_lock() def _count_tokens_scalar(Column strings, object py_delimiter): - - cdef DeviceScalar delimiter = py_delimiter.device_value - - cdef column_view c_strings = strings.view() - cdef const string_scalar* c_delimiter = delimiter\ - .get_raw_ptr() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_count_tokens( - c_strings, - c_delimiter[0] - ) + return Column.from_pylibcudf( + nvtext.tokenize.count_tokens_scalar( + strings.to_pylibcudf(mode="read"), + py_delimiter.device_value.c_value ) - - return Column.from_unique_ptr(move(c_result)) + ) @acquire_spill_lock() def _count_tokens_column(Column strings, Column delimiters): - cdef column_view c_strings = strings.view() - cdef column_view c_delimiters = delimiters.view() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_count_tokens( - c_strings, - c_delimiters - ) + return Column.from_pylibcudf( + nvtext.tokenize.count_tokens_column( + strings.to_pylibcudf(mode="read"), + delimiters.to_pylibcudf(mode="read") ) - - return Column.from_unique_ptr(move(c_result)) + ) @acquire_spill_lock() def character_tokenize(Column strings): - cdef column_view c_strings = strings.view() - cdef unique_ptr[column] c_result - with nogil: - c_result = move( - cpp_character_tokenize(c_strings) + return Column.from_pylibcudf( + nvtext.tokenize.character_tokenize( + strings.to_pylibcudf(mode="read") ) - - return Column.from_unique_ptr(move(c_result)) + ) @acquire_spill_lock() def detokenize(Column strings, Column indices, object py_separator): - - cdef DeviceScalar separator = py_separator.device_value - - cdef column_view c_strings = strings.view() - cdef column_view c_indices = indices.view() - cdef const string_scalar* c_separator = separator\ - .get_raw_ptr() - cdef unique_ptr[column] c_result - with nogil: - c_result = move( - cpp_detokenize(c_strings, c_indices, c_separator[0]) + return Column.from_pylibcudf( + nvtext.tokenize.detokenize( + strings.to_pylibcudf(mode="read"), + indices.to_pylibcudf(mode="read"), + py_separator.device_value.c_value ) - - return Column.from_unique_ptr(move(c_result)) - - -cdef class TokenizeVocabulary: - cdef unique_ptr[cpp_tokenize_vocabulary] c_obj - - def __cinit__(self, Column vocab): - cdef column_view c_vocab = vocab.view() - with nogil: - self.c_obj = move(cpp_load_vocabulary(c_vocab)) + ) @acquire_spill_lock() def tokenize_with_vocabulary(Column strings, - TokenizeVocabulary vocabulary, + object vocabulary, object py_delimiter, size_type default_id): - - cdef DeviceScalar delimiter = py_delimiter.device_value - cdef column_view c_strings = strings.view() - cdef const string_scalar* c_delimiter = delimiter\ - .get_raw_ptr() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_tokenize_with_vocabulary( - c_strings, - vocabulary.c_obj.get()[0], - c_delimiter[0], - default_id - ) + return Column.from_pylibcudf( + nvtext.tokenize.tokenize_with_vocabulary( + strings.to_pylibcudf(mode="read"), + vocabulary, + py_delimiter.device_value.c_value, + default_id ) - - return Column.from_unique_ptr(move(c_result)) + ) diff --git a/python/cudf/cudf/core/tokenize_vocabulary.py b/python/cudf/cudf/core/tokenize_vocabulary.py index 99d85c0c5c0..f0ce6e9d5d1 100644 --- a/python/cudf/cudf/core/tokenize_vocabulary.py +++ b/python/cudf/cudf/core/tokenize_vocabulary.py @@ -20,7 +20,9 @@ class TokenizeVocabulary: """ def __init__(self, vocabulary: "cudf.Series"): - self.vocabulary = cpp_tokenize_vocabulary(vocabulary._column) + self.vocabulary = cpp_tokenize_vocabulary( + vocabulary._column.to_pylibcudf(mode="read") + ) def tokenize( self, text, delimiter: str = "", default_id: int = -1 diff --git a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt index d97c0a73267..8afbadc3020 100644 --- a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt @@ -13,7 +13,7 @@ # ============================================================================= set(cython_sources edit_distance.pyx generate_ngrams.pyx jaccard.pyx minhash.pyx - ngrams_tokenize.pyx normalize.pyx replace.pyx stemmer.pyx + ngrams_tokenize.pyx normalize.pyx replace.pyx stemmer.pyx tokenize.pyx ) set(linked_libraries cudf::cudf) diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd index a658e57018e..504600b5e76 100644 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd @@ -9,6 +9,7 @@ from . cimport ( normalize, replace, stemmer, + tokenize, ) __all__ = [ @@ -20,4 +21,5 @@ __all__ = [ "normalize", "replace", "stemmer", + "tokenize", ] diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.py b/python/pylibcudf/pylibcudf/nvtext/__init__.py index 2c1feb089a2..1d5246c6af7 100644 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.py +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.py @@ -9,6 +9,7 @@ normalize, replace, stemmer, + tokenize, ) __all__ = [ @@ -20,4 +21,5 @@ "normalize", "replace", "stemmer", + "tokenize", ] diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd b/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd new file mode 100644 index 00000000000..0254b91ad58 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd @@ -0,0 +1,31 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp.memory cimport unique_ptr +from pylibcudf.column cimport Column +from pylibcudf.libcudf.nvtext.tokenize cimport tokenize_vocabulary +from pylibcudf.libcudf.types cimport size_type +from pylibcudf.scalar cimport Scalar + +cdef class TokenizeVocabulary: + cdef unique_ptr[tokenize_vocabulary] c_obj + +cpdef Column tokenize_scalar(Column input, Scalar delimiter=*) + +cpdef Column tokenize_column(Column input, Column delimiters) + +cpdef Column count_tokens_scalar(Column input, Scalar delimiter=*) + +cpdef Column count_tokens_column(Column input, Column delimiters) + +cpdef Column character_tokenize(Column input) + +cpdef Column detokenize(Column input, Column row_indices, Scalar separator=*) + +cpdef TokenizeVocabulary load_vocabulary(Column input) + +cpdef Column tokenize_with_vocabulary( + Column input, + TokenizeVocabulary vocabulary, + Scalar delimiter, + size_type default_id=* +) diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx new file mode 100644 index 00000000000..cdecfaabca2 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx @@ -0,0 +1,286 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from cython.operator cimport dereference +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.nvtext.tokenize cimport ( + character_tokenize as cpp_character_tokenize, + count_tokens as cpp_count_tokens, + detokenize as cpp_detokenize, + load_vocabulary as cpp_load_vocabulary, + tokenize as cpp_tokenize, + tokenize_with_vocabulary as cpp_tokenize_with_vocabulary, +) +from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.scalar.scalar_factories cimport ( + make_string_scalar as cpp_make_string_scalar, +) +from pylibcudf.libcudf.types cimport size_type + + +cdef class TokenizeVocabulary: + """The Vocabulary object to be used with ``tokenize_with_vocabulary``. + + For details, see :cpp:class:`cudf::nvtext::tokenize_vocabulary`. + """ + def __cinit__(self, Column vocab): + cdef column_view c_vocab = vocab.view() + with nogil: + self.c_obj = move(cpp_load_vocabulary(c_vocab)) + +cpdef Column tokenize_scalar(Column input, Scalar delimiter=None): + """ + Returns a single column of strings by tokenizing the input + strings column using the provided characters as delimiters. + + For details, see cpp:func:`cudf::nvtext::tokenize` + + Parameters + ---------- + input : Column + Strings column to tokenize + delimiter : Scalar + String scalar used to separate individual + strings into tokens + + Returns + ------- + Column + New strings columns of tokens + """ + cdef unique_ptr[column] c_result + + if delimiter is None: + delimiter = Scalar.from_libcudf( + cpp_make_string_scalar("".encode()) + ) + + with nogil: + c_result = cpp_tokenize( + input.view(), + dereference(delimiter.c_obj.get()), + ) + + return Column.from_libcudf(move(c_result)) + +cpdef Column tokenize_column(Column input, Column delimiters): + """ + Returns a single column of strings by tokenizing the input + strings column using multiple strings as delimiters. + + For details, see cpp:func:`cudf::nvtext::tokenize` + + Parameters + ---------- + input : Column + Strings column to tokenize + delimiters : Column + Strings column used to separate individual strings into tokens + + Returns + ------- + Column + New strings columns of tokens + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = cpp_tokenize( + input.view(), + delimiters.view(), + ) + + return Column.from_libcudf(move(c_result)) + +cpdef Column count_tokens_scalar(Column input, Scalar delimiter=None): + """ + Returns the number of tokens in each string of a strings column + using the provided characters as delimiters. + + For details, see cpp:func:`cudf::nvtext::count_tokens` + + Parameters + ---------- + input : Column + Strings column to count tokens + delimiters : Scalar] + String scalar used to separate each string into tokens + + Returns + ------- + Column + New column of token counts + """ + cdef unique_ptr[column] c_result + + if delimiter is None: + delimiter = Scalar.from_libcudf( + cpp_make_string_scalar("".encode()) + ) + + with nogil: + c_result = cpp_count_tokens( + input.view(), + dereference(delimiter.c_obj.get()), + ) + + return Column.from_libcudf(move(c_result)) + +cpdef Column count_tokens_column(Column input, Column delimiters): + """ + Returns the number of tokens in each string of a strings column + using multiple strings as delimiters. + + For details, see cpp:func:`cudf::nvtext::count_tokens` + + Parameters + ---------- + input : Column + Strings column to count tokens + delimiters : Column + Strings column used to separate + each string into tokens + + Returns + ------- + Column + New column of token counts + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = cpp_count_tokens( + input.view(), + delimiters.view(), + ) + + return Column.from_libcudf(move(c_result)) + +cpdef Column character_tokenize(Column input): + """ + Returns a single column of strings by converting + each character to a string. + + For details, see cpp:func:`cudf::nvtext::character_tokens` + + Parameters + ---------- + input : Column + Strings column to tokenize + + Returns + ------- + Column + New strings columns of tokens + """ + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_character_tokenize(input.view()) + + return Column.from_libcudf(move(c_result)) + +cpdef Column detokenize( + Column input, + Column row_indices, + Scalar separator=None +): + """ + Creates a strings column from a strings column of tokens + and an associated column of row ids. + + For details, see cpp:func:`cudf::nvtext::detokenize` + + Parameters + ---------- + input : Column + Strings column to detokenize + row_indices : Column + The relative output row index assigned + for each token in the input column + separator : Scalar + String to append after concatenating + each token to the proper output row + + Returns + ------- + Column + New strings columns of tokens + """ + cdef unique_ptr[column] c_result + + if separator is None: + separator = Scalar.from_libcudf( + cpp_make_string_scalar(" ".encode()) + ) + + with nogil: + c_result = cpp_detokenize( + input.view(), + row_indices.view(), + dereference(separator.c_obj.get()) + ) + + return Column.from_libcudf(move(c_result)) + +cpdef TokenizeVocabulary load_vocabulary(Column input): + """ + Create a ``TokenizeVocabulary`` object from a strings column. + + For details, see cpp:func:`cudf::nvtext::load_vocabulary` + + Parameters + ---------- + input : Column + Strings for the vocabulary + + Returns + ------- + TokenizeVocabulary + Object to be used with cpp:func:`cudf::nvtext::tokenize_with_vocabulary` + """ + return TokenizeVocabulary(input) + + +cpdef Column tokenize_with_vocabulary( + Column input, + TokenizeVocabulary vocabulary, + Scalar delimiter, + size_type default_id=-1 +): + """ + Returns the token ids for the input string by looking + up each delimited token in the given vocabulary. + + For details, see cpp:func:`cudf::nvtext::tokenize_with_vocabulary` + + Parameters + ---------- + input : Column + Strings column to tokenize + vocabulary : TokenizeVocabulary + Used to lookup tokens within ``input`` + delimiter : Scalar + Used to identify tokens within ``input`` + default_id : size_type + The token id to be used for tokens not found + in the vocabulary; Default is -1 + + Returns + ------- + Column + Lists column of token ids + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = cpp_tokenize_with_vocabulary( + input.view(), + dereference(vocabulary.c_obj.get()), + dereference(delimiter.c_obj.get()), + default_id + ) + + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_tokenize.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_tokenize.py new file mode 100644 index 00000000000..4ec9a5ee1a5 --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_tokenize.py @@ -0,0 +1,101 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +import pyarrow as pa +import pytest +from utils import assert_column_eq + +import pylibcudf as plc + + +@pytest.fixture(scope="module") +def input_col(): + return pa.array(["a", "b c", "d.e:f;"]) + + +@pytest.mark.parametrize( + "delimiter", [None, plc.interop.from_arrow(pa.scalar("."))] +) +def test_tokenize_scalar(input_col, delimiter): + result = plc.nvtext.tokenize.tokenize_scalar( + plc.interop.from_arrow(input_col), delimiter + ) + if delimiter is None: + expected = pa.array(["a", "b", "c", "d.e:f;"]) + else: + expected = pa.array(["a", "b c", "d", "e:f;"]) + assert_column_eq(result, expected) + + +def test_tokenize_column(input_col): + delimiters = pa.array([" ", ".", ":", ";"]) + result = plc.nvtext.tokenize.tokenize_column( + plc.interop.from_arrow(input_col), plc.interop.from_arrow(delimiters) + ) + expected = pa.array(["a", "b", "c", "d", "e", "f"]) + assert_column_eq(result, expected) + + +@pytest.mark.parametrize( + "delimiter", [None, plc.interop.from_arrow(pa.scalar("."))] +) +def test_count_tokens_scalar(input_col, delimiter): + result = plc.nvtext.tokenize.count_tokens_scalar( + plc.interop.from_arrow(input_col), delimiter + ) + if delimiter is None: + expected = pa.array([1, 2, 1], type=pa.int32()) + else: + expected = pa.array([1, 1, 2], type=pa.int32()) + assert_column_eq(result, expected) + + +def test_count_tokens_column(input_col): + delimiters = pa.array([" ", ".", ":", ";"]) + result = plc.nvtext.tokenize.count_tokens_column( + plc.interop.from_arrow(input_col), plc.interop.from_arrow(delimiters) + ) + expected = pa.array([1, 2, 3], type=pa.int32()) + assert_column_eq(result, expected) + + +def test_character_tokenize(input_col): + result = plc.nvtext.tokenize.character_tokenize( + plc.interop.from_arrow(input_col) + ) + expected = pa.array(["a", "b", " ", "c", "d", ".", "e", ":", "f", ";"]) + assert_column_eq(result, expected) + + +@pytest.mark.parametrize( + "delimiter", [None, plc.interop.from_arrow(pa.scalar("."))] +) +def test_detokenize(input_col, delimiter): + row_indices = pa.array([0, 0, 1]) + result = plc.nvtext.tokenize.detokenize( + plc.interop.from_arrow(input_col), plc.interop.from_arrow(row_indices) + ) + expected = pa.array(["a b c", "d.e:f;"]) + assert_column_eq(result, expected) + + +def test_load_vocabulary(input_col): + result = plc.nvtext.tokenize.load_vocabulary( + plc.interop.from_arrow(input_col) + ) + assert isinstance(result, plc.nvtext.tokenize.TokenizeVocabulary) + + +@pytest.mark.parametrize("default_id", [-1, 0]) +def test_tokenize_with_vocabulary(input_col, default_id): + result = plc.nvtext.tokenize.tokenize_with_vocabulary( + plc.interop.from_arrow(input_col), + plc.nvtext.tokenize.load_vocabulary(plc.interop.from_arrow(input_col)), + plc.interop.from_arrow(pa.scalar(" ")), + default_id, + ) + pa_result = plc.interop.to_arrow(result) + if default_id == -1: + expected = pa.array([[0], [-1, -1], [2]], type=pa_result.type) + else: + expected = pa.array([[0], [0, 0], [2]], type=pa_result.type) + assert_column_eq(result, expected) From 3f66087c6976433a02c05135ab9c83564118846a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 30 Oct 2024 18:49:00 -0700 Subject: [PATCH 59/76] Fix some documentation rendering for pylibcudf (#17217) * Fixed/modified some title headers * Fixed/added pylibcudf section docstrings Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/17217 --- .../user_guide/api_docs/pylibcudf/strings/findall.rst | 6 +++--- .../source/user_guide/api_docs/pylibcudf/table.rst | 2 +- python/pylibcudf/pylibcudf/binaryop.pyx | 1 + python/pylibcudf/pylibcudf/filling.pyx | 5 +++++ python/pylibcudf/pylibcudf/strings/__init__.py | 1 + python/pylibcudf/pylibcudf/strings/regex_program.pyx | 4 ++++ python/pylibcudf/pylibcudf/strings/replace.pyx | 1 + python/pylibcudf/pylibcudf/types.pyx | 10 ++++++++++ 8 files changed, 26 insertions(+), 4 deletions(-) diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/findall.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/findall.rst index 9850ee10098..699e38ebbe5 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/findall.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/findall.rst @@ -1,6 +1,6 @@ -==== -find -==== +======= +findall +======= .. automodule:: pylibcudf.strings.findall :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/table.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/table.rst index e39ca18a12b..4de9bced86f 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/table.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/table.rst @@ -1,5 +1,5 @@ ===== -Table +table ===== .. automodule:: pylibcudf.table diff --git a/python/pylibcudf/pylibcudf/binaryop.pyx b/python/pylibcudf/pylibcudf/binaryop.pyx index 51b2b4cfaa3..eef73bf4e9d 100644 --- a/python/pylibcudf/pylibcudf/binaryop.pyx +++ b/python/pylibcudf/pylibcudf/binaryop.pyx @@ -100,6 +100,7 @@ cpdef bool is_supported_operation( The right hand side data type. op : BinaryOperator The operation to check. + Returns ------- bool diff --git a/python/pylibcudf/pylibcudf/filling.pyx b/python/pylibcudf/pylibcudf/filling.pyx index 0372e1132cc..a47004a1e42 100644 --- a/python/pylibcudf/pylibcudf/filling.pyx +++ b/python/pylibcudf/pylibcudf/filling.pyx @@ -77,6 +77,10 @@ cpdef void fill_in_place( The index at which to stop filling. value : Scalar The value to fill with. + + Returns + ------- + None """ with nogil: @@ -101,6 +105,7 @@ cpdef Column sequence(size_type size, Scalar init, Scalar step): The initial value of the sequence step : Scalar The step of the sequence + Returns ------- pylibcudf.Column diff --git a/python/pylibcudf/pylibcudf/strings/__init__.py b/python/pylibcudf/pylibcudf/strings/__init__.py index 40fa8261905..fa7294c7dbd 100644 --- a/python/pylibcudf/pylibcudf/strings/__init__.py +++ b/python/pylibcudf/pylibcudf/strings/__init__.py @@ -32,6 +32,7 @@ "capitalize", "case", "char_types", + "combine", "contains", "convert", "extract", diff --git a/python/pylibcudf/pylibcudf/strings/regex_program.pyx b/python/pylibcudf/pylibcudf/strings/regex_program.pyx index f426b6888ae..91f585cd637 100644 --- a/python/pylibcudf/pylibcudf/strings/regex_program.pyx +++ b/python/pylibcudf/pylibcudf/strings/regex_program.pyx @@ -37,6 +37,10 @@ cdef class RegexProgram: flags : Uniont[int, RegexFlags] Regex flags for interpreting special characters in the pattern + Returns + ------- + RegexProgram + A new RegexProgram """ cdef unique_ptr[regex_program] c_prog cdef regex_flags c_flags diff --git a/python/pylibcudf/pylibcudf/strings/replace.pyx b/python/pylibcudf/pylibcudf/strings/replace.pyx index 6db7f04fcbb..2b94f5e3fee 100644 --- a/python/pylibcudf/pylibcudf/strings/replace.pyx +++ b/python/pylibcudf/pylibcudf/strings/replace.pyx @@ -136,6 +136,7 @@ cpdef Column replace_slice( Start position where repl will be added. stop : size_type, default -1 End position (exclusive) to use for replacement. + Returns ------- pylibcudf.Column diff --git a/python/pylibcudf/pylibcudf/types.pyx b/python/pylibcudf/pylibcudf/types.pyx index 58c7d97e9bc..a0c31f994a3 100644 --- a/python/pylibcudf/pylibcudf/types.pyx +++ b/python/pylibcudf/pylibcudf/types.pyx @@ -79,6 +79,16 @@ cpdef size_type size_of(DataType t): Only fixed-width types are supported. For details, see :cpp:func:`size_of`. + + Parameters + ---------- + t : DataType + The DataType to get the size of. + + Returns + ------- + int + Size in bytes of an element of the specified type. """ with nogil: return cpp_size_of(t.c_obj) From 0db2463a70c258086a123d93455d1061871868fb Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 30 Oct 2024 23:20:17 -0400 Subject: [PATCH 60/76] Migrate NVText Byte Pair Encoding APIs to pylibcudf (#17101) Apart of #15162 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17101 --- .../pylibcudf/nvtext/byte_pair_encode.rst | 6 ++ .../api_docs/pylibcudf/nvtext/index.rst | 1 + .../cudf/_lib/nvtext/byte_pair_encode.pyx | 45 +++--------- python/cudf/cudf/core/byte_pair_encoding.py | 7 +- .../pylibcudf/pylibcudf/nvtext/CMakeLists.txt | 5 +- .../pylibcudf/pylibcudf/nvtext/__init__.pxd | 2 + python/pylibcudf/pylibcudf/nvtext/__init__.py | 2 + .../pylibcudf/nvtext/byte_pair_encode.pxd | 16 +++++ .../pylibcudf/nvtext/byte_pair_encode.pyx | 70 +++++++++++++++++++ .../tests/test_nvtext_byte_pair_encode.py | 46 ++++++++++++ 10 files changed, 160 insertions(+), 40 deletions(-) create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/byte_pair_encode.rst create mode 100644 python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pxd create mode 100644 python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx create mode 100644 python/pylibcudf/pylibcudf/tests/test_nvtext_byte_pair_encode.py diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/byte_pair_encode.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/byte_pair_encode.rst new file mode 100644 index 00000000000..908fcc4fde6 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/byte_pair_encode.rst @@ -0,0 +1,6 @@ +================ +byte_pair_encode +================ + +.. automodule:: pylibcudf.nvtext.byte_pair_encode + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst index 8c45942ed47..00314bceeb9 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst @@ -8,6 +8,7 @@ nvtext generate_ngrams jaccard minhash + byte_pair_encode ngrams_tokenize normalize replace diff --git a/python/cudf/cudf/_lib/nvtext/byte_pair_encode.pyx b/python/cudf/cudf/_lib/nvtext/byte_pair_encode.pyx index 0d768e24f39..2b2762eead2 100644 --- a/python/cudf/cudf/_lib/nvtext/byte_pair_encode.pyx +++ b/python/cudf/cudf/_lib/nvtext/byte_pair_encode.pyx @@ -3,49 +3,22 @@ from cudf.core.buffer import acquire_spill_lock -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.nvtext.byte_pair_encode cimport ( - bpe_merge_pairs as cpp_bpe_merge_pairs, - byte_pair_encoding as cpp_byte_pair_encoding, - load_merge_pairs as cpp_load_merge_pairs, -) -from pylibcudf.libcudf.scalar.scalar cimport string_scalar - from cudf._lib.column cimport Column -from cudf._lib.scalar cimport DeviceScalar - -cdef class BPEMergePairs: - cdef unique_ptr[cpp_bpe_merge_pairs] c_obj - - def __cinit__(self, Column merge_pairs): - cdef column_view c_pairs = merge_pairs.view() - with nogil: - self.c_obj = move(cpp_load_merge_pairs(c_pairs)) +from pylibcudf import nvtext +from pylibcudf.nvtext.byte_pair_encode import BPEMergePairs # no-cython-lint @acquire_spill_lock() def byte_pair_encoding( Column strings, - BPEMergePairs merge_pairs, + object merge_pairs, object separator ): - cdef column_view c_strings = strings.view() - cdef DeviceScalar d_separator = separator.device_value - cdef const string_scalar* c_separator = d_separator\ - .get_raw_ptr() - cdef unique_ptr[column] c_result - with nogil: - c_result = move( - cpp_byte_pair_encoding( - c_strings, - merge_pairs.c_obj.get()[0], - c_separator[0] - ) + return Column.from_pylibcudf( + nvtext.byte_pair_encode.byte_pair_encoding( + strings.to_pylibcudf(mode="read"), + merge_pairs, + separator.device_value.c_value ) - - return Column.from_unique_ptr(move(c_result)) + ) diff --git a/python/cudf/cudf/core/byte_pair_encoding.py b/python/cudf/cudf/core/byte_pair_encoding.py index 6ca64a0a2be..8d38a5f2272 100644 --- a/python/cudf/cudf/core/byte_pair_encoding.py +++ b/python/cudf/cudf/core/byte_pair_encoding.py @@ -2,9 +2,10 @@ from __future__ import annotations +import pylibcudf as plc + import cudf from cudf._lib.nvtext.byte_pair_encode import ( - BPEMergePairs as cpp_merge_pairs, byte_pair_encoding as cpp_byte_pair_encoding, ) @@ -25,7 +26,9 @@ class BytePairEncoder: """ def __init__(self, merges_pair: "cudf.Series"): - self.merge_pairs = cpp_merge_pairs(merges_pair._column) + self.merge_pairs = plc.nvtext.byte_pair_encode.BPEMergePairs( + merges_pair._column.to_pylibcudf(mode="read") + ) def __call__(self, text, separator: str = " ") -> cudf.Series: """ diff --git a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt index 8afbadc3020..fa3cd448fa3 100644 --- a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt @@ -12,8 +12,9 @@ # the License. # ============================================================================= -set(cython_sources edit_distance.pyx generate_ngrams.pyx jaccard.pyx minhash.pyx - ngrams_tokenize.pyx normalize.pyx replace.pyx stemmer.pyx tokenize.pyx +set(cython_sources + edit_distance.pyx generate_ngrams.pyx jaccard.pyx minhash.pyx ngrams_tokenize.pyx normalize.pyx + replace.pyx stemmer.pyx tokenize.pyx byte_pair_encode.pyx ) set(linked_libraries cudf::cudf) diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd index 504600b5e76..a9ede17761b 100644 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd @@ -1,6 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. from . cimport ( + byte_pair_encode, edit_distance, generate_ngrams, jaccard, @@ -17,6 +18,7 @@ __all__ = [ "generate_ngrams", "jaccard", "minhash", + "byte_pair_encode" "ngrams_tokenize", "normalize", "replace", diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.py b/python/pylibcudf/pylibcudf/nvtext/__init__.py index 1d5246c6af7..87650a02c33 100644 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.py +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.py @@ -1,6 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. from . import ( + byte_pair_encode, edit_distance, generate_ngrams, jaccard, @@ -17,6 +18,7 @@ "generate_ngrams", "jaccard", "minhash", + "byte_pair_encode", "ngrams_tokenize", "normalize", "replace", diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pxd b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pxd new file mode 100644 index 00000000000..e4b93e96b9d --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pxd @@ -0,0 +1,16 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp.memory cimport unique_ptr +from pylibcudf.column cimport Column +from pylibcudf.libcudf.nvtext.byte_pair_encode cimport bpe_merge_pairs +from pylibcudf.scalar cimport Scalar + + +cdef class BPEMergePairs: + cdef unique_ptr[bpe_merge_pairs] c_obj + +cpdef Column byte_pair_encoding( + Column input, + BPEMergePairs merge_pairs, + Scalar separator=* +) diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx new file mode 100644 index 00000000000..76caad276d4 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx @@ -0,0 +1,70 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from cython.operator cimport dereference +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.nvtext.byte_pair_encode cimport ( + byte_pair_encoding as cpp_byte_pair_encoding, + load_merge_pairs as cpp_load_merge_pairs, +) +from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.scalar.scalar_factories cimport ( + make_string_scalar as cpp_make_string_scalar, +) +from pylibcudf.scalar cimport Scalar + + +cdef class BPEMergePairs: + """The table of merge pairs for the BPE encoder. + + For details, see :cpp:class:`cudf::nvtext::bpe_merge_pairs`. + """ + def __cinit__(self, Column merge_pairs): + cdef column_view c_pairs = merge_pairs.view() + with nogil: + self.c_obj = move(cpp_load_merge_pairs(c_pairs)) + +cpdef Column byte_pair_encoding( + Column input, + BPEMergePairs merge_pairs, + Scalar separator=None +): + """ + Byte pair encode the input strings. + + For details, see cpp:func:`cudf::nvtext::byte_pair_encoding` + + Parameters + ---------- + input : Column + Strings to encode. + merge_pairs : BPEMergePairs + Substrings to rebuild each string on. + separator : Scalar + String used to build the output after encoding. Default is a space. + + Returns + ------- + Column + An encoded column of strings. + """ + cdef unique_ptr[column] c_result + + if separator is None: + separator = Scalar.from_libcudf( + cpp_make_string_scalar(" ".encode()) + ) + + with nogil: + c_result = move( + cpp_byte_pair_encoding( + input.view(), + dereference(merge_pairs.c_obj.get()), + dereference(separator.c_obj.get()), + ) + ) + + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_byte_pair_encode.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_byte_pair_encode.py new file mode 100644 index 00000000000..7d6718a959b --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_byte_pair_encode.py @@ -0,0 +1,46 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +import pyarrow as pa +import pytest +from utils import assert_column_eq + +import pylibcudf as plc + + +@pytest.fixture(scope="module") +def input_col(): + return pa.array( + [ + "e n", + "i t", + "i s", + "e s", + "en t", + "c e", + "es t", + "en ce", + "t est", + "s ent", + ] + ) + + +@pytest.mark.parametrize( + "separator", [None, plc.interop.from_arrow(pa.scalar("e"))] +) +def test_byte_pair_encoding(input_col, separator): + plc_col = plc.interop.from_arrow( + pa.array(["test sentence", "thisis test"]) + ) + result = plc.nvtext.byte_pair_encode.byte_pair_encoding( + plc_col, + plc.nvtext.byte_pair_encode.BPEMergePairs( + plc.interop.from_arrow(input_col) + ), + separator, + ) + if separator is None: + expected = pa.array(["test sent ence", "t h is is test"]) + else: + expected = pa.array(["teste esenteence", "teheiseise etest"]) + assert_column_eq(result, expected) From a69de571ce28a4b71b78ce03f7d34ec70486415e Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Wed, 30 Oct 2024 22:21:42 -0500 Subject: [PATCH 61/76] Migrate hashing operations to `pylibcudf` (#15418) This PR creates `pylibcudf` hashing APIs and modifies the cuDF Cython to leverage them. cc @vyasr Authors: - https://github.com/brandon-b-miller Approvers: - Yunsong Wang (https://github.com/PointKernel) - Bradley Dice (https://github.com/bdice) - Lawrence Mitchell (https://github.com/wence-) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/15418 --- .../all_cuda-118_arch-x86_64.yaml | 2 + .../all_cuda-125_arch-x86_64.yaml | 2 + cpp/include/cudf/hashing.hpp | 18 +- cpp/src/hash/md5_hash.cu | 3 +- cpp/src/hash/sha_hash.cuh | 3 +- cpp/tests/hashing/sha1_test.cpp | 4 +- cpp/tests/hashing/sha224_test.cpp | 4 +- cpp/tests/hashing/sha256_test.cpp | 4 +- cpp/tests/hashing/sha384_test.cpp | 4 +- cpp/tests/hashing/sha512_test.cpp | 4 +- dependencies.yaml | 5 +- .../user_guide/api_docs/pylibcudf/hashing.rst | 6 + .../user_guide/api_docs/pylibcudf/index.rst | 1 + python/cudf/cudf/_lib/hash.pyx | 57 ++-- python/cudf/pyproject.toml | 2 + python/pylibcudf/pylibcudf/CMakeLists.txt | 1 + python/pylibcudf/pylibcudf/__init__.pxd | 2 + python/pylibcudf/pylibcudf/__init__.py | 2 + python/pylibcudf/pylibcudf/hashing.pxd | 30 ++ python/pylibcudf/pylibcudf/hashing.pyx | 240 ++++++++++++++++ python/pylibcudf/pylibcudf/libcudf/hash.pxd | 41 +-- python/pylibcudf/pylibcudf/libcudf/hash.pyx | 0 python/pylibcudf/pylibcudf/tests/conftest.py | 12 +- .../pylibcudf/pylibcudf/tests/test_hashing.py | 269 ++++++++++++++++++ 24 files changed, 639 insertions(+), 77 deletions(-) create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/hashing.rst create mode 100644 python/pylibcudf/pylibcudf/hashing.pxd create mode 100644 python/pylibcudf/pylibcudf/hashing.pyx create mode 100644 python/pylibcudf/pylibcudf/libcudf/hash.pyx create mode 100644 python/pylibcudf/pylibcudf/tests/test_hashing.py diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index f3bbaaa8779..24dc3c9a7cc 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -46,6 +46,7 @@ dependencies: - librdkafka>=2.5.0,<2.6.0a0 - librmm==24.12.*,>=0.0.0a0 - make +- mmh3 - moto>=4.0.8 - msgpack-python - myst-nb @@ -76,6 +77,7 @@ dependencies: - pytest-xdist - pytest<8 - python-confluent-kafka>=2.5.0,<2.6.0a0 +- python-xxhash - python>=3.10,<3.13 - pytorch>=2.1.0 - rapids-build-backend>=0.3.0,<0.4.0.dev0 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 38c5b361f70..a2bb2a3fe7f 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -45,6 +45,7 @@ dependencies: - librdkafka>=2.5.0,<2.6.0a0 - librmm==24.12.*,>=0.0.0a0 - make +- mmh3 - moto>=4.0.8 - msgpack-python - myst-nb @@ -74,6 +75,7 @@ dependencies: - pytest-xdist - pytest<8 - python-confluent-kafka>=2.5.0,<2.6.0a0 +- python-xxhash - python>=3.10,<3.13 - pytorch>=2.1.0 - rapids-build-backend>=0.3.0,<0.4.0.dev0 diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp index 0c5327edb91..307a52cd242 100644 --- a/cpp/include/cudf/hashing.hpp +++ b/cpp/include/cudf/hashing.hpp @@ -22,26 +22,27 @@ namespace CUDF_EXPORT cudf { -/** - * @addtogroup column_hash - * @{ - * @file - */ - /** * @brief Type of hash value - * + * @ingroup column_hash */ using hash_value_type = uint32_t; /** * @brief The default seed value for hash functions + * @ingroup column_hash */ static constexpr uint32_t DEFAULT_HASH_SEED = 0; //! Hash APIs namespace hashing { +/** + * @addtogroup column_hash + * @{ + * @file + */ + /** * @brief Computes the MurmurHash3 32-bit hash value of each row in the given table * @@ -183,7 +184,8 @@ std::unique_ptr xxhash_64( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); +/** @} */ // end of group + } // namespace hashing -/** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index c7bfd4aecf4..a0c51940c87 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -302,7 +302,8 @@ std::unique_ptr md5(table_view const& input, } return md5_leaf_type_check(col.type()); }), - "Unsupported column type for hash function."); + "Unsupported column type for hash function.", + cudf::data_type_error); // Digest size in bytes auto constexpr digest_size = 32; diff --git a/cpp/src/hash/sha_hash.cuh b/cpp/src/hash/sha_hash.cuh index ebaec8e2775..eb002cf9c6f 100644 --- a/cpp/src/hash/sha_hash.cuh +++ b/cpp/src/hash/sha_hash.cuh @@ -513,7 +513,8 @@ std::unique_ptr sha_hash(table_view const& input, CUDF_EXPECTS( std::all_of( input.begin(), input.end(), [](auto const& col) { return sha_leaf_type_check(col.type()); }), - "Unsupported column type for hash function."); + "Unsupported column type for hash function.", + cudf::data_type_error); // Result column allocation and creation auto begin = thrust::make_constant_iterator(Hasher::digest_size); diff --git a/cpp/tests/hashing/sha1_test.cpp b/cpp/tests/hashing/sha1_test.cpp index 1e86751bb4c..3aa0bda6ae8 100644 --- a/cpp/tests/hashing/sha1_test.cpp +++ b/cpp/tests/hashing/sha1_test.cpp @@ -136,7 +136,7 @@ TEST_F(SHA1HashTest, ListsUnsupported) auto const input = cudf::table_view({strings_list_col}); - EXPECT_THROW(cudf::hashing::sha1(input), cudf::logic_error); + EXPECT_THROW(cudf::hashing::sha1(input), cudf::data_type_error); } TEST_F(SHA1HashTest, StructsUnsupported) @@ -145,7 +145,7 @@ TEST_F(SHA1HashTest, StructsUnsupported) auto struct_col = cudf::test::structs_column_wrapper{{child_col}}; auto const input = cudf::table_view({struct_col}); - EXPECT_THROW(cudf::hashing::sha1(input), cudf::logic_error); + EXPECT_THROW(cudf::hashing::sha1(input), cudf::data_type_error); } template diff --git a/cpp/tests/hashing/sha224_test.cpp b/cpp/tests/hashing/sha224_test.cpp index 259e7102ee2..3f6aeb9d5e6 100644 --- a/cpp/tests/hashing/sha224_test.cpp +++ b/cpp/tests/hashing/sha224_test.cpp @@ -136,7 +136,7 @@ TEST_F(SHA224HashTest, ListsUnsupported) auto const input = cudf::table_view({strings_list_col}); - EXPECT_THROW(cudf::hashing::sha224(input), cudf::logic_error); + EXPECT_THROW(cudf::hashing::sha224(input), cudf::data_type_error); } TEST_F(SHA224HashTest, StructsUnsupported) @@ -145,7 +145,7 @@ TEST_F(SHA224HashTest, StructsUnsupported) auto struct_col = cudf::test::structs_column_wrapper{{child_col}}; auto const input = cudf::table_view({struct_col}); - EXPECT_THROW(cudf::hashing::sha224(input), cudf::logic_error); + EXPECT_THROW(cudf::hashing::sha224(input), cudf::data_type_error); } template diff --git a/cpp/tests/hashing/sha256_test.cpp b/cpp/tests/hashing/sha256_test.cpp index a4affc87874..9519e96fbae 100644 --- a/cpp/tests/hashing/sha256_test.cpp +++ b/cpp/tests/hashing/sha256_test.cpp @@ -135,7 +135,7 @@ TEST_F(SHA256HashTest, ListsUnsupported) auto const input = cudf::table_view({strings_list_col}); - EXPECT_THROW(cudf::hashing::sha256(input), cudf::logic_error); + EXPECT_THROW(cudf::hashing::sha256(input), cudf::data_type_error); } TEST_F(SHA256HashTest, StructsUnsupported) @@ -144,7 +144,7 @@ TEST_F(SHA256HashTest, StructsUnsupported) auto struct_col = cudf::test::structs_column_wrapper{{child_col}}; auto const input = cudf::table_view({struct_col}); - EXPECT_THROW(cudf::hashing::sha256(input), cudf::logic_error); + EXPECT_THROW(cudf::hashing::sha256(input), cudf::data_type_error); } template diff --git a/cpp/tests/hashing/sha384_test.cpp b/cpp/tests/hashing/sha384_test.cpp index 8a5c090eeea..9de566b9d9b 100644 --- a/cpp/tests/hashing/sha384_test.cpp +++ b/cpp/tests/hashing/sha384_test.cpp @@ -154,7 +154,7 @@ TEST_F(SHA384HashTest, ListsUnsupported) auto const input = cudf::table_view({strings_list_col}); - EXPECT_THROW(cudf::hashing::sha384(input), cudf::logic_error); + EXPECT_THROW(cudf::hashing::sha384(input), cudf::data_type_error); } TEST_F(SHA384HashTest, StructsUnsupported) @@ -163,7 +163,7 @@ TEST_F(SHA384HashTest, StructsUnsupported) auto struct_col = cudf::test::structs_column_wrapper{{child_col}}; auto const input = cudf::table_view({struct_col}); - EXPECT_THROW(cudf::hashing::sha384(input), cudf::logic_error); + EXPECT_THROW(cudf::hashing::sha384(input), cudf::data_type_error); } template diff --git a/cpp/tests/hashing/sha512_test.cpp b/cpp/tests/hashing/sha512_test.cpp index 77fc56b5f13..95e5245f38e 100644 --- a/cpp/tests/hashing/sha512_test.cpp +++ b/cpp/tests/hashing/sha512_test.cpp @@ -154,7 +154,7 @@ TEST_F(SHA512HashTest, ListsUnsupported) auto const input = cudf::table_view({strings_list_col}); - EXPECT_THROW(cudf::hashing::sha512(input), cudf::logic_error); + EXPECT_THROW(cudf::hashing::sha512(input), cudf::data_type_error); } TEST_F(SHA512HashTest, StructsUnsupported) @@ -163,7 +163,7 @@ TEST_F(SHA512HashTest, StructsUnsupported) auto struct_col = cudf::test::structs_column_wrapper{{child_col}}; auto const input = cudf::table_view({struct_col}); - EXPECT_THROW(cudf::hashing::sha512(input), cudf::logic_error); + EXPECT_THROW(cudf::hashing::sha512(input), cudf::data_type_error); } template diff --git a/dependencies.yaml b/dependencies.yaml index bd1a5deb878..12038c5e503 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -828,6 +828,7 @@ dependencies: - pytest-benchmark - pytest-cases>=3.8.2 - scipy + - mmh3 - output_types: conda packages: - aiobotocore>=2.2.0 @@ -836,12 +837,14 @@ dependencies: - msgpack-python - moto>=4.0.8 - s3fs>=2022.3.0 - - output_types: pyproject + - python-xxhash + - output_types: [pyproject, requirements] packages: - msgpack - &tokenizers tokenizers==0.15.2 - &transformers transformers==4.39.3 - tzdata + - xxhash specific: - output_types: [conda, requirements] matrices: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/hashing.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/hashing.rst new file mode 100644 index 00000000000..6bd1fbd821b --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/hashing.rst @@ -0,0 +1,6 @@ +======= +hashing +======= + +.. automodule:: pylibcudf.hashing + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst index 62e14a67ee5..997ece6d29c 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst @@ -19,6 +19,7 @@ This page provides API documentation for pylibcudf. filling gpumemoryview groupby + hashing interop join json diff --git a/python/cudf/cudf/_lib/hash.pyx b/python/cudf/cudf/_lib/hash.pyx index 9b7ab0888d2..89309b36371 100644 --- a/python/cudf/cudf/_lib/hash.pyx +++ b/python/cudf/cudf/_lib/hash.pyx @@ -1,27 +1,12 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. -from cudf.core.buffer import acquire_spill_lock +import pylibcudf as plc -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move +from cudf.core.buffer import acquire_spill_lock -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.hash cimport ( - md5, - murmurhash3_x86_32, - sha1, - sha224, - sha256, - sha384, - sha512, - xxhash_64, -) -from pylibcudf.libcudf.table.table_view cimport table_view +from pylibcudf.table cimport Table from cudf._lib.column cimport Column -from cudf._lib.utils cimport table_view_from_columns - -import pylibcudf as plc @acquire_spill_lock() @@ -37,32 +22,26 @@ def hash_partition(list source_columns, list columns_to_hash, @acquire_spill_lock() def hash(list source_columns, str method, int seed=0): - cdef table_view c_source_view = table_view_from_columns(source_columns) - cdef unique_ptr[column] c_result + cdef Table ctbl = Table( + [c.to_pylibcudf(mode="read") for c in source_columns] + ) if method == "murmur3": - with nogil: - c_result = move(murmurhash3_x86_32(c_source_view, seed)) + return Column.from_pylibcudf(plc.hashing.murmurhash3_x86_32(ctbl, seed)) + elif method == "xxhash64": + return Column.from_pylibcudf(plc.hashing.xxhash_64(ctbl, seed)) elif method == "md5": - with nogil: - c_result = move(md5(c_source_view)) + return Column.from_pylibcudf(plc.hashing.md5(ctbl)) elif method == "sha1": - with nogil: - c_result = move(sha1(c_source_view)) + return Column.from_pylibcudf(plc.hashing.sha1(ctbl)) elif method == "sha224": - with nogil: - c_result = move(sha224(c_source_view)) + return Column.from_pylibcudf(plc.hashing.sha224(ctbl)) elif method == "sha256": - with nogil: - c_result = move(sha256(c_source_view)) + return Column.from_pylibcudf(plc.hashing.sha256(ctbl)) elif method == "sha384": - with nogil: - c_result = move(sha384(c_source_view)) + return Column.from_pylibcudf(plc.hashing.sha384(ctbl)) elif method == "sha512": - with nogil: - c_result = move(sha512(c_source_view)) - elif method == "xxhash64": - with nogil: - c_result = move(xxhash_64(c_source_view, seed)) + return Column.from_pylibcudf(plc.hashing.sha512(ctbl)) else: - raise ValueError(f"Unsupported hash function: {method}") - return Column.from_unique_ptr(move(c_result)) + raise ValueError( + f"Unsupported hashing algorithm {method}." + ) diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 80201dd84db..b6105c17b3e 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -53,6 +53,7 @@ test = [ "cramjam", "fastavro>=0.22.9", "hypothesis", + "mmh3", "msgpack", "pytest-benchmark", "pytest-cases>=3.8.2", @@ -63,6 +64,7 @@ test = [ "tokenizers==0.15.2", "transformers==4.39.3", "tzdata", + "xxhash", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. pandas-tests = [ "ipython", diff --git a/python/pylibcudf/pylibcudf/CMakeLists.txt b/python/pylibcudf/pylibcudf/CMakeLists.txt index 15dd2b4c34f..b1d9656afc2 100644 --- a/python/pylibcudf/pylibcudf/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/CMakeLists.txt @@ -26,6 +26,7 @@ set(cython_sources filling.pyx gpumemoryview.pyx groupby.pyx + hashing.pyx interop.pyx join.pyx json.pyx diff --git a/python/pylibcudf/pylibcudf/__init__.pxd b/python/pylibcudf/pylibcudf/__init__.pxd index 9bdfdab97c2..aa2ce957173 100644 --- a/python/pylibcudf/pylibcudf/__init__.pxd +++ b/python/pylibcudf/pylibcudf/__init__.pxd @@ -13,6 +13,7 @@ from . cimport ( expressions, filling, groupby, + hashing, interop, join, json, @@ -63,6 +64,7 @@ __all__ = [ "filling", "gpumemoryview", "groupby", + "hashing", "interop", "join", "json", diff --git a/python/pylibcudf/pylibcudf/__init__.py b/python/pylibcudf/pylibcudf/__init__.py index 4033062b7e2..62a2170f83e 100644 --- a/python/pylibcudf/pylibcudf/__init__.py +++ b/python/pylibcudf/pylibcudf/__init__.py @@ -22,6 +22,7 @@ expressions, filling, groupby, + hashing, interop, io, join, @@ -73,6 +74,7 @@ "filling", "gpumemoryview", "groupby", + "hashing", "interop", "io", "join", diff --git a/python/pylibcudf/pylibcudf/hashing.pxd b/python/pylibcudf/pylibcudf/hashing.pxd new file mode 100644 index 00000000000..2d070ddda69 --- /dev/null +++ b/python/pylibcudf/pylibcudf/hashing.pxd @@ -0,0 +1,30 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libc.stdint cimport uint32_t, uint64_t + +from .column cimport Column +from .table cimport Table + + +cpdef Column murmurhash3_x86_32( + Table input, + uint32_t seed=* +) + +cpdef Table murmurhash3_x64_128( + Table input, + uint64_t seed=* +) + + +cpdef Column xxhash_64( + Table input, + uint64_t seed=* +) + +cpdef Column md5(Table input) +cpdef Column sha1(Table input) +cpdef Column sha224(Table input) +cpdef Column sha256(Table input) +cpdef Column sha384(Table input) +cpdef Column sha512(Table input) diff --git a/python/pylibcudf/pylibcudf/hashing.pyx b/python/pylibcudf/pylibcudf/hashing.pyx new file mode 100644 index 00000000000..9ea3d4d1bda --- /dev/null +++ b/python/pylibcudf/pylibcudf/hashing.pyx @@ -0,0 +1,240 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +from libc.stdint cimport uint32_t, uint64_t +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.hash cimport ( + DEFAULT_HASH_SEED, + md5 as cpp_md5, + murmurhash3_x64_128 as cpp_murmurhash3_x64_128, + murmurhash3_x86_32 as cpp_murmurhash3_x86_32, + sha1 as cpp_sha1, + sha224 as cpp_sha224, + sha256 as cpp_sha256, + sha384 as cpp_sha384, + sha512 as cpp_sha512, + xxhash_64 as cpp_xxhash_64, +) +from pylibcudf.libcudf.table.table cimport table + +from .column cimport Column +from .table cimport Table + +LIBCUDF_DEFAULT_HASH_SEED = DEFAULT_HASH_SEED + +cpdef Column murmurhash3_x86_32( + Table input, + uint32_t seed=DEFAULT_HASH_SEED +): + """Computes the MurmurHash3 32-bit hash value of each row in the given table. + + For details, see :cpp:func:`murmurhash3_x86_32`. + + Parameters + ---------- + input : Table + The table of columns to hash + seed : uint32_t + Optional seed value to use for the hash function + + Returns + ------- + pylibcudf.Column + A column where each row is the hash of a row from the input + """ + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_murmurhash3_x86_32( + input.view(), + seed + ) + + return Column.from_libcudf(move(c_result)) + + +cpdef Table murmurhash3_x64_128( + Table input, + uint64_t seed=DEFAULT_HASH_SEED +): + """Computes the MurmurHash3 64-bit hash value of each row in the given table. + + For details, see :cpp:func:`murmurhash3_x64_128`. + + Parameters + ---------- + input : Table + The table of columns to hash + seed : uint64_t + Optional seed value to use for the hash function + + Returns + ------- + pylibcudf.Table + A table of two UINT64 columns + """ + cdef unique_ptr[table] c_result + with nogil: + c_result = cpp_murmurhash3_x64_128( + input.view(), + seed + ) + + return Table.from_libcudf(move(c_result)) + + +cpdef Column xxhash_64( + Table input, + uint64_t seed=DEFAULT_HASH_SEED +): + """Computes the xxHash 64-bit hash value of each row in the given table. + + For details, see :cpp:func:`xxhash_64`. + + Parameters + ---------- + input : Table + The table of columns to hash + seed : uint64_t + Optional seed value to use for the hash function + + Returns + ------- + pylibcudf.Column + A column where each row is the hash of a row from the input + """ + + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_xxhash_64( + input.view(), + seed + ) + + return Column.from_libcudf(move(c_result)) + + +cpdef Column md5(Table input): + """Computes the MD5 hash value of each row in the given table. + + For details, see :cpp:func:`md5`. + + Parameters + ---------- + input : Table + The table of columns to hash + + Returns + ------- + pylibcudf.Column + A column where each row is the md5 hash of a row from the input + + """ + + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_md5(input.view()) + return Column.from_libcudf(move(c_result)) + +cpdef Column sha1(Table input): + """Computes the SHA-1 hash value of each row in the given table. + + For details, see :cpp:func:`sha1`. + + Parameters + ---------- + input : Table + The table of columns to hash + + Returns + ------- + pylibcudf.Column + A column where each row is the hash of a row from the input + """ + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_sha1(input.view()) + return Column.from_libcudf(move(c_result)) + + +cpdef Column sha224(Table input): + """Computes the SHA-224 hash value of each row in the given table. + + For details, see :cpp:func:`sha224`. + + Parameters + ---------- + input : Table + The table of columns to hash + + Returns + ------- + pylibcudf.Column + A column where each row is the hash of a row from the input + """ + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_sha224(input.view()) + return Column.from_libcudf(move(c_result)) + + +cpdef Column sha256(Table input): + """Computes the SHA-256 hash value of each row in the given table. + + For details, see :cpp:func:`sha256`. + + Parameters + ---------- + input : Table + The table of columns to hash + + Returns + ------- + pylibcudf.Column + A column where each row is the hash of a row from the input + """ + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_sha256(input.view()) + return Column.from_libcudf(move(c_result)) + + +cpdef Column sha384(Table input): + """Computes the SHA-384 hash value of each row in the given table. + + For details, see :cpp:func:`sha384`. + + Parameters + ---------- + input : Table + The table of columns to hash + + Returns + ------- + pylibcudf.Column + A column where each row is the hash of a row from the input + """ + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_sha384(input.view()) + return Column.from_libcudf(move(c_result)) + + +cpdef Column sha512(Table input): + """Computes the SHA-512 hash value of each row in the given table. + + For details, see :cpp:func:`sha512`. + + Parameters + ---------- + input : Table + The table of columns to hash + + Returns + ------- + pylibcudf.Column + A column where each row is the hash of a row from the input + """ + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_sha512(input.view()) + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/libcudf/hash.pxd b/python/pylibcudf/pylibcudf/libcudf/hash.pxd index 51678ba69d8..c4222bc9dc5 100644 --- a/python/pylibcudf/pylibcudf/libcudf/hash.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/hash.pxd @@ -3,6 +3,7 @@ from libc.stdint cimport uint32_t, uint64_t from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view @@ -10,36 +11,44 @@ from pylibcudf.libcudf.table.table_view cimport table_view cdef extern from "cudf/hashing.hpp" namespace "cudf::hashing" nogil: - cdef unique_ptr[column] murmurhash3_x86_32 "cudf::hashing::murmurhash3_x86_32" ( + cdef unique_ptr[column] murmurhash3_x86_32( const table_view& input, const uint32_t seed - ) except + + ) except +libcudf_exception_handler - cdef unique_ptr[column] md5 "cudf::hashing::md5" ( + cdef unique_ptr[table] murmurhash3_x64_128( + const table_view& input, + const uint64_t seed + ) except +libcudf_exception_handler + + cdef unique_ptr[column] md5( const table_view& input - ) except + + ) except +libcudf_exception_handler - cdef unique_ptr[column] sha1 "cudf::hashing::sha1" ( + cdef unique_ptr[column] sha1( const table_view& input - ) except + + ) except +libcudf_exception_handler - cdef unique_ptr[column] sha224 "cudf::hashing::sha224" ( + cdef unique_ptr[column] sha224( const table_view& input - ) except + + ) except +libcudf_exception_handler - cdef unique_ptr[column] sha256 "cudf::hashing::sha256" ( + cdef unique_ptr[column] sha256( const table_view& input - ) except + + ) except +libcudf_exception_handler - cdef unique_ptr[column] sha384 "cudf::hashing::sha384" ( + cdef unique_ptr[column] sha384( const table_view& input - ) except + + ) except +libcudf_exception_handler - cdef unique_ptr[column] sha512 "cudf::hashing::sha512" ( + cdef unique_ptr[column] sha512( const table_view& input - ) except + + ) except +libcudf_exception_handler - cdef unique_ptr[column] xxhash_64 "cudf::hashing::xxhash_64" ( + cdef unique_ptr[column] xxhash_64( const table_view& input, const uint64_t seed - ) except + + ) except +libcudf_exception_handler + +cdef extern from "cudf/hashing.hpp" namespace "cudf" nogil: + cdef uint32_t DEFAULT_HASH_SEED diff --git a/python/pylibcudf/pylibcudf/libcudf/hash.pyx b/python/pylibcudf/pylibcudf/libcudf/hash.pyx new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/pylibcudf/pylibcudf/tests/conftest.py b/python/pylibcudf/pylibcudf/tests/conftest.py index a19a8835498..5265e411c7f 100644 --- a/python/pylibcudf/pylibcudf/tests/conftest.py +++ b/python/pylibcudf/pylibcudf/tests/conftest.py @@ -18,13 +18,23 @@ from utils import ALL_PA_TYPES, DEFAULT_PA_TYPES, NUMERIC_PA_TYPES -# This fixture defines the standard set of types that all tests should default to +def _type_to_str(typ): + if isinstance(typ, pa.ListType): + return f"list[{_type_to_str(typ.value_type)}]" + elif isinstance(typ, pa.StructType): + return f"struct[{', '.join(_type_to_str(typ.field(i).type) for i in range(typ.num_fields))}]" + else: + return str(typ) + + +# This fixture defines [the standard set of types that all tests should default to # running on. If there is a need for some tests to run on a different set of types, that # type list fixture should also be defined below here if it is likely to be reused # across modules. Otherwise it may be defined on a per-module basis. @pytest.fixture( scope="session", params=DEFAULT_PA_TYPES, + ids=_type_to_str, ) def pa_type(request): return request.param diff --git a/python/pylibcudf/pylibcudf/tests/test_hashing.py b/python/pylibcudf/pylibcudf/tests/test_hashing.py new file mode 100644 index 00000000000..83fb50fa4ef --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_hashing.py @@ -0,0 +1,269 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +import hashlib +import struct + +import mmh3 +import numpy as np +import pyarrow as pa +import pytest +import xxhash +from utils import assert_column_eq, assert_table_eq + +import pylibcudf as plc + +SEED = 0 +METHODS = ["md5", "sha1", "sha224", "sha256", "sha384", "sha512"] + + +def scalar_to_binary(x): + if isinstance(x, str): + return x.encode() + elif isinstance(x, float): + return struct.pack("> 2))) + + +def uint_hash_combine_32(lhs, rhs): + return hash_combine_32(np.uint32(lhs), np.uint32(rhs)) + + +def libcudf_mmh3_x86_32(binary): + seed = plc.hashing.LIBCUDF_DEFAULT_HASH_SEED + hashval = mmh3.hash(binary, seed) + return hash_combine_32(seed, hashval) + + +@pytest.fixture(params=[pa.int64(), pa.float64(), pa.string(), pa.bool_()]) +def scalar_type(request): + return request.param + + +@pytest.fixture +def pa_scalar_input_column(scalar_type): + if pa.types.is_integer(scalar_type) or pa.types.is_floating(scalar_type): + return pa.array([1, 2, 3], type=scalar_type) + elif pa.types.is_string(scalar_type): + return pa.array(["a", "b", "c"], type=scalar_type) + elif pa.types.is_boolean(scalar_type): + return pa.array([True, True, False], type=scalar_type) + + +@pytest.fixture +def plc_scalar_input_tbl(pa_scalar_input_column): + return plc.interop.from_arrow( + pa.Table.from_arrays([pa_scalar_input_column], names=["data"]) + ) + + +@pytest.fixture(scope="module") +def list_struct_table(): + data = pa.Table.from_pydict( + { + "list": [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + "struct": [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 6}], + } + ) + return data + + +def python_hash_value(x, method): + if method == "murmurhash3_x86_32": + return libcudf_mmh3_x86_32(x) + elif method == "murmurhash3_x64_128": + hasher = mmh3.mmh3_x64_128(seed=plc.hashing.LIBCUDF_DEFAULT_HASH_SEED) + hasher.update(x) + # libcudf returns a tuple of two 64-bit integers + return hasher.utupledigest() + elif method == "xxhash_64": + return xxhash.xxh64( + x, seed=plc.hashing.LIBCUDF_DEFAULT_HASH_SEED + ).intdigest() + else: + return getattr(hashlib, method)(x).hexdigest() + + +@pytest.mark.parametrize( + "method", ["sha1", "sha224", "sha256", "sha384", "sha512", "md5"] +) +def test_hash_column_sha_md5( + pa_scalar_input_column, plc_scalar_input_tbl, method +): + plc_hasher = getattr(plc.hashing, method) + + def py_hasher(val): + return getattr(hashlib, method)(scalar_to_binary(val)).hexdigest() + + expect = pa.array( + [py_hasher(val) for val in pa_scalar_input_column.to_pylist()], + type=pa.string(), + ) + got = plc_hasher(plc_scalar_input_tbl) + assert_column_eq(got, expect) + + +def test_hash_column_xxhash64(pa_scalar_input_column, plc_scalar_input_tbl): + def py_hasher(val): + return xxhash.xxh64( + scalar_to_binary(val), seed=plc.hashing.LIBCUDF_DEFAULT_HASH_SEED + ).intdigest() + + expect = pa.array( + [py_hasher(val) for val in pa_scalar_input_column.to_pylist()], + type=pa.uint64(), + ) + got = plc.hashing.xxhash_64(plc_scalar_input_tbl, 0) + + assert_column_eq(got, expect) + + +@pytest.mark.parametrize( + "method", ["sha1", "sha224", "sha256", "sha384", "sha512"] +) +@pytest.mark.parametrize("dtype", ["list", "struct"]) +def test_sha_list_struct_err(list_struct_table, dtype, method): + err_types = list_struct_table.select([dtype]) + plc_tbl = plc.interop.from_arrow(err_types) + plc_hasher = getattr(plc.hashing, method) + + with pytest.raises(TypeError): + plc_hasher(plc_tbl) + + +def test_md5_struct_err(list_struct_table): + err_types = list_struct_table.select(["struct"]) + plc_tbl = plc.interop.from_arrow(err_types) + + with pytest.raises(TypeError): + plc.hashing.md5(plc_tbl) + + +def test_murmurhash3_x86_32(pa_scalar_input_column, plc_scalar_input_tbl): + def py_hasher(val): + return libcudf_mmh3_x86_32(scalar_to_binary(val)) + + got = plc.hashing.murmurhash3_x86_32(plc_scalar_input_tbl, 0) + expect = pa.array( + [py_hasher(val) for val in pa_scalar_input_column.to_pylist()], + type=pa.uint32(), + ) + got = plc.hashing.murmurhash3_x86_32(plc_scalar_input_tbl, 0) + assert_column_eq(got, expect) + + +@pytest.mark.filterwarnings("ignore::RuntimeWarning") +def test_murmurhash3_x86_32_list(): + pa_tbl = pa.Table.from_pydict( + { + "list": pa.array( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], type=pa.list_(pa.uint32()) + ) + } + ) + plc_tbl = plc.interop.from_arrow(pa_tbl) + + def hash_list(list_): + hash_value = uint_hash_combine_32(0, hash_single_uint32(len(list_))) + + for element in list_: + hash_value = uint_hash_combine_32( + hash_value, + hash_single_uint32( + element, seed=plc.hashing.LIBCUDF_DEFAULT_HASH_SEED + ), + ) + + final = uint_hash_combine_32( + plc.hashing.LIBCUDF_DEFAULT_HASH_SEED, hash_value + ) + return final + + expect = pa.array( + [hash_list(val) for val in pa_tbl["list"].to_pylist()], + type=pa.uint32(), + ) + got = plc.hashing.murmurhash3_x86_32( + plc_tbl, plc.hashing.LIBCUDF_DEFAULT_HASH_SEED + ) + assert_column_eq(got, expect) + + +@pytest.mark.filterwarnings("ignore::RuntimeWarning") +def test_murmurhash3_x86_32_struct(): + pa_tbl = pa.table( + { + "struct": pa.array( + [ + {"a": 1, "b": 2, "c": 3}, + {"a": 4, "b": 5, "c": 6}, + {"a": 7, "b": 8, "c": 9}, + ], + type=pa.struct( + [ + pa.field("a", pa.uint32()), + pa.field("b", pa.uint32(), pa.field("c", pa.uint32())), + ] + ), + ) + } + ) + plc_tbl = plc.interop.from_arrow(pa_tbl) + + def hash_struct(s): + seed = plc.hashing.LIBCUDF_DEFAULT_HASH_SEED + keys = list(s.keys()) + + combined_hash = hash_single_uint32(s[keys[0]], seed=seed) + combined_hash = uint_hash_combine_32(0, combined_hash) + combined_hash = uint_hash_combine_32(seed, combined_hash) + + for key in keys[1:]: + current_hash = hash_single_uint32(s[key], seed=seed) + combined_hash = uint_hash_combine_32(combined_hash, current_hash) + + return combined_hash + + got = plc.hashing.murmurhash3_x86_32( + plc_tbl, plc.hashing.LIBCUDF_DEFAULT_HASH_SEED + ) + + expect = pa.array( + [hash_struct(val) for val in pa_tbl["struct"].to_pylist()], + type=pa.uint32(), + ) + assert_column_eq(got, expect) + + +def test_murmurhash3_x64_128(pa_scalar_input_column, plc_scalar_input_tbl): + def py_hasher(val): + hasher = mmh3.mmh3_x64_128(seed=plc.hashing.LIBCUDF_DEFAULT_HASH_SEED) + hasher.update(val) + return hasher.utupledigest() + + tuples = [ + py_hasher(scalar_to_binary(val)) + for val in pa_scalar_input_column.to_pylist() + ] + expect = pa.Table.from_arrays( + [ + pa.array([np.uint64(t[0]) for t in tuples]), + pa.array([np.uint64(t[1]) for t in tuples]), + ], + names=["0", "1"], + ) + got = plc.hashing.murmurhash3_x64_128(plc_scalar_input_tbl, 0) + + assert_table_eq(expect, got) From a0711d0f8492762877ea7c84e78166413f44f178 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Thu, 31 Oct 2024 01:18:15 -0400 Subject: [PATCH 62/76] Migrate NVtext subword tokenizing APIs to pylibcudf (#17096) Apart of #15162 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17096 --- .../api_docs/pylibcudf/nvtext/index.rst | 1 + .../pylibcudf/nvtext/subword_tokenize.rst | 6 ++ .../cudf/_lib/nvtext/subword_tokenize.pyx | 50 +++-------- python/cudf/cudf/core/subword_tokenizer.py | 7 +- .../libcudf/nvtext/subword_tokenize.pxd | 8 +- .../pylibcudf/pylibcudf/nvtext/CMakeLists.txt | 2 +- .../pylibcudf/pylibcudf/nvtext/__init__.pxd | 2 + python/pylibcudf/pylibcudf/nvtext/__init__.py | 2 + .../pylibcudf/nvtext/subword_tokenize.pxd | 20 +++++ .../pylibcudf/nvtext/subword_tokenize.pyx | 84 +++++++++++++++++++ .../tests/test_nvtext_subword_tokenize.py | 63 ++++++++++++++ 11 files changed, 202 insertions(+), 43 deletions(-) create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/subword_tokenize.rst create mode 100644 python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pxd create mode 100644 python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx create mode 100644 python/pylibcudf/pylibcudf/tests/test_nvtext_subword_tokenize.py diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst index 00314bceeb9..9ba47fd8d70 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst @@ -13,4 +13,5 @@ nvtext normalize replace stemmer + subword_tokenize tokenize diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/subword_tokenize.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/subword_tokenize.rst new file mode 100644 index 00000000000..818714bec6a --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/subword_tokenize.rst @@ -0,0 +1,6 @@ +================ +subword_tokenize +================ + +.. automodule:: pylibcudf.nvtext.subword_tokenize + :members: diff --git a/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx b/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx index ee442ece5c6..5e0bfb74705 100644 --- a/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx +++ b/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx @@ -5,35 +5,16 @@ from libc.stdint cimport uint32_t from cudf.core.buffer import acquire_spill_lock from libcpp cimport bool -from libcpp.memory cimport unique_ptr -from libcpp.string cimport string -from libcpp.utility cimport move - -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.nvtext.subword_tokenize cimport ( - hashed_vocabulary as cpp_hashed_vocabulary, - load_vocabulary_file as cpp_load_vocabulary_file, - move as tr_move, - subword_tokenize as cpp_subword_tokenize, - tokenizer_result as cpp_tokenizer_result, -) from cudf._lib.column cimport Column - -cdef class Hashed_Vocabulary: - cdef unique_ptr[cpp_hashed_vocabulary] c_obj - - def __cinit__(self, hash_file): - cdef string c_hash_file = str(hash_file).encode() - with nogil: - self.c_obj = move(cpp_load_vocabulary_file(c_hash_file)) +from pylibcudf import nvtext @acquire_spill_lock() def subword_tokenize_inmem_hash( Column strings, - Hashed_Vocabulary hashed_vocabulary, + object hashed_vocabulary, uint32_t max_sequence_length=64, uint32_t stride=48, bool do_lower=True, @@ -42,21 +23,16 @@ def subword_tokenize_inmem_hash( """ Subword tokenizes text series by using the pre-loaded hashed vocabulary """ - cdef column_view c_strings = strings.view() - cdef cpp_tokenizer_result c_result - with nogil: - c_result = tr_move( - cpp_subword_tokenize( - c_strings, - hashed_vocabulary.c_obj.get()[0], - max_sequence_length, - stride, - do_lower, - do_truncate, - ) - ) + result = nvtext.subword_tokenize.subword_tokenize( + strings.to_pylibcudf(mode="read"), + hashed_vocabulary, + max_sequence_length, + stride, + do_lower, + do_truncate, + ) # return the 3 tensor components - tokens = Column.from_unique_ptr(move(c_result.tensor_token_ids)) - masks = Column.from_unique_ptr(move(c_result.tensor_attention_mask)) - metadata = Column.from_unique_ptr(move(c_result.tensor_metadata)) + tokens = Column.from_pylibcudf(result[0]) + masks = Column.from_pylibcudf(result[1]) + metadata = Column.from_pylibcudf(result[2]) return tokens, masks, metadata diff --git a/python/cudf/cudf/core/subword_tokenizer.py b/python/cudf/cudf/core/subword_tokenizer.py index 9e59b134b73..dda1f199078 100644 --- a/python/cudf/cudf/core/subword_tokenizer.py +++ b/python/cudf/cudf/core/subword_tokenizer.py @@ -6,8 +6,9 @@ import cupy as cp +import pylibcudf as plc + from cudf._lib.nvtext.subword_tokenize import ( - Hashed_Vocabulary as cpp_hashed_vocabulary, subword_tokenize_inmem_hash as cpp_subword_tokenize, ) @@ -50,7 +51,9 @@ class SubwordTokenizer: def __init__(self, hash_file: str, do_lower_case: bool = True): self.do_lower_case = do_lower_case - self.vocab_file = cpp_hashed_vocabulary(hash_file) + self.vocab_file = plc.nvtext.subword_tokenize.HashedVocabulary( + hash_file + ) def __call__( self, diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/subword_tokenize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/subword_tokenize.pxd index aabac0a617b..8dac86d688d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/subword_tokenize.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/subword_tokenize.pxd @@ -9,14 +9,14 @@ from pylibcudf.libcudf.column.column_view cimport column_view cdef extern from "nvtext/subword_tokenize.hpp" namespace "nvtext" nogil: - cdef cppclass tokenizer_result "nvtext::tokenizer_result": + cdef cppclass tokenizer_result: uint32_t nrows_tensor uint32_t sequence_length unique_ptr[column] tensor_token_ids unique_ptr[column] tensor_attention_mask unique_ptr[column] tensor_metadata - cdef struct hashed_vocabulary "nvtext::hashed_vocabulary": + cdef cppclass hashed_vocabulary: uint16_t first_token_id uint16_t separator_token_id uint16_t unknown_token_id @@ -26,6 +26,8 @@ cdef extern from "nvtext/subword_tokenize.hpp" namespace "nvtext" nogil: unique_ptr[column] table unique_ptr[column] bin_coefficients unique_ptr[column] bin_offsets + unique_ptr[column] cp_metadata + unique_ptr[column] aux_cp_table cdef unique_ptr[hashed_vocabulary] load_vocabulary_file( const string &filename_hashed_vocabulary @@ -33,7 +35,7 @@ cdef extern from "nvtext/subword_tokenize.hpp" namespace "nvtext" nogil: cdef tokenizer_result subword_tokenize( const column_view & strings, - hashed_vocabulary & hashed_vocablary_obj, + hashed_vocabulary & hashed_vocabulary_obj, uint32_t max_sequence_length, uint32_t stride, bool do_lower, diff --git a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt index fa3cd448fa3..93e3fb15259 100644 --- a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt @@ -14,7 +14,7 @@ set(cython_sources edit_distance.pyx generate_ngrams.pyx jaccard.pyx minhash.pyx ngrams_tokenize.pyx normalize.pyx - replace.pyx stemmer.pyx tokenize.pyx byte_pair_encode.pyx + replace.pyx stemmer.pyx tokenize.pyx byte_pair_encode.pyx subword_tokenize.pyx ) set(linked_libraries cudf::cudf) diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd index a9ede17761b..ef837167eb9 100644 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd @@ -10,6 +10,7 @@ from . cimport ( normalize, replace, stemmer, + subword_tokenize, tokenize, ) @@ -23,5 +24,6 @@ __all__ = [ "normalize", "replace", "stemmer", + "subword_tokenize", "tokenize", ] diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.py b/python/pylibcudf/pylibcudf/nvtext/__init__.py index 87650a02c33..4f125d3a733 100644 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.py +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.py @@ -10,6 +10,7 @@ normalize, replace, stemmer, + subword_tokenize, tokenize, ) @@ -23,5 +24,6 @@ "normalize", "replace", "stemmer", + "subword_tokenize", "tokenize", ] diff --git a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pxd b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pxd new file mode 100644 index 00000000000..091c7b897ac --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pxd @@ -0,0 +1,20 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libc.stdint cimport uint32_t +from libcpp cimport bool +from libcpp.memory cimport unique_ptr +from pylibcudf.column cimport Column +from pylibcudf.libcudf.nvtext.subword_tokenize cimport hashed_vocabulary + + +cdef class HashedVocabulary: + cdef unique_ptr[hashed_vocabulary] c_obj + +cpdef tuple[Column, Column, Column] subword_tokenize( + Column input, + HashedVocabulary vocabulary_table, + uint32_t max_sequence_length, + uint32_t stride, + bool do_lower_case, + bool do_truncate, +) diff --git a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx new file mode 100644 index 00000000000..04643d3bd84 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx @@ -0,0 +1,84 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. + +from cython.operator cimport dereference +from libc.stdint cimport uint32_t +from libcpp cimport bool +from libcpp.string cimport string +from libcpp.utility cimport move +from pylibcudf.column cimport Column +from pylibcudf.libcudf.nvtext.subword_tokenize cimport ( + load_vocabulary_file as cpp_load_vocabulary_file, + move as tr_move, + subword_tokenize as cpp_subword_tokenize, + tokenizer_result as cpp_tokenizer_result, +) + + +cdef class HashedVocabulary: + """The vocabulary data for use with the subword_tokenize function. + + For details, see :cpp:class:`cudf::nvtext::hashed_vocabulary`. + """ + def __cinit__(self, hash_file): + cdef string c_hash_file = str(hash_file).encode() + with nogil: + self.c_obj = move(cpp_load_vocabulary_file(c_hash_file)) + +cpdef tuple[Column, Column, Column] subword_tokenize( + Column input, + HashedVocabulary vocabulary_table, + uint32_t max_sequence_length, + uint32_t stride, + bool do_lower_case, + bool do_truncate, +): + """ + Creates a tokenizer that cleans the text, splits it into + tokens and returns token-ids from an input vocabulary. + + For details, see cpp:func:`subword_tokenize` + + Parameters + ---------- + input : Column + The input strings to tokenize. + vocabulary_table : HashedVocabulary + The vocabulary table pre-loaded into this object. + max_sequence_length : uint32_t + Limit of the number of token-ids per row in final tensor for each string. + stride : uint32_t + Each row in the output token-ids will replicate + ``max_sequence_length`` - ``stride`` the token-ids + from the previous row, unless it is the first string. + do_lower_case : bool + If true, the tokenizer will convert uppercase characters in the + input stream to lower-case and strip accents from those characters. + If false, accented and uppercase characters are not transformed. + do_truncate : bool + If true, the tokenizer will discard all the token-ids after + ``max_sequence_length`` for each input string. If false, it + will use a new row in the output token-ids to continue + generating the output. + + Returns + ------- + tuple[Column, Column, Column] + A tuple of three columns containing the + tokens, masks, and metadata. + """ + cdef cpp_tokenizer_result c_result + with nogil: + c_result = tr_move( + cpp_subword_tokenize( + input.view(), + dereference(vocabulary_table.c_obj.get()), + max_sequence_length, + stride, + do_lower_case, + do_truncate, + ) + ) + cdef Column tokens = Column.from_libcudf(move(c_result.tensor_token_ids)) + cdef Column masks = Column.from_libcudf(move(c_result.tensor_attention_mask)) + cdef Column metadata = Column.from_libcudf(move(c_result.tensor_metadata)) + return tokens, masks, metadata diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_subword_tokenize.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_subword_tokenize.py new file mode 100644 index 00000000000..516d0f7f78d --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_subword_tokenize.py @@ -0,0 +1,63 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +import pyarrow as pa +import pytest +from utils import assert_column_eq + +import pylibcudf as plc + + +@pytest.fixture +def vocab_file(tmpdir): + hash_file = tmpdir.mkdir("nvtext").join("tmp_hashed_vocab.txt") + content = "1\n0\n10\n" + coefficients = [65559] * 10 + for c in coefficients: + content = content + str(c) + " 0\n" + table = [0] * 10 + table[0] = 3015668 + content = content + "10\n" + for v in table: + content = content + str(v) + "\n" + content = content + "100\n101\n102\n\n" + hash_file.write(content) + return str(hash_file) + + +@pytest.fixture +def column_input(): + return pa.array(["This is a test"]) + + +@pytest.mark.parametrize("max_sequence_length", [64, 128]) +@pytest.mark.parametrize("stride", [32, 64]) +@pytest.mark.parametrize("do_lower_case", [True, False]) +@pytest.mark.parametrize("do_truncate", [True, False]) +def test_subword_tokenize( + vocab_file, + column_input, + max_sequence_length, + stride, + do_lower_case, + do_truncate, +): + vocab = plc.nvtext.subword_tokenize.HashedVocabulary(vocab_file) + tokens, masks, metadata = plc.nvtext.subword_tokenize.subword_tokenize( + plc.interop.from_arrow(column_input), + vocab, + max_sequence_length, + stride, + do_lower_case, + do_truncate, + ) + expected_tokens = pa.array( + [100] * 4 + [0] * (max_sequence_length - 4), type=pa.uint32() + ) + expected_masks = pa.array( + [1] * 4 + [0] * (max_sequence_length - 4), type=pa.uint32() + ) + expected_metadata = pa.array([0, 0, 3], type=pa.uint32()) + + assert_column_eq(tokens, expected_tokens) + assert_column_eq(masks, expected_masks) + assert_column_eq(metadata, expected_metadata) From 01cfcffcf077db6d27e770d61d204257c6ca6481 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 31 Oct 2024 03:38:46 -0400 Subject: [PATCH 63/76] Remove unsanitized nulls from input strings columns in reduction gtests (#17202) Input strings column containing unsanitized nulls may result in undefined behavior. This PR fixes the input data to not include string characters in null rows in gtests for `REDUCTION_TESTS`. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/17202 --- cpp/tests/reductions/list_rank_test.cpp | 2 +- cpp/tests/reductions/rank_tests.cpp | 2 +- cpp/tests/reductions/reduction_tests.cpp | 29 ++++++++---- cpp/tests/reductions/scan_tests.cpp | 60 ++++++++++++++---------- 4 files changed, 57 insertions(+), 36 deletions(-) diff --git a/cpp/tests/reductions/list_rank_test.cpp b/cpp/tests/reductions/list_rank_test.cpp index 736b5081d8f..cb412f1e925 100644 --- a/cpp/tests/reductions/list_rank_test.cpp +++ b/cpp/tests/reductions/list_rank_test.cpp @@ -131,7 +131,7 @@ TEST_F(ListRankScanTest, ListOfStruct) false, false}}; auto col2 = cudf::test::strings_column_wrapper{ - {"x", "x", "a", "a", "b", "b", "a", "b", "a", "b", "a", "c", "a", "c", "a", "c", "b", "b"}, + {"x", "x", "a", "a", "b", "", "a", "b", "a", "b", "a", "c", "a", "c", "", "", "b", "b"}, {true, true, true, diff --git a/cpp/tests/reductions/rank_tests.cpp b/cpp/tests/reductions/rank_tests.cpp index 19633211192..130458548fc 100644 --- a/cpp/tests/reductions/rank_tests.cpp +++ b/cpp/tests/reductions/rank_tests.cpp @@ -125,7 +125,7 @@ auto make_input_column() { if constexpr (std::is_same_v) { return cudf::test::strings_column_wrapper{ - {"0", "0", "4", "4", "4", "5", "7", "7", "7", "9", "9", "9"}, + {"0", "0", "4", "4", "4", "", "7", "7", "7", "9", "9", "9"}, cudf::test::iterators::null_at(5)}; } else { using fw_wrapper = cudf::test::fixed_width_column_wrapper; diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index c09cde8f9e4..67083f19b3a 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -1255,6 +1255,12 @@ TEST_P(StringReductionTest, MinMax) // data and valid arrays std::vector host_strings(GetParam()); std::vector host_bools({true, false, true, true, true, true, false, false, true}); + std::transform(thrust::counting_iterator(0), + thrust::counting_iterator(host_strings.size()), + host_strings.begin(), + [host_strings, host_bools](auto idx) { + return host_bools[idx] ? host_strings[idx] : std::string{}; + }); bool succeed(true); std::string initial_value = "init"; @@ -1381,7 +1387,7 @@ TEST_F(StringReductionTest, AllNull) std::vector host_strings( {"one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}); std::vector host_bools(host_strings.size(), false); - auto initial_value = cudf::make_string_scalar("init"); + auto initial_value = cudf::make_string_scalar(""); initial_value->set_valid_async(false); // string column with nulls @@ -3082,21 +3088,28 @@ TEST_F(StructReductionTest, StructReductionMinMaxWithNulls) using cudf::test::iterators::null_at; using cudf::test::iterators::nulls_at; - // `null` means null at child column. - // `NULL` means null at parent column. auto const input = [] { auto child1 = STRINGS_CW{{"año", "bit", - "₹1" /*null*/, - "aaa" /*NULL*/, + "", // child null + "aaa", // parent null "zit", "bat", "aab", - "$1" /*null*/, - "€1" /*NULL*/, + "", // child null + "€1", // parent null "wut"}, nulls_at({2, 7})}; - auto child2 = INTS_CW{{1, 2, 3 /*null*/, 4 /*NULL*/, 5, 6, 7, 8 /*null*/, 9 /*NULL*/, 10}, + auto child2 = INTS_CW{{1, + 2, + 0, // child null + 4, // parent null + 5, + 6, + 7, + 0, // child null + 9, // parent NULL + 10}, nulls_at({2, 7})}; return STRUCTS_CW{{child1, child2}, nulls_at({3, 8})}; }(); diff --git a/cpp/tests/reductions/scan_tests.cpp b/cpp/tests/reductions/scan_tests.cpp index 72d92c5ac53..5f911597b02 100644 --- a/cpp/tests/reductions/scan_tests.cpp +++ b/cpp/tests/reductions/scan_tests.cpp @@ -412,12 +412,13 @@ TEST_F(ScanStringsTest, MoreStringsMinMax) { int row_count = 512; - auto data_begin = cudf::detail::make_counting_transform_iterator(0, [](auto idx) { + auto validity = cudf::detail::make_counting_transform_iterator( + 0, [](auto idx) -> bool { return (idx % 23) != 22; }); + auto data_begin = cudf::detail::make_counting_transform_iterator(0, [validity](auto idx) { + if (validity[idx] == 0) return std::string{}; char const s = static_cast('a' + (idx % 26)); return std::string{1, s}; }); - auto validity = cudf::detail::make_counting_transform_iterator( - 0, [](auto idx) -> bool { return (idx % 23) != 22; }); cudf::test::strings_column_wrapper col(data_begin, data_begin + row_count, validity); thrust::host_vector v(data_begin, data_begin + row_count); @@ -620,21 +621,28 @@ TEST_F(StructScanTest, StructScanMinMaxWithNulls) using cudf::test::iterators::null_at; using cudf::test::iterators::nulls_at; - // `null` means null at child column. - // `NULL` means null at parent column. auto const input = [] { auto child1 = STRINGS_CW{{"año", "bit", - "₹1" /*null*/, - "aaa" /*NULL*/, + "", // child null + "aaa", // parent null "zit", "bat", "aab", - "$1" /*null*/, - "€1" /*NULL*/, + "", // child null + "€1", // parent null "wut"}, nulls_at({2, 7})}; - auto child2 = INTS_CW{{1, 2, 3 /*null*/, 4 /*NULL*/, 5, 6, 7, 8 /*null*/, 9 /*NULL*/, 10}, + auto child2 = INTS_CW{{1, + 2, + 0, // child null + 4, // parent null + 5, + 6, + 7, + 0, // child null + 9, // parent null + 10}, nulls_at({2, 7})}; return STRUCTS_CW{{child1, child2}, nulls_at({3, 8})}; }(); @@ -692,25 +700,25 @@ TEST_F(StructScanTest, StructScanMinMaxWithNulls) auto const expected = [] { auto child1 = STRINGS_CW{{"año", "año", - "" /*null*/, - "" /*NULL*/, - "" /*NULL*/, - "" /*NULL*/, - "" /*NULL*/, - "" /*NULL*/, - "" /*NULL*/, - "" /*NULL*/}, + "", // child null + "", // parent null + "", // parent null + "", // parent null + "", // parent null + "", // parent null + "", // parent null + ""}, // parent null null_at(2)}; auto child2 = INTS_CW{{1, 1, - 0 /*null*/, - 0 /*NULL*/, - 0 /*NULL*/, - 0 /*NULL*/, - 0 /*NULL*/, - 0 /*NULL*/, - 0 /*NULL*/, - 0 /*NULL*/}, + 0, // child null + 0, // parent null + 0, // parent null + 0, // parent null + 0, // parent null + 0, // parent null + 0, // parent null + 0}, // parent null null_at(2)}; return STRUCTS_CW{{child1, child2}, nulls_at({3, 4, 5, 6, 7, 8, 9})}; }(); From cafcf6a6bc538c6eed6544ebd0e44169bfc483de Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 31 Oct 2024 08:16:26 -0400 Subject: [PATCH 64/76] Add jaccard_index to generated cuDF docs (#17199) Adds the `jaccard_index` API to the generated docs. Also noticed `minhash` is not present and so added here as well. Also removed duplicate `rsplit` entry from the `.rst` file Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17199 --- docs/cudf/source/user_guide/api_docs/string_handling.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/cudf/source/user_guide/api_docs/string_handling.rst b/docs/cudf/source/user_guide/api_docs/string_handling.rst index ab0f085e1a6..91d3e33960b 100644 --- a/docs/cudf/source/user_guide/api_docs/string_handling.rst +++ b/docs/cudf/source/user_guide/api_docs/string_handling.rst @@ -60,6 +60,7 @@ strings and apply several methods to it. These can be accessed like isupper istimestamp istitle + jaccard_index join len like @@ -67,6 +68,7 @@ strings and apply several methods to it. These can be accessed like lower lstrip match + minhash ngrams ngrams_tokenize normalize_characters @@ -90,7 +92,6 @@ strings and apply several methods to it. These can be accessed like slice_from slice_replace split - rsplit startswith strip swapcase From e512258973ae50174066f7ef8bbe84a1f95437f0 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 31 Oct 2024 08:27:47 -0400 Subject: [PATCH 65/76] Move strings::concatenate benchmark to nvbench (#17211) Moves the `cudf::strings::concatenate` benchmark source from google-bench to nvbench. This also removes the restrictions on the parameters to allows specifying arbitrary number of rows and string width. Reference #16948 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Yunsong Wang (https://github.com/PointKernel) - Mark Harris (https://github.com/harrism) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/17211 --- cpp/benchmarks/CMakeLists.txt | 2 +- cpp/benchmarks/string/combine.cpp | 58 +++++++++++-------------------- 2 files changed, 22 insertions(+), 38 deletions(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 2a4ac789046..68781889c53 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -356,7 +356,6 @@ ConfigureNVBench( # * strings benchmark ------------------------------------------------------------------- ConfigureBench( STRINGS_BENCH - string/combine.cpp string/convert_datetime.cpp string/convert_durations.cpp string/convert_fixed_point.cpp @@ -374,6 +373,7 @@ ConfigureNVBench( STRINGS_NVBENCH string/case.cpp string/char_types.cpp + string/combine.cpp string/contains.cpp string/copy_if_else.cpp string/copy_range.cpp diff --git a/cpp/benchmarks/string/combine.cpp b/cpp/benchmarks/string/combine.cpp index 7acfb1ffb0d..d6ccfae63e8 100644 --- a/cpp/benchmarks/string/combine.cpp +++ b/cpp/benchmarks/string/combine.cpp @@ -14,57 +14,41 @@ * limitations under the License. */ -#include "string_bench_args.hpp" - #include -#include -#include #include #include #include #include -class StringCombine : public cudf::benchmark {}; +#include -static void BM_combine(benchmark::State& state) +static void bench_combine(nvbench::state& state) { - cudf::size_type const n_rows{static_cast(state.range(0))}; - cudf::size_type const max_str_length{static_cast(state.range(1))}; - data_profile const table_profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + + data_profile const profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); auto const table = create_random_table( - {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{n_rows}, table_profile); + {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{num_rows}, profile); cudf::strings_column_view input1(table->view().column(0)); cudf::strings_column_view input2(table->view().column(1)); cudf::string_scalar separator("+"); - for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::get_default_stream()); - cudf::strings::concatenate(table->view(), separator); - } - - state.SetBytesProcessed(state.iterations() * (input1.chars_size(cudf::get_default_stream()) + - input2.chars_size(cudf::get_default_stream()))); -} + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + auto chars_size = + input1.chars_size(stream) + input2.chars_size(stream) + (num_rows * separator.size()); + state.add_global_memory_reads(chars_size); // all bytes are read; + state.add_global_memory_writes(chars_size); -static void generate_bench_args(benchmark::internal::Benchmark* b) -{ - int const min_rows = 1 << 12; - int const max_rows = 1 << 24; - int const row_mult = 8; - int const min_rowlen = 1 << 4; - int const max_rowlen = 1 << 11; - int const len_mult = 4; - generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = cudf::strings::concatenate(table->view(), separator); + }); } -#define STRINGS_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(StringCombine, name) \ - (::benchmark::State & st) { BM_combine(st); } \ - BENCHMARK_REGISTER_F(StringCombine, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -STRINGS_BENCHMARK_DEFINE(concat) +NVBENCH_BENCH(bench_combine) + .set_name("concat") + .add_int64_axis("row_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}); From 9657c9a5dc4c4a1bf9fd7b55cfeb53c60dda3c66 Mon Sep 17 00:00:00 2001 From: Nghia Truong <7416935+ttnghia@users.noreply.github.com> Date: Thu, 31 Oct 2024 07:19:48 -0700 Subject: [PATCH 66/76] Fix `Schema.Builder` does not propagate precision value to `Builder` instance (#17214) When calling `Schema.Builder.build()`, the value `topLevelPrecision` should be passed into the constructor of the `Schema` class. However, it was forgotten. Authors: - Nghia Truong (https://github.com/ttnghia) Approvers: - Robert (Bobby) Evans (https://github.com/revans2) URL: https://github.com/rapidsai/cudf/pull/17214 --- java/src/main/java/ai/rapids/cudf/Schema.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/Schema.java b/java/src/main/java/ai/rapids/cudf/Schema.java index 6da591d659f..ae8a0e17f9d 100644 --- a/java/src/main/java/ai/rapids/cudf/Schema.java +++ b/java/src/main/java/ai/rapids/cudf/Schema.java @@ -36,13 +36,13 @@ public class Schema { private static final int UNKNOWN_PRECISION = -1; /** - * Store precision for the top level column, only applicable if the column is a decimal type. - *

- * This variable is not designed to be used by any libcudf's APIs since libcudf does not support - * precisions for fixed point numbers. - * Instead, it is used only to pass down the precision values from Spark's DecimalType to the - * JNI level, where some JNI functions require these values to perform their operations. - */ + * Store precision for the top level column, only applicable if the column is a decimal type. + *

+ * This variable is not designed to be used by any libcudf's APIs since libcudf does not support + * precisions for fixed point numbers. + * Instead, it is used only to pass down the precision values from Spark's DecimalType to the + * JNI level, where some JNI functions require these values to perform their operations. + */ private final int topLevelPrecision; private final List childNames; @@ -429,7 +429,7 @@ public Schema build() { children.add(b.build()); } } - return new Schema(topLevelType, names, children); + return new Schema(topLevelType, topLevelPrecision, names, children); } } } From 3db6a0e4b3c29fa4818e04075301d3a4863d1bc8 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 31 Oct 2024 11:52:49 -0400 Subject: [PATCH 67/76] Add TokenizeVocabulary to api docs (#17208) Adds the `TokenizeVocabulary` class to the cuDF API guide. Also removes the `SubwordTokenizer` which is to be deprecated in the future. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17208 --- docs/cudf/source/user_guide/api_docs/index.rst | 2 +- .../source/user_guide/api_docs/subword_tokenize.rst | 12 ------------ .../user_guide/api_docs/tokenize_vocabulary.rst | 12 ++++++++++++ 3 files changed, 13 insertions(+), 13 deletions(-) delete mode 100644 docs/cudf/source/user_guide/api_docs/subword_tokenize.rst create mode 100644 docs/cudf/source/user_guide/api_docs/tokenize_vocabulary.rst diff --git a/docs/cudf/source/user_guide/api_docs/index.rst b/docs/cudf/source/user_guide/api_docs/index.rst index d05501f4a4a..f711327f9ed 100644 --- a/docs/cudf/source/user_guide/api_docs/index.rst +++ b/docs/cudf/source/user_guide/api_docs/index.rst @@ -19,7 +19,7 @@ This page provides a list of all publicly accessible modules, methods and classe general_utilities window io - subword_tokenize + tokenize_vocabulary string_handling list_handling struct_handling diff --git a/docs/cudf/source/user_guide/api_docs/subword_tokenize.rst b/docs/cudf/source/user_guide/api_docs/subword_tokenize.rst deleted file mode 100644 index cd240fe4db4..00000000000 --- a/docs/cudf/source/user_guide/api_docs/subword_tokenize.rst +++ /dev/null @@ -1,12 +0,0 @@ -================ -SubwordTokenizer -================ -.. currentmodule:: cudf.core.subword_tokenizer - -Constructor -~~~~~~~~~~~ -.. autosummary:: - :toctree: api/ - - SubwordTokenizer - SubwordTokenizer.__call__ diff --git a/docs/cudf/source/user_guide/api_docs/tokenize_vocabulary.rst b/docs/cudf/source/user_guide/api_docs/tokenize_vocabulary.rst new file mode 100644 index 00000000000..1b5c965f3c9 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/tokenize_vocabulary.rst @@ -0,0 +1,12 @@ +================== +TokenizeVocabulary +================== +.. currentmodule:: cudf.core.tokenize_vocabulary + +Constructor +~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + TokenizeVocabulary + TokenizeVocabulary.tokenize From f99ef41a8f01395635dadd4decba182e6318fc72 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 31 Oct 2024 11:55:26 -0400 Subject: [PATCH 68/76] Move detail header floating_conversion.hpp to detail subdirectory (#17209) Moves the 'cudf/fixed_point/floating_conversion.hpp' to `cudf/fixed_point/detail/` subdirectory since it only contains declarations and definition in the `detail` namespace. It had previously been its own module. https://docs.rapids.ai/api/libcudf/stable/modules.html Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Shruti Shivakumar (https://github.com/shrshi) - Vyas Ramasubramani (https://github.com/vyasr) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/17209 --- .../fixed_point/{ => detail}/floating_conversion.hpp | 10 ---------- cpp/include/cudf/unary.hpp | 2 +- 2 files changed, 1 insertion(+), 11 deletions(-) rename cpp/include/cudf/fixed_point/{ => detail}/floating_conversion.hpp (99%) diff --git a/cpp/include/cudf/fixed_point/floating_conversion.hpp b/cpp/include/cudf/fixed_point/detail/floating_conversion.hpp similarity index 99% rename from cpp/include/cudf/fixed_point/floating_conversion.hpp rename to cpp/include/cudf/fixed_point/detail/floating_conversion.hpp index f0d50edccd1..fce08b4a5c4 100644 --- a/cpp/include/cudf/fixed_point/floating_conversion.hpp +++ b/cpp/include/cudf/fixed_point/detail/floating_conversion.hpp @@ -26,14 +26,6 @@ #include namespace CUDF_EXPORT numeric { - -/** - * @addtogroup floating_conversion - * @{ - * @file - * @brief fixed_point <--> floating-point conversion functions. - */ - namespace detail { /** @@ -1141,6 +1133,4 @@ CUDF_HOST_DEVICE inline FloatingType convert_integral_to_floating(Rep const& val } } // namespace detail - -/** @} */ // end of group } // namespace CUDF_EXPORT numeric diff --git a/cpp/include/cudf/unary.hpp b/cpp/include/cudf/unary.hpp index 53e0f3a15d2..046e9745a71 100644 --- a/cpp/include/cudf/unary.hpp +++ b/cpp/include/cudf/unary.hpp @@ -16,8 +16,8 @@ #pragma once +#include #include -#include #include #include #include From f7020f12a5b84362b5172eb3be55c75acb04949e Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Thu, 31 Oct 2024 14:08:37 -0400 Subject: [PATCH 69/76] Expose stream-ordering in partitioning API (#17213) Add stream parameter to public APIs: ``` cudf::partition cudf::round_robin_partition ``` Added stream gtest for above two functions and for `hash_partition`. Reference: https://github.com/rapidsai/cudf/issues/13744 Authors: - Shruti Shivakumar (https://github.com/shrshi) Approvers: - Nghia Truong (https://github.com/ttnghia) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/17213 --- cpp/include/cudf/partitioning.hpp | 4 ++ cpp/src/partitioning/partitioning.cu | 3 +- cpp/src/partitioning/round_robin.cu | 4 +- cpp/tests/CMakeLists.txt | 1 + cpp/tests/streams/partitioning_test.cpp | 73 +++++++++++++++++++++++++ 5 files changed, 82 insertions(+), 3 deletions(-) create mode 100644 cpp/tests/streams/partitioning_test.cpp diff --git a/cpp/include/cudf/partitioning.hpp b/cpp/include/cudf/partitioning.hpp index 385da993262..f9a68e4fffc 100644 --- a/cpp/include/cudf/partitioning.hpp +++ b/cpp/include/cudf/partitioning.hpp @@ -70,6 +70,7 @@ enum class hash_id { * @param partition_map Non-nullable column of integer values that map each row * in `t` to it's partition. * @param num_partitions The total number of partitions + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table's device memory * @return Pair containing the reordered table and vector of `num_partitions + * 1` offsets to each partition such that the size of partition `i` is @@ -79,6 +80,7 @@ std::pair, std::vector> partition( table_view const& t, column_view const& partition_map, size_type num_partitions, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @@ -242,6 +244,7 @@ std::pair, std::vector> hash_partition( * @param[in] input The input table to be round-robin partitioned * @param[in] num_partitions Number of partitions for the table * @param[in] start_partition Index of the 1st partition + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned table's device memory * * @return A std::pair consisting of a unique_ptr to the partitioned table @@ -251,6 +254,7 @@ std::pair, std::vector> round_robi table_view const& input, cudf::size_type num_partitions, cudf::size_type start_partition = 0, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index 17008e80e79..ebab3beb08f 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -834,10 +834,11 @@ std::pair, std::vector> partition( table_view const& t, column_view const& partition_map, size_type num_partitions, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::partition(t, partition_map, num_partitions, cudf::get_default_stream(), mr); + return detail::partition(t, partition_map, num_partitions, stream, mr); } } // namespace cudf diff --git a/cpp/src/partitioning/round_robin.cu b/cpp/src/partitioning/round_robin.cu index 5a4c90a67a5..ab6ab393878 100644 --- a/cpp/src/partitioning/round_robin.cu +++ b/cpp/src/partitioning/round_robin.cu @@ -273,11 +273,11 @@ std::pair, std::vector> round_robi table_view const& input, cudf::size_type num_partitions, cudf::size_type start_partition, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::round_robin_partition( - input, num_partitions, start_partition, cudf::get_default_stream(), mr); + return detail::round_robin_partition(input, num_partitions, start_partition, stream, mr); } } // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index b78a64d0e55..a5e1cf646b4 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -710,6 +710,7 @@ ConfigureTest(STREAM_MULTIBYTE_SPLIT_TEST streams/io/multibyte_split_test.cpp ST ConfigureTest(STREAM_NULL_MASK_TEST streams/null_mask_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_ORCIO_TEST streams/io/orc_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_PARQUETIO_TEST streams/io/parquet_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_PARTITIONING_TEST streams/partitioning_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_POOL_TEST streams/pool_test.cu STREAM_MODE testing) ConfigureTest(STREAM_REDUCTION_TEST streams/reduction_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing) diff --git a/cpp/tests/streams/partitioning_test.cpp b/cpp/tests/streams/partitioning_test.cpp new file mode 100644 index 00000000000..636c5c1f1f9 --- /dev/null +++ b/cpp/tests/streams/partitioning_test.cpp @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +using cudf::test::fixed_width_column_wrapper; +using cudf::test::strings_column_wrapper; + +class PartitionTest : public cudf::test::BaseFixture {}; + +TEST_F(PartitionTest, Struct) +{ + fixed_width_column_wrapper A({1, 2}, {0, 1}); + auto struct_col = cudf::test::structs_column_wrapper({A}, {0, 1}).release(); + auto table_to_partition = cudf::table_view{{*struct_col}}; + fixed_width_column_wrapper map{9, 2}; + + auto num_partitions = 12; + auto result = + cudf::partition(table_to_partition, map, num_partitions, cudf::test::get_default_stream()); +} + +TEST_F(PartitionTest, EmptyInput) +{ + auto const empty_column = fixed_width_column_wrapper{}; + auto const num_partitions = 5; + auto const start_partition = 0; + auto const [out_table, out_offsets] = + cudf::round_robin_partition(cudf::table_view{{empty_column}}, + num_partitions, + start_partition, + cudf::test::get_default_stream()); +} + +TEST_F(PartitionTest, ZeroPartitions) +{ + fixed_width_column_wrapper floats({1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f}); + fixed_width_column_wrapper integers({1, 2, 3, 4, 5, 6, 7, 8}); + strings_column_wrapper strings({"a", "bb", "ccc", "d", "ee", "fff", "gg", "h"}); + auto input = cudf::table_view({floats, integers, strings}); + + auto columns_to_hash = std::vector({2}); + + cudf::size_type const num_partitions = 0; + auto [output, offsets] = cudf::hash_partition(input, + columns_to_hash, + num_partitions, + cudf::hash_id::HASH_MURMUR3, + cudf::DEFAULT_HASH_SEED, + cudf::test::get_default_stream()); +} From 02a50e8aa708877f06d5a19a3aa070b08aff5b1f Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Thu, 31 Oct 2024 14:09:22 -0400 Subject: [PATCH 70/76] Remove `nvtext::load_vocabulary` from pylibcudf (#17220) This PR follow up #17100 to address the last review here https://github.com/rapidsai/cudf/pull/17100#pullrequestreview-2406700961 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17220 --- python/cudf/cudf/core/tokenize_vocabulary.py | 5 +-- .../pylibcudf/pylibcudf/nvtext/tokenize.pxd | 2 -- .../pylibcudf/pylibcudf/nvtext/tokenize.pyx | 36 ++++--------------- .../pylibcudf/tests/test_nvtext_tokenize.py | 11 ++---- 4 files changed, 12 insertions(+), 42 deletions(-) diff --git a/python/cudf/cudf/core/tokenize_vocabulary.py b/python/cudf/cudf/core/tokenize_vocabulary.py index f0ce6e9d5d1..1e31376cce8 100644 --- a/python/cudf/cudf/core/tokenize_vocabulary.py +++ b/python/cudf/cudf/core/tokenize_vocabulary.py @@ -2,9 +2,10 @@ from __future__ import annotations +import pylibcudf as plc + import cudf from cudf._lib.nvtext.tokenize import ( - TokenizeVocabulary as cpp_tokenize_vocabulary, tokenize_with_vocabulary as cpp_tokenize_with_vocabulary, ) @@ -20,7 +21,7 @@ class TokenizeVocabulary: """ def __init__(self, vocabulary: "cudf.Series"): - self.vocabulary = cpp_tokenize_vocabulary( + self.vocabulary = plc.nvtext.tokenize.TokenizeVocabulary( vocabulary._column.to_pylibcudf(mode="read") ) diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd b/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd index 0254b91ad58..0aed9702d61 100644 --- a/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd @@ -21,8 +21,6 @@ cpdef Column character_tokenize(Column input) cpdef Column detokenize(Column input, Column row_indices, Scalar separator=*) -cpdef TokenizeVocabulary load_vocabulary(Column input) - cpdef Column tokenize_with_vocabulary( Column input, TokenizeVocabulary vocabulary, diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx index cdecfaabca2..ec02e8ebf4e 100644 --- a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx @@ -43,8 +43,7 @@ cpdef Column tokenize_scalar(Column input, Scalar delimiter=None): input : Column Strings column to tokenize delimiter : Scalar - String scalar used to separate individual - strings into tokens + String scalar used to separate individual strings into tokens Returns ------- @@ -106,7 +105,7 @@ cpdef Column count_tokens_scalar(Column input, Scalar delimiter=None): ---------- input : Column Strings column to count tokens - delimiters : Scalar] + delimiters : Scalar String scalar used to separate each string into tokens Returns @@ -141,8 +140,7 @@ cpdef Column count_tokens_column(Column input, Column delimiters): input : Column Strings column to count tokens delimiters : Column - Strings column used to separate - each string into tokens + Strings column used to separate each string into tokens Returns ------- @@ -198,11 +196,9 @@ cpdef Column detokenize( input : Column Strings column to detokenize row_indices : Column - The relative output row index assigned - for each token in the input column + The relative output row index assigned for each token in the input column separator : Scalar - String to append after concatenating - each token to the proper output row + String to append after concatenating each token to the proper output row Returns ------- @@ -225,25 +221,6 @@ cpdef Column detokenize( return Column.from_libcudf(move(c_result)) -cpdef TokenizeVocabulary load_vocabulary(Column input): - """ - Create a ``TokenizeVocabulary`` object from a strings column. - - For details, see cpp:func:`cudf::nvtext::load_vocabulary` - - Parameters - ---------- - input : Column - Strings for the vocabulary - - Returns - ------- - TokenizeVocabulary - Object to be used with cpp:func:`cudf::nvtext::tokenize_with_vocabulary` - """ - return TokenizeVocabulary(input) - - cpdef Column tokenize_with_vocabulary( Column input, TokenizeVocabulary vocabulary, @@ -265,8 +242,7 @@ cpdef Column tokenize_with_vocabulary( delimiter : Scalar Used to identify tokens within ``input`` default_id : size_type - The token id to be used for tokens not found - in the vocabulary; Default is -1 + The token id to be used for tokens not found in the vocabulary; Default is -1 Returns ------- diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_tokenize.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_tokenize.py index 4ec9a5ee1a5..f1b4a5637e1 100644 --- a/python/pylibcudf/pylibcudf/tests/test_nvtext_tokenize.py +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_tokenize.py @@ -78,18 +78,13 @@ def test_detokenize(input_col, delimiter): assert_column_eq(result, expected) -def test_load_vocabulary(input_col): - result = plc.nvtext.tokenize.load_vocabulary( - plc.interop.from_arrow(input_col) - ) - assert isinstance(result, plc.nvtext.tokenize.TokenizeVocabulary) - - @pytest.mark.parametrize("default_id", [-1, 0]) def test_tokenize_with_vocabulary(input_col, default_id): result = plc.nvtext.tokenize.tokenize_with_vocabulary( plc.interop.from_arrow(input_col), - plc.nvtext.tokenize.load_vocabulary(plc.interop.from_arrow(input_col)), + plc.nvtext.tokenize.TokenizeVocabulary( + plc.interop.from_arrow(input_col) + ), plc.interop.from_arrow(pa.scalar(" ")), default_id, ) From a83debbb22e2d82194af3430d4700b20edfaa079 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 31 Oct 2024 12:38:45 -0700 Subject: [PATCH 71/76] Fix groupby.get_group with length-1 tuple with list-like grouper (#17216) closes #17187 Adds similar logic as implemented in pandas: https://github.com/pandas-dev/pandas/blob/main/pandas/core/groupby/groupby.py#L751-L758 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17216 --- python/cudf/cudf/core/groupby/groupby.py | 5 +++++ python/cudf/cudf/tests/test_groupby.py | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 6630bd96c01..e59b948aba9 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -481,6 +481,11 @@ def get_group(self, name, obj=None): "instead of ``gb.get_group(name, obj=df)``.", FutureWarning, ) + if is_list_like(self._by): + if isinstance(name, tuple) and len(name) == 1: + name = name[0] + else: + raise KeyError(name) return obj.iloc[self.indices[name]] @_performance_tracking diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 6b222841622..e4422e204bc 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -4059,3 +4059,19 @@ def test_ndim(): pgb = pser.groupby([0, 0, 1]) ggb = gser.groupby(cudf.Series([0, 0, 1])) assert pgb.ndim == ggb.ndim + + +@pytest.mark.skipif( + not PANDAS_GE_220, reason="pandas behavior applicable in >=2.2" +) +def test_get_group_list_like(): + df = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + result = df.groupby(["a"]).get_group((1,)) + expected = df.to_pandas().groupby(["a"]).get_group((1,)) + assert_eq(result, expected) + + with pytest.raises(KeyError): + df.groupby(["a"]).get_group((1, 2)) + + with pytest.raises(KeyError): + df.groupby(["a"]).get_group([1]) From 6055393b215c62809a7248094a5848253121651e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 31 Oct 2024 13:42:23 -0700 Subject: [PATCH 72/76] Fix binop with LHS numpy datetimelike scalar (#17226) closes #17087 For binops, cudf tries to convert a 0D numpy array to a numpy scalar via `.dtype.type(value)`, but `.dtype.type` requires other parameters if its a `numpy.datetime64` or `numpy.timedelta64`. Indexing via `[()]` will perform this conversion correctly. Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17226 --- python/cudf/cudf/core/column/column.py | 4 ++-- python/cudf/cudf/tests/test_binops.py | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index d2cd6e8ac8f..d2f9d208c77 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -580,8 +580,8 @@ def _wrap_binop_normalization(self, other): if cudf.utils.utils.is_na_like(other): return cudf.Scalar(other, dtype=self.dtype) if isinstance(other, np.ndarray) and other.ndim == 0: - # Try and maintain the dtype - other = other.dtype.type(other.item()) + # Return numpy scalar + other = other[()] return self.normalize_binop_value(other) def _scatter_by_slice( diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 949fa909b5b..71b6bbd688d 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -3431,3 +3431,16 @@ def test_binop_eq_ne_index_series(data1, data2): expected = gi.to_pandas() != gs.to_pandas() assert_eq(expected, actual) + + +@pytest.mark.parametrize("scalar", [np.datetime64, np.timedelta64]) +def test_binop_lhs_numpy_datetimelike_scalar(scalar): + slr1 = scalar(1, "ms") + slr2 = scalar(1, "ns") + result = slr1 < cudf.Series([slr2]) + expected = slr1 < pd.Series([slr2]) + assert_eq(result, expected) + + result = slr2 < cudf.Series([slr1]) + expected = slr2 < pd.Series([slr1]) + assert_eq(result, expected) From 0929115e6455ed06c0e9db498cbe33ae85d5c37c Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Thu, 31 Oct 2024 21:52:59 +0000 Subject: [PATCH 73/76] Support for polars 1.12 in cudf-polars (#17227) No new updates are required, we must just no longer xfail a test if running with 1.12 Authors: - Lawrence Mitchell (https://github.com/wence-) - Matthew Murray (https://github.com/Matt711) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17227 --- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- dependencies.yaml | 2 +- python/cudf_polars/cudf_polars/containers/dataframe.py | 3 ++- .../cudf_polars/cudf_polars/dsl/expressions/aggregation.py | 1 + python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py | 4 ++-- python/cudf_polars/cudf_polars/dsl/expressions/boolean.py | 3 ++- python/cudf_polars/cudf_polars/dsl/expressions/datetime.py | 3 ++- python/cudf_polars/cudf_polars/dsl/expressions/literal.py | 1 + .../cudf_polars/cudf_polars/dsl/expressions/selection.py | 1 + python/cudf_polars/cudf_polars/dsl/expressions/string.py | 3 ++- python/cudf_polars/cudf_polars/dsl/expressions/unary.py | 1 + python/cudf_polars/cudf_polars/dsl/ir.py | 3 ++- python/cudf_polars/cudf_polars/dsl/to_ast.py | 4 ++-- python/cudf_polars/cudf_polars/dsl/translate.py | 3 ++- python/cudf_polars/cudf_polars/typing/__init__.py | 4 ++-- python/cudf_polars/cudf_polars/utils/dtypes.py | 3 ++- python/cudf_polars/cudf_polars/utils/versions.py | 7 ++++--- python/cudf_polars/pyproject.toml | 4 ++-- python/cudf_polars/tests/containers/test_column.py | 3 ++- python/cudf_polars/tests/containers/test_dataframe.py | 3 ++- python/cudf_polars/tests/dsl/test_expr.py | 3 ++- python/cudf_polars/tests/dsl/test_to_ast.py | 3 ++- python/cudf_polars/tests/dsl/test_traversal.py | 4 ++-- python/cudf_polars/tests/expressions/test_literal.py | 3 ++- python/cudf_polars/tests/expressions/test_sort.py | 3 ++- python/cudf_polars/tests/test_join.py | 3 ++- python/cudf_polars/tests/utils/test_broadcast.py | 3 ++- 28 files changed, 51 insertions(+), 31 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 24dc3c9a7cc..9d9fec97731 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -66,7 +66,7 @@ dependencies: - pandas - pandas>=2.0,<2.2.4dev0 - pandoc -- polars>=1.11,<1.12 +- polars>=1.11,<1.13 - pre-commit - ptxcompiler - pyarrow>=14.0.0,<18.0.0a0 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index a2bb2a3fe7f..19e3eafd641 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -64,7 +64,7 @@ dependencies: - pandas - pandas>=2.0,<2.2.4dev0 - pandoc -- polars>=1.11,<1.12 +- polars>=1.11,<1.13 - pre-commit - pyarrow>=14.0.0,<18.0.0a0 - pydata-sphinx-theme!=0.14.2 diff --git a/dependencies.yaml b/dependencies.yaml index 12038c5e503..90255ca674c 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -727,7 +727,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - polars>=1.11,<1.12 + - polars>=1.11,<1.13 run_dask_cudf: common: - output_types: [conda, requirements, pyproject] diff --git a/python/cudf_polars/cudf_polars/containers/dataframe.py b/python/cudf_polars/cudf_polars/containers/dataframe.py index 2c195f6637c..08bc9d0ea3f 100644 --- a/python/cudf_polars/cudf_polars/containers/dataframe.py +++ b/python/cudf_polars/cudf_polars/containers/dataframe.py @@ -9,10 +9,11 @@ from typing import TYPE_CHECKING, cast import pyarrow as pa -import pylibcudf as plc import polars as pl +import pylibcudf as plc + from cudf_polars.containers import Column from cudf_polars.utils import dtypes diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py b/python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py index 41b1defab39..2af9fdaacc5 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py @@ -10,6 +10,7 @@ from typing import TYPE_CHECKING, Any, ClassVar import pyarrow as pa + import pylibcudf as plc from cudf_polars.containers import Column diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py b/python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py index 11a47e7ea51..245bdbefe88 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py @@ -8,10 +8,10 @@ from typing import TYPE_CHECKING, ClassVar -import pylibcudf as plc - from polars.polars import _expr_nodes as pl_expr +import pylibcudf as plc + from cudf_polars.containers import Column from cudf_polars.dsl.expressions.base import AggInfo, ExecutionContext, Expr diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/boolean.py b/python/cudf_polars/cudf_polars/dsl/expressions/boolean.py index 9c14a8386f3..8db8172ebd1 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/boolean.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/boolean.py @@ -10,10 +10,11 @@ from typing import TYPE_CHECKING, Any, ClassVar import pyarrow as pa -import pylibcudf as plc from polars.polars import _expr_nodes as pl_expr +import pylibcudf as plc + from cudf_polars.containers import Column from cudf_polars.dsl.expressions.base import ( ExecutionContext, diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py b/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py index 596e193d8fe..65fa4bfa62f 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py @@ -9,10 +9,11 @@ from typing import TYPE_CHECKING, Any, ClassVar import pyarrow as pa -import pylibcudf as plc from polars.polars import _expr_nodes as pl_expr +import pylibcudf as plc + from cudf_polars.containers import Column from cudf_polars.dsl.expressions.base import ExecutionContext, Expr diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/literal.py b/python/cudf_polars/cudf_polars/dsl/expressions/literal.py index c8aa993b994..c16313bf83c 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/literal.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/literal.py @@ -9,6 +9,7 @@ from typing import TYPE_CHECKING, Any import pyarrow as pa + import pylibcudf as plc from cudf_polars.containers import Column diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/selection.py b/python/cudf_polars/cudf_polars/dsl/expressions/selection.py index 0247256e507..77d7d4c0d22 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/selection.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/selection.py @@ -9,6 +9,7 @@ from typing import TYPE_CHECKING import pyarrow as pa + import pylibcudf as plc from cudf_polars.containers import Column diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/string.py b/python/cudf_polars/cudf_polars/dsl/expressions/string.py index 62b54c63a8d..8b66c9d4676 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/string.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/string.py @@ -10,11 +10,12 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc from polars.exceptions import InvalidOperationError from polars.polars import _expr_nodes as pl_expr +import pylibcudf as plc + from cudf_polars.containers import Column from cudf_polars.dsl.expressions.base import ExecutionContext, Expr from cudf_polars.dsl.expressions.literal import Literal, LiteralColumn diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py index 53f6ed29239..6f22544c050 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py @@ -8,6 +8,7 @@ from typing import TYPE_CHECKING, Any, ClassVar import pyarrow as pa + import pylibcudf as plc from cudf_polars.containers import Column diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index 1aa6741d417..04aa74024cd 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -20,11 +20,12 @@ from typing import TYPE_CHECKING, Any, ClassVar import pyarrow as pa -import pylibcudf as plc from typing_extensions import assert_never import polars as pl +import pylibcudf as plc + import cudf_polars.dsl.expr as expr from cudf_polars.containers import Column, DataFrame from cudf_polars.dsl.nodebase import Node diff --git a/python/cudf_polars/cudf_polars/dsl/to_ast.py b/python/cudf_polars/cudf_polars/dsl/to_ast.py index ffdae81de55..9a0838631cc 100644 --- a/python/cudf_polars/cudf_polars/dsl/to_ast.py +++ b/python/cudf_polars/cudf_polars/dsl/to_ast.py @@ -8,11 +8,11 @@ from functools import partial, reduce, singledispatch from typing import TYPE_CHECKING, TypeAlias +from polars.polars import _expr_nodes as pl_expr + import pylibcudf as plc from pylibcudf import expressions as plc_expr -from polars.polars import _expr_nodes as pl_expr - from cudf_polars.dsl import expr from cudf_polars.dsl.traversal import CachingVisitor from cudf_polars.typing import GenericTransformer diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py index c28f2c2651a..5181214819e 100644 --- a/python/cudf_polars/cudf_polars/dsl/translate.py +++ b/python/cudf_polars/cudf_polars/dsl/translate.py @@ -12,13 +12,14 @@ from typing import TYPE_CHECKING, Any import pyarrow as pa -import pylibcudf as plc from typing_extensions import assert_never import polars as pl import polars.polars as plrs from polars.polars import _expr_nodes as pl_expr, _ir_nodes as pl_ir +import pylibcudf as plc + from cudf_polars.dsl import expr, ir from cudf_polars.dsl.traversal import make_recursive, reuse_if_unchanged from cudf_polars.typing import NodeTraverser diff --git a/python/cudf_polars/cudf_polars/typing/__init__.py b/python/cudf_polars/cudf_polars/typing/__init__.py index a27a3395c35..57c5fdaa7cf 100644 --- a/python/cudf_polars/cudf_polars/typing/__init__.py +++ b/python/cudf_polars/cudf_polars/typing/__init__.py @@ -8,10 +8,10 @@ from collections.abc import Hashable, Mapping from typing import TYPE_CHECKING, Any, Literal, Protocol, TypeVar, Union -import pylibcudf as plc - from polars.polars import _expr_nodes as pl_expr, _ir_nodes as pl_ir +import pylibcudf as plc + if TYPE_CHECKING: from collections.abc import Callable from typing import TypeAlias diff --git a/python/cudf_polars/cudf_polars/utils/dtypes.py b/python/cudf_polars/cudf_polars/utils/dtypes.py index 4154a404e98..1d0479802ca 100644 --- a/python/cudf_polars/cudf_polars/utils/dtypes.py +++ b/python/cudf_polars/cudf_polars/utils/dtypes.py @@ -8,11 +8,12 @@ from functools import cache import pyarrow as pa -import pylibcudf as plc from typing_extensions import assert_never import polars as pl +import pylibcudf as plc + __all__ = ["from_polars", "downcast_arrow_lists", "can_cast"] diff --git a/python/cudf_polars/cudf_polars/utils/versions.py b/python/cudf_polars/cudf_polars/utils/versions.py index 4a7ad6b3cf2..a119cab3b74 100644 --- a/python/cudf_polars/cudf_polars/utils/versions.py +++ b/python/cudf_polars/cudf_polars/utils/versions.py @@ -12,11 +12,12 @@ POLARS_VERSION = parse(__version__) -POLARS_VERSION_LT_18 = POLARS_VERSION < parse("1.8") +POLARS_VERSION_LT_111 = POLARS_VERSION < parse("1.11") +POLARS_VERSION_LT_112 = POLARS_VERSION < parse("1.12") def _ensure_polars_version(): - if POLARS_VERSION_LT_18: + if POLARS_VERSION_LT_111: raise ImportError( - "cudf_polars requires py-polars v1.8 or greater." + "cudf_polars requires py-polars v1.11 or greater." ) # pragma: no cover diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index 2afdab1be4b..a2c62ef9460 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -19,7 +19,7 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ - "polars>=1.11,<1.12", + "polars>=1.11,<1.13", "pylibcudf==24.12.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -188,7 +188,7 @@ required-imports = ["from __future__ import annotations"] [tool.ruff.lint.isort.sections] polars = ["polars"] -rapids = ["rmm", "cudf"] +rapids = ["rmm", "pylibcudf"] [tool.ruff.format] docstring-code-format = true diff --git a/python/cudf_polars/tests/containers/test_column.py b/python/cudf_polars/tests/containers/test_column.py index 1f26ab1af9f..95541b4ecc3 100644 --- a/python/cudf_polars/tests/containers/test_column.py +++ b/python/cudf_polars/tests/containers/test_column.py @@ -4,9 +4,10 @@ from __future__ import annotations import pyarrow -import pylibcudf as plc import pytest +import pylibcudf as plc + from cudf_polars.containers import Column diff --git a/python/cudf_polars/tests/containers/test_dataframe.py b/python/cudf_polars/tests/containers/test_dataframe.py index 5c68fb8f0aa..d68c8d90163 100644 --- a/python/cudf_polars/tests/containers/test_dataframe.py +++ b/python/cudf_polars/tests/containers/test_dataframe.py @@ -3,11 +3,12 @@ from __future__ import annotations -import pylibcudf as plc import pytest import polars as pl +import pylibcudf as plc + from cudf_polars.containers import Column, DataFrame from cudf_polars.testing.asserts import assert_gpu_result_equal diff --git a/python/cudf_polars/tests/dsl/test_expr.py b/python/cudf_polars/tests/dsl/test_expr.py index 84e33262869..de8fec301fe 100644 --- a/python/cudf_polars/tests/dsl/test_expr.py +++ b/python/cudf_polars/tests/dsl/test_expr.py @@ -3,9 +3,10 @@ from __future__ import annotations -import pylibcudf as plc import pytest +import pylibcudf as plc + from cudf_polars.dsl import expr diff --git a/python/cudf_polars/tests/dsl/test_to_ast.py b/python/cudf_polars/tests/dsl/test_to_ast.py index a7b779a6ec9..57d794d4890 100644 --- a/python/cudf_polars/tests/dsl/test_to_ast.py +++ b/python/cudf_polars/tests/dsl/test_to_ast.py @@ -3,12 +3,13 @@ from __future__ import annotations -import pylibcudf as plc import pytest import polars as pl from polars.testing import assert_frame_equal +import pylibcudf as plc + import cudf_polars.dsl.ir as ir_nodes from cudf_polars import translate_ir from cudf_polars.containers.dataframe import DataFrame, NamedColumn diff --git a/python/cudf_polars/tests/dsl/test_traversal.py b/python/cudf_polars/tests/dsl/test_traversal.py index 6505a786855..15c644d7978 100644 --- a/python/cudf_polars/tests/dsl/test_traversal.py +++ b/python/cudf_polars/tests/dsl/test_traversal.py @@ -5,11 +5,11 @@ from functools import singledispatch -import pylibcudf as plc - import polars as pl from polars.testing import assert_frame_equal +import pylibcudf as plc + from cudf_polars import translate_ir from cudf_polars.dsl import expr, ir from cudf_polars.dsl.traversal import ( diff --git a/python/cudf_polars/tests/expressions/test_literal.py b/python/cudf_polars/tests/expressions/test_literal.py index ced49bdc254..52bc4a9ac71 100644 --- a/python/cudf_polars/tests/expressions/test_literal.py +++ b/python/cudf_polars/tests/expressions/test_literal.py @@ -2,11 +2,12 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations -import pylibcudf as plc import pytest import polars as pl +import pylibcudf as plc + from cudf_polars.testing.asserts import ( assert_gpu_result_equal, assert_ir_translation_raises, diff --git a/python/cudf_polars/tests/expressions/test_sort.py b/python/cudf_polars/tests/expressions/test_sort.py index 2a37683478b..62df8ce1498 100644 --- a/python/cudf_polars/tests/expressions/test_sort.py +++ b/python/cudf_polars/tests/expressions/test_sort.py @@ -4,11 +4,12 @@ import itertools -import pylibcudf as plc import pytest import polars as pl +import pylibcudf as plc + from cudf_polars import translate_ir from cudf_polars.testing.asserts import assert_gpu_result_equal diff --git a/python/cudf_polars/tests/test_join.py b/python/cudf_polars/tests/test_join.py index 501560d15b8..8ca7a7b9264 100644 --- a/python/cudf_polars/tests/test_join.py +++ b/python/cudf_polars/tests/test_join.py @@ -13,6 +13,7 @@ assert_gpu_result_equal, assert_ir_translation_raises, ) +from cudf_polars.utils.versions import POLARS_VERSION_LT_112 @pytest.fixture(params=[False, True], ids=["nulls_not_equal", "nulls_equal"]) @@ -88,7 +89,7 @@ def test_left_join_with_slice(left, right, join_nulls, zlice): if zlice is not None: q_expect = q.collect().slice(*zlice) q = q.slice(*zlice) - if zlice == (1, 5) or zlice == (0, 2): + if POLARS_VERSION_LT_112 and (zlice == (1, 5) or zlice == (0, 2)): # https://github.com/pola-rs/polars/issues/19403 # https://github.com/pola-rs/polars/issues/19405 ctx = pytest.raises(AssertionError) diff --git a/python/cudf_polars/tests/utils/test_broadcast.py b/python/cudf_polars/tests/utils/test_broadcast.py index e7770bfadac..3b3b4f0f8db 100644 --- a/python/cudf_polars/tests/utils/test_broadcast.py +++ b/python/cudf_polars/tests/utils/test_broadcast.py @@ -3,9 +3,10 @@ from __future__ import annotations -import pylibcudf as plc import pytest +import pylibcudf as plc + from cudf_polars.containers import Column from cudf_polars.dsl.ir import broadcast From b5b47fe24480c3a57e58ec6646db03034d8f5d4a Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 31 Oct 2024 17:30:59 -0500 Subject: [PATCH 74/76] use rapids-generate-pip-constraints to pin to oldest dependencies in CI (#17131) Follow-up to https://github.com/rapidsai/cudf/pull/16570#discussion_r1735300231 Proposes using the new `rapids-generate-pip-constraints` tool from `gha-tools` to generate a list of pip constraints pinning to the oldest supported verisons of dependencies here. ## Notes for Reviewers ### How I tested this https://github.com/rapidsai/gha-tools/pull/114#issuecomment-2445377824 You can also see one the most recent `wheel-tests-cudf` builds here: * oldest-deps: numpy 1.x ([build link](https://github.com/rapidsai/cudf/actions/runs/11615430314/job/32347576688?pr=17131)) * latest-deps: numpy 2.x ([build link](https://github.com/rapidsai/cudf/actions/runs/11615430314/job/32347577095?pr=17131)) # Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17131 --- ci/cudf_pandas_scripts/run_tests.sh | 11 ++--------- ci/test_wheel_cudf.sh | 11 ++--------- ci/test_wheel_cudf_polars.sh | 12 +++--------- ci/test_wheel_dask_cudf.sh | 12 +++--------- 4 files changed, 10 insertions(+), 36 deletions(-) diff --git a/ci/cudf_pandas_scripts/run_tests.sh b/ci/cudf_pandas_scripts/run_tests.sh index f6bdc6f9484..61361fffb07 100755 --- a/ci/cudf_pandas_scripts/run_tests.sh +++ b/ci/cudf_pandas_scripts/run_tests.sh @@ -54,15 +54,8 @@ else RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist - echo "" > ./constraints.txt - if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then - # `test_python_cudf_pandas` constraints are for `[test]` not `[cudf-pandas-tests]` - rapids-dependency-file-generator \ - --output requirements \ - --file-key test_python_cudf_pandas \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \ - | tee ./constraints.txt - fi + # generate constraints (possibly pinning to oldest support versions of dependencies) + rapids-generate-pip-constraints test_python_cudf_pandas ./constraints.txt python -m pip install \ -v \ diff --git a/ci/test_wheel_cudf.sh b/ci/test_wheel_cudf.sh index a701bfe15e0..ce12744c9e3 100755 --- a/ci/test_wheel_cudf.sh +++ b/ci/test_wheel_cudf.sh @@ -12,15 +12,8 @@ RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels rapids-logger "Install cudf, pylibcudf, and test requirements" -# Constrain to minimum dependency versions if job is set up as "oldest" -echo "" > ./constraints.txt -if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then - rapids-dependency-file-generator \ - --output requirements \ - --file-key py_test_cudf \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \ - | tee ./constraints.txt -fi +# generate constraints (possibly pinning to oldest support versions of dependencies) +rapids-generate-pip-constraints py_test_cudf ./constraints.txt # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install \ diff --git a/ci/test_wheel_cudf_polars.sh b/ci/test_wheel_cudf_polars.sh index 05f882a475b..2884757e46b 100755 --- a/ci/test_wheel_cudf_polars.sh +++ b/ci/test_wheel_cudf_polars.sh @@ -29,15 +29,9 @@ RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist rapids-logger "Installing cudf_polars and its dependencies" -# Constraint to minimum dependency versions if job is set up as "oldest" -echo "" > ./constraints.txt -if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then - rapids-dependency-file-generator \ - --output requirements \ - --file-key py_test_cudf_polars \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \ - | tee ./constraints.txt -fi + +# generate constraints (possibly pinning to oldest support versions of dependencies) +rapids-generate-pip-constraints py_test_cudf_polars ./constraints.txt # echo to expand wildcard before adding `[test]` requires for pip python -m pip install \ diff --git a/ci/test_wheel_dask_cudf.sh b/ci/test_wheel_dask_cudf.sh index 361a42ccda9..e15949f4bdb 100755 --- a/ci/test_wheel_dask_cudf.sh +++ b/ci/test_wheel_dask_cudf.sh @@ -12,15 +12,9 @@ RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist rapids-logger "Install dask_cudf, cudf, pylibcudf, and test requirements" -# Constraint to minimum dependency versions if job is set up as "oldest" -echo "" > ./constraints.txt -if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then - rapids-dependency-file-generator \ - --output requirements \ - --file-key py_test_dask_cudf \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \ - | tee ./constraints.txt -fi + +# generate constraints (possibly pinning to oldest support versions of dependencies) +rapids-generate-pip-constraints py_test_dask_cudf ./constraints.txt # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install \ From 0a8728425d53866a7bd201524a8f3e32b64ad16b Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Fri, 1 Nov 2024 09:40:00 -0400 Subject: [PATCH 75/76] Expose streams in public round APIs (#16925) Contributes to #13744 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/16925 --- cpp/include/cudf/round.hpp | 3 +++ cpp/src/round/round.cu | 3 ++- cpp/tests/CMakeLists.txt | 1 + cpp/tests/streams/round_test.cpp | 40 ++++++++++++++++++++++++++++++++ 4 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 cpp/tests/streams/round_test.cpp diff --git a/cpp/include/cudf/round.hpp b/cpp/include/cudf/round.hpp index ba56ff34b97..158e6df7e5f 100644 --- a/cpp/include/cudf/round.hpp +++ b/cpp/include/cudf/round.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include @@ -66,6 +67,7 @@ enum class rounding_method : int32_t { HALF_UP, HALF_EVEN }; * @param decimal_places Number of decimal places to round to (default 0). If negative, this * specifies the number of positions to the left of the decimal point. * @param method Rounding method + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * * @return Column with each of the values rounded @@ -74,6 +76,7 @@ std::unique_ptr round( column_view const& input, int32_t decimal_places = 0, rounding_method method = rounding_method::HALF_UP, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu index 8988d73fb02..332c440aea9 100644 --- a/cpp/src/round/round.cu +++ b/cpp/src/round/round.cu @@ -358,10 +358,11 @@ std::unique_ptr round(column_view const& input, std::unique_ptr round(column_view const& input, int32_t decimal_places, rounding_method method, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::round(input, decimal_places, method, cudf::get_default_stream(), mr); + return detail::round(input, decimal_places, method, stream, mr); } } // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index a5e1cf646b4..6d3d1454462 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -716,6 +716,7 @@ ConfigureTest(STREAM_REDUCTION_TEST streams/reduction_test.cpp STREAM_MODE testi ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_RESHAPE_TEST streams/reshape_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_ROLLING_TEST streams/rolling_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_ROUND_TEST streams/round_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_STREAM_COMPACTION_TEST streams/stream_compaction_test.cpp STREAM_MODE testing) diff --git a/cpp/tests/streams/round_test.cpp b/cpp/tests/streams/round_test.cpp new file mode 100644 index 00000000000..b8fda022db8 --- /dev/null +++ b/cpp/tests/streams/round_test.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include + +#include + +class RoundTest : public cudf::test::BaseFixture {}; + +TEST_F(RoundTest, RoundHalfToEven) +{ + std::vector vals = {1.729, 17.29, 172.9, 1729}; + cudf::test::fixed_width_column_wrapper input(vals.begin(), vals.end()); + cudf::round(input, 0, cudf::rounding_method::HALF_UP, cudf::test::get_default_stream()); +} + +TEST_F(RoundTest, RoundHalfAwayFromEven) +{ + std::vector vals = {1.5, 2.5, 1.35, 1.45, 15, 25}; + cudf::test::fixed_width_column_wrapper input(vals.begin(), vals.end()); + cudf::round(input, -1, cudf::rounding_method::HALF_EVEN, cudf::test::get_default_stream()); +} From 8219d28161e4d193b44e1fd5d0d0417812ca6892 Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Fri, 1 Nov 2024 11:10:48 -0400 Subject: [PATCH 76/76] Minor I/O code quality improvements (#17105) This PR makes small improvements for the I/O code. Specifically, - Place type constraint on a template class to allow only for rvalue argument. In addition, replace `std::move` with `std::forward` to make the code more *apparently* consistent with the convention, i.e. use `std::move()` on the rvalue references, and `std::forward` on the forwarding references (Effective modern C++ item 25). - Alleviate (but not completely resolve) an existing cuFile driver close issue by removing the explicit driver close call. See #17121 - Minor typo fix (`struct` → `class`). Authors: - Tianyu Liu (https://github.com/kingcrimsontianyu) Approvers: - Nghia Truong (https://github.com/ttnghia) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/17105 --- cpp/include/cudf/io/datasource.hpp | 21 ++++++++++++++------- cpp/src/io/utilities/file_io_utilities.cpp | 6 +++++- cpp/src/io/utilities/file_io_utilities.hpp | 2 +- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/cpp/include/cudf/io/datasource.hpp b/cpp/include/cudf/io/datasource.hpp index 7d2cc4ad493..7bec40893fd 100644 --- a/cpp/include/cudf/io/datasource.hpp +++ b/cpp/include/cudf/io/datasource.hpp @@ -79,7 +79,7 @@ class datasource { template static std::unique_ptr create(Container&& data_owner) { - return std::make_unique>(std::move(data_owner)); + return std::make_unique>(std::forward(data_owner)); } }; @@ -335,13 +335,19 @@ class datasource { template class owning_buffer : public buffer { public: + // Require that the argument passed to the constructor be an rvalue (Container&& being an rvalue + // reference). + static_assert(std::is_rvalue_reference_v, + "The container argument passed to the constructor must be an rvalue."); + /** * @brief Moves the input container into the newly created object. * - * @param data_owner The container to construct the buffer from (ownership is transferred) + * @param moved_data_owner The container to construct the buffer from. Callers should explicitly + * pass std::move(data_owner) to this function to transfer the ownership. */ - owning_buffer(Container&& data_owner) - : _data(std::move(data_owner)), _data_ptr(_data.data()), _size(_data.size()) + owning_buffer(Container&& moved_data_owner) + : _data(std::move(moved_data_owner)), _data_ptr(_data.data()), _size(_data.size()) { } @@ -349,12 +355,13 @@ class datasource { * @brief Moves the input container into the newly created object, and exposes a subspan of the * buffer. * - * @param data_owner The container to construct the buffer from (ownership is transferred) + * @param moved_data_owner The container to construct the buffer from. Callers should explicitly + * pass std::move(data_owner) to this function to transfer the ownership. * @param data_ptr Pointer to the start of the subspan * @param size The size of the subspan */ - owning_buffer(Container&& data_owner, uint8_t const* data_ptr, size_t size) - : _data(std::move(data_owner)), _data_ptr(data_ptr), _size(size) + owning_buffer(Container&& moved_data_owner, uint8_t const* data_ptr, size_t size) + : _data(std::move(moved_data_owner)), _data_ptr(data_ptr), _size(size) { } diff --git a/cpp/src/io/utilities/file_io_utilities.cpp b/cpp/src/io/utilities/file_io_utilities.cpp index 93cdccfbb9f..cf19bc591cc 100644 --- a/cpp/src/io/utilities/file_io_utilities.cpp +++ b/cpp/src/io/utilities/file_io_utilities.cpp @@ -108,7 +108,11 @@ class cufile_shim { ~cufile_shim() { - if (driver_close != nullptr) driver_close(); + // Explicit cuFile driver close should not be performed here to avoid segfault. However, in the + // absence of driver_close(), cuFile will implicitly do that, which in most cases causes + // segfault anyway. TODO: Revisit this conundrum once cuFile is fixed. + // https://github.com/rapidsai/cudf/issues/17121 + if (cf_lib != nullptr) dlclose(cf_lib); } diff --git a/cpp/src/io/utilities/file_io_utilities.hpp b/cpp/src/io/utilities/file_io_utilities.hpp index 7e47b5b3d10..584b6213fa3 100644 --- a/cpp/src/io/utilities/file_io_utilities.hpp +++ b/cpp/src/io/utilities/file_io_utilities.hpp @@ -104,7 +104,7 @@ class cufile_shim; /** * @brief Class that provides RAII for cuFile file registration. */ -struct cufile_registered_file { +class cufile_registered_file { void register_handle(); public: