diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 8190b5d0297..315a389339a 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -31,6 +31,6 @@ ENV PYTHONDONTWRITEBYTECODE="1" ENV SCCACHE_REGION="us-east-2" ENV SCCACHE_BUCKET="rapids-sccache-devs" -ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai" +ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs" ENV HISTFILE="/home/coder/.cache/._bash_history" ENV LIBCUDF_KERNEL_CACHE_PATH="/home/coder/cudf/cpp/build/${PYTHON_PACKAGE_MANAGER}/cuda-${CUDA_VERSION}/latest/jitify_cache" diff --git a/.github/workflows/auto-assign.yml b/.github/workflows/auto-assign.yml new file mode 100644 index 00000000000..1bf4ac08b69 --- /dev/null +++ b/.github/workflows/auto-assign.yml @@ -0,0 +1,17 @@ +name: "Auto Assign PR" + +on: + pull_request_target: + types: + - opened + - reopened + - synchronize + +jobs: + add_assignees: + runs-on: ubuntu-latest + steps: + - uses: actions-ecosystem/action-add-assignees@v1 + with: + github_token: "${{ secrets.GITHUB_TOKEN }}" + assignees: ${{ github.actor }} diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index 31e78f82a62..f5cb71bfc14 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -1,4 +1,5 @@ name: "Pull Request Labeler" + on: - pull_request_target diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 174dc72bf02..f5234f58efe 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,17 +16,6 @@ repos: ^cpp/cmake/thirdparty/patches/.*| ^python/cudf/cudf/tests/data/subword_tokenizer_data/.* ) - - repo: https://github.com/PyCQA/isort - rev: 5.13.2 - hooks: - - id: isort - # Use the config file specific to each subproject so that each - # project can specify its own first/third-party packages. 
- args: ["--config-root=python/", "--resolve-all-configs"] - files: python/.* - exclude: | - (?x)^(^python/cudf_polars/.*) - types_or: [python, cython, pyi] - repo: https://github.com/MarcoGorelli/cython-lint rev: v0.16.2 hooks: @@ -150,6 +139,7 @@ repos: rev: v0.4.8 hooks: - id: ruff + args: ["--fix"] files: python/.*$ - id: ruff-format files: python/.*$ @@ -165,7 +155,7 @@ repos: ) - id: verify-alpha-spec - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.13.11 + rev: v1.16.0 hooks: - id: rapids-dependency-file-generator args: ["--clean"] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f9cdde7c2b7..3db1ed35294 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -38,6 +38,7 @@ conduct. More information can be found at: 8. Verify that CI passes all [status checks](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks). Fix if needed. 9. Wait for other developers to review your code and update code as needed. + Changes to any C++ files require at least 2 approvals from the cudf-cpp-codeowners before merging. 10. Once reviewed and approved, a RAPIDS developer will merge your pull request. If you are unsure about anything, don't hesitate to comment on issues and ask for clarification! @@ -293,8 +294,8 @@ In order to run doxygen as a linter on C++/CUDA code, run ./ci/checks/doxygen.sh ``` -Python code runs several linters including [Black](https://black.readthedocs.io/en/stable/), -[isort](https://pycqa.github.io/isort/), and [flake8](https://flake8.pycqa.org/en/latest/). +Python code runs several linters including [Ruff](https://docs.astral.sh/ruff/) +with its various rules for Black-like formatting or Isort. cuDF also uses [codespell](https://github.com/codespell-project/codespell) to find spelling mistakes, and this check is run as a pre-commit hook. 
To apply the suggested spelling fixes, diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh index e5fcef17a83..3d06eacf9ff 100755 --- a/ci/build_cpp.sh +++ b/ci/build_cpp.sh @@ -15,8 +15,12 @@ rapids-print-env rapids-logger "Begin cpp build" +sccache --zero-stats + # With boa installed conda build forward to boa RAPIDS_PACKAGE_VERSION=$(rapids-generate-version) rapids-conda-retry mambabuild \ conda/recipes/libcudf +sccache --show-adv-stats + rapids-upload-conda-to-s3 cpp diff --git a/ci/build_python.sh b/ci/build_python.sh index 823d7f62290..ed90041cc77 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -19,6 +19,8 @@ rapids-logger "Begin py build" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) +sccache --zero-stats + # TODO: Remove `--no-test` flag once importing on a CPU # node works correctly # With boa installed conda build forwards to the boa builder @@ -28,12 +30,18 @@ RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \ --channel "${CPP_CHANNEL}" \ conda/recipes/pylibcudf +sccache --show-adv-stats +sccache --zero-stats + RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ conda/recipes/cudf +sccache --show-adv-stats +sccache --zero-stats + RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ @@ -46,6 +54,8 @@ RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ conda/recipes/cudf_kafka +sccache --show-adv-stats + RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index bf76f4ed29a..78b8a8a08cf 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -3,7 +3,8 @@ set -euo pipefail -package_dir=$1 +package_name=$1 +package_dir=$2 source rapids-configure-sccache source 
rapids-date-string @@ -12,4 +13,14 @@ rapids-generate-version > ./VERSION cd "${package_dir}" -python -m pip wheel . -w dist -v --no-deps --disable-pip-version-check +sccache --zero-stats + +rapids-logger "Building '${package_name}' wheel" +python -m pip wheel \ + -w dist \ + -v \ + --no-deps \ + --disable-pip-version-check \ + . + +sccache --show-adv-stats diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh index fb93b06dbe2..fef4416a366 100755 --- a/ci/build_wheel_cudf.sh +++ b/ci/build_wheel_cudf.sh @@ -18,7 +18,7 @@ echo "libcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcudf_dist/libcudf echo "pylibcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/pylibcudf_dist/pylibcudf_*.whl)" >> /tmp/constraints.txt export PIP_CONSTRAINT="/tmp/constraints.txt" -./ci/build_wheel.sh ${package_dir} +./ci/build_wheel.sh cudf ${package_dir} python -m auditwheel repair \ --exclude libcudf.so \ diff --git a/ci/build_wheel_cudf_polars.sh b/ci/build_wheel_cudf_polars.sh index 9c945e11c00..79853cdbdb2 100755 --- a/ci/build_wheel_cudf_polars.sh +++ b/ci/build_wheel_cudf_polars.sh @@ -5,7 +5,7 @@ set -euo pipefail package_dir="python/cudf_polars" -./ci/build_wheel.sh ${package_dir} +./ci/build_wheel.sh cudf-polars ${package_dir} RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 ${package_dir}/dist +RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 python ${package_dir}/dist diff --git a/ci/build_wheel_dask_cudf.sh b/ci/build_wheel_dask_cudf.sh index eb2a91289f7..00c64afa2ef 100755 --- a/ci/build_wheel_dask_cudf.sh +++ b/ci/build_wheel_dask_cudf.sh @@ -5,7 +5,7 @@ set -euo pipefail package_dir="python/dask_cudf" -./ci/build_wheel.sh ${package_dir} +./ci/build_wheel.sh dask-cudf ${package_dir} RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" 
-RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 ${package_dir}/dist +RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 python ${package_dir}/dist diff --git a/ci/build_wheel_libcudf.sh b/ci/build_wheel_libcudf.sh index 91bc071583e..b3d6778ea04 100755 --- a/ci/build_wheel_libcudf.sh +++ b/ci/build_wheel_libcudf.sh @@ -3,10 +3,30 @@ set -euo pipefail +package_name="libcudf" package_dir="python/libcudf" +rapids-logger "Generating build requirements" + +rapids-dependency-file-generator \ + --output requirements \ + --file-key "py_build_${package_name}" \ + --file-key "py_rapids_build_${package_name}" \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};cuda_suffixed=true" \ +| tee /tmp/requirements-build.txt + +rapids-logger "Installing build requirements" +python -m pip install \ + -v \ + --prefer-binary \ + -r /tmp/requirements-build.txt + +# build with '--no-build-isolation', for better sccache hit rate +# 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735) +export PIP_NO_BUILD_ISOLATION=0 + export SKBUILD_CMAKE_ARGS="-DUSE_NVCOMP_RUNTIME_WHEEL=ON" -./ci/build_wheel.sh ${package_dir} +./ci/build_wheel.sh "${package_name}" "${package_dir}" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" @@ -16,4 +36,4 @@ python -m auditwheel repair \ -w ${package_dir}/final_dist \ ${package_dir}/dist/* -RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp ${package_dir}/final_dist +RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp "${package_dir}/final_dist" diff --git a/ci/build_wheel_pylibcudf.sh b/ci/build_wheel_pylibcudf.sh index 5e9f7f8a0c4..839d98846fe 100755 --- a/ci/build_wheel_pylibcudf.sh +++ b/ci/build_wheel_pylibcudf.sh @@ -16,7 +16,7 @@ 
RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f echo "libcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcudf_dist/libcudf_*.whl)" > /tmp/constraints.txt export PIP_CONSTRAINT="/tmp/constraints.txt" -./ci/build_wheel.sh ${package_dir} +./ci/build_wheel.sh pylibcudf ${package_dir} python -m auditwheel repair \ --exclude libcudf.so \ @@ -24,4 +24,4 @@ python -m auditwheel repair \ -w ${package_dir}/final_dist \ ${package_dir}/dist/* -RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist +RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python ${package_dir}/final_dist diff --git a/ci/cudf_pandas_scripts/run_tests.sh b/ci/cudf_pandas_scripts/run_tests.sh index f6bdc6f9484..61361fffb07 100755 --- a/ci/cudf_pandas_scripts/run_tests.sh +++ b/ci/cudf_pandas_scripts/run_tests.sh @@ -54,15 +54,8 @@ else RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist - echo "" > ./constraints.txt - if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then - # `test_python_cudf_pandas` constraints are for `[test]` not `[cudf-pandas-tests]` - rapids-dependency-file-generator \ - --output requirements \ - --file-key test_python_cudf_pandas \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \ - | tee ./constraints.txt - fi + # generate constraints (possibly pinning to oldest support versions of dependencies) + rapids-generate-pip-constraints test_python_cudf_pandas ./constraints.txt python -m pip install \ -v \ diff --git a/ci/test_wheel_cudf.sh b/ci/test_wheel_cudf.sh index a701bfe15e0..ce12744c9e3 100755 --- a/ci/test_wheel_cudf.sh +++ b/ci/test_wheel_cudf.sh @@ -12,15 +12,8 @@ RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" 
rapids-download-wheels rapids-logger "Install cudf, pylibcudf, and test requirements" -# Constrain to minimum dependency versions if job is set up as "oldest" -echo "" > ./constraints.txt -if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then - rapids-dependency-file-generator \ - --output requirements \ - --file-key py_test_cudf \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \ - | tee ./constraints.txt -fi +# generate constraints (possibly pinning to oldest support versions of dependencies) +rapids-generate-pip-constraints py_test_cudf ./constraints.txt # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install \ diff --git a/ci/test_wheel_cudf_polars.sh b/ci/test_wheel_cudf_polars.sh index 05f882a475b..2884757e46b 100755 --- a/ci/test_wheel_cudf_polars.sh +++ b/ci/test_wheel_cudf_polars.sh @@ -29,15 +29,9 @@ RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist rapids-logger "Installing cudf_polars and its dependencies" -# Constraint to minimum dependency versions if job is set up as "oldest" -echo "" > ./constraints.txt -if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then - rapids-dependency-file-generator \ - --output requirements \ - --file-key py_test_cudf_polars \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \ - | tee ./constraints.txt -fi + +# generate constraints (possibly pinning to oldest support versions of dependencies) +rapids-generate-pip-constraints py_test_cudf_polars ./constraints.txt # echo to expand wildcard before adding `[test]` requires for pip python -m pip install \ diff --git a/ci/test_wheel_dask_cudf.sh b/ci/test_wheel_dask_cudf.sh index 361a42ccda9..e15949f4bdb 100755 --- a/ci/test_wheel_dask_cudf.sh +++ b/ci/test_wheel_dask_cudf.sh @@ -12,15 +12,9 
@@ RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist rapids-logger "Install dask_cudf, cudf, pylibcudf, and test requirements" -# Constraint to minimum dependency versions if job is set up as "oldest" -echo "" > ./constraints.txt -if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then - rapids-dependency-file-generator \ - --output requirements \ - --file-key py_test_dask_cudf \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \ - | tee ./constraints.txt -fi + +# generate constraints (possibly pinning to oldest support versions of dependencies) +rapids-generate-pip-constraints py_test_dask_cudf ./constraints.txt # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install \ diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index bd5e6c3d569..9d9fec97731 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -46,6 +46,7 @@ dependencies: - librdkafka>=2.5.0,<2.6.0a0 - librmm==24.12.*,>=0.0.0a0 - make +- mmh3 - moto>=4.0.8 - msgpack-python - myst-nb @@ -58,14 +59,14 @@ dependencies: - numpy>=1.23,<3.0a0 - numpydoc - nvcc_linux-64=11.8 -- nvcomp==4.0.1 +- nvcomp==4.1.0.6 - nvtx>=0.2.1 - openpyxl - packaging - pandas - pandas>=2.0,<2.2.4dev0 - pandoc -- polars>=1.8,<1.9 +- polars>=1.11,<1.13 - pre-commit - ptxcompiler - pyarrow>=14.0.0,<18.0.0a0 @@ -76,6 +77,7 @@ dependencies: - pytest-xdist - pytest<8 - python-confluent-kafka>=2.5.0,<2.6.0a0 +- python-xxhash - python>=3.10,<3.13 - pytorch>=2.1.0 - rapids-build-backend>=0.3.0,<0.4.0.dev0 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 565a3ebfa3c..19e3eafd641 100644 --- 
a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -45,6 +45,7 @@ dependencies: - librdkafka>=2.5.0,<2.6.0a0 - librmm==24.12.*,>=0.0.0a0 - make +- mmh3 - moto>=4.0.8 - msgpack-python - myst-nb @@ -56,14 +57,14 @@ dependencies: - numba-cuda>=0.0.13 - numpy>=1.23,<3.0a0 - numpydoc -- nvcomp==4.0.1 +- nvcomp==4.1.0.6 - nvtx>=0.2.1 - openpyxl - packaging - pandas - pandas>=2.0,<2.2.4dev0 - pandoc -- polars>=1.8,<1.9 +- polars>=1.11,<1.13 - pre-commit - pyarrow>=14.0.0,<18.0.0a0 - pydata-sphinx-theme!=0.14.2 @@ -74,6 +75,7 @@ dependencies: - pytest-xdist - pytest<8 - python-confluent-kafka>=2.5.0,<2.6.0a0 +- python-xxhash - python>=3.10,<3.13 - pytorch>=2.1.0 - rapids-build-backend>=0.3.0,<0.4.0.dev0 diff --git a/conda/recipes/cudf-polars/meta.yaml b/conda/recipes/cudf-polars/meta.yaml index e8fef715c60..edf92b930d9 100644 --- a/conda/recipes/cudf-polars/meta.yaml +++ b/conda/recipes/cudf-polars/meta.yaml @@ -43,7 +43,7 @@ requirements: run: - python - pylibcudf ={{ version }} - - polars >=1.8,<1.9 + - polars >=1.11,<1.12 - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} test: diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml index dc75eb4b252..c78ca326005 100644 --- a/conda/recipes/libcudf/conda_build_config.yaml +++ b/conda/recipes/libcudf/conda_build_config.yaml @@ -35,7 +35,7 @@ spdlog_version: - ">=1.14.1,<1.15" nvcomp_version: - - "=4.0.1" + - "=4.1.0.6" zlib_version: - ">=1.2.13" diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 32a753c9f40..bfa4bf80724 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -368,8 +368,16 @@ add_library( src/filling/repeat.cu src/filling/sequence.cu src/groupby/groupby.cu + src/groupby/hash/compute_groupby.cu + src/groupby/hash/compute_mapping_indices.cu + src/groupby/hash/compute_mapping_indices_null.cu + src/groupby/hash/compute_shared_memory_aggs.cu + 
src/groupby/hash/compute_single_pass_aggs.cu + src/groupby/hash/create_sparse_results_table.cu src/groupby/hash/flatten_single_pass_aggs.cpp src/groupby/hash/groupby.cu + src/groupby/hash/hash_compound_agg_finalizer.cu + src/groupby/hash/sparse_to_dense_results.cu src/groupby/sort/aggregate.cpp src/groupby/sort/group_argmax.cu src/groupby/sort/group_argmin.cu diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index e61a8e6e1e6..68781889c53 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -49,7 +49,7 @@ target_compile_options( target_link_libraries( ndsh_data_generator - PUBLIC cudf GTest::gmock GTest::gtest cudf::cudftestutil nvtx3::nvtx3-cpp + PUBLIC cudf cudf::cudftestutil nvtx3::nvtx3-cpp PRIVATE $ ) @@ -345,18 +345,17 @@ ConfigureNVBench(BINARYOP_NVBENCH binaryop/binaryop.cpp binaryop/compiled_binary # ################################################################################################## # * nvtext benchmark ------------------------------------------------------------------- -ConfigureBench(TEXT_BENCH text/ngrams.cpp text/subword.cpp) +ConfigureBench(TEXT_BENCH text/subword.cpp) ConfigureNVBench( - TEXT_NVBENCH text/edit_distance.cpp text/hash_ngrams.cpp text/jaccard.cpp text/minhash.cpp - text/normalize.cpp text/replace.cpp text/tokenize.cpp text/vocab.cpp text/word_minhash.cpp + TEXT_NVBENCH text/edit_distance.cpp text/hash_ngrams.cpp text/jaccard.cpp text/ngrams.cpp + text/normalize.cpp text/replace.cpp text/tokenize.cpp text/vocab.cpp ) # ################################################################################################## # * strings benchmark ------------------------------------------------------------------- ConfigureBench( STRINGS_BENCH - string/combine.cpp string/convert_datetime.cpp string/convert_durations.cpp string/convert_fixed_point.cpp @@ -374,6 +373,7 @@ ConfigureNVBench( STRINGS_NVBENCH string/case.cpp string/char_types.cpp + string/combine.cpp 
string/contains.cpp string/copy_if_else.cpp string/copy_range.cpp diff --git a/cpp/benchmarks/ast/transform.cpp b/cpp/benchmarks/ast/transform.cpp index f44f26e4d2c..2533ea9611c 100644 --- a/cpp/benchmarks/ast/transform.cpp +++ b/cpp/benchmarks/ast/transform.cpp @@ -16,16 +16,29 @@ #include +#include + +#include +#include +#include +#include #include #include +#include +#include #include #include #include +#include #include +#include +#include +#include +#include #include #include #include @@ -39,14 +52,14 @@ enum class TreeType { template static void BM_ast_transform(nvbench::state& state) { - auto const table_size = static_cast(state.get_int64("table_size")); + auto const num_rows = static_cast(state.get_int64("num_rows")); auto const tree_levels = static_cast(state.get_int64("tree_levels")); // Create table data auto const n_cols = reuse_columns ? 1 : tree_levels + 1; auto const source_table = create_sequence_table(cycle_dtypes({cudf::type_to_id()}, n_cols), - row_count{table_size}, + row_count{num_rows}, Nullable ? 
std::optional{0.5} : std::nullopt); auto table = source_table->view(); @@ -86,7 +99,71 @@ static void BM_ast_transform(nvbench::state& state) auto const& expression_tree_root = expressions.back(); // Use the number of bytes read from global memory - state.add_global_memory_reads(table_size * (tree_levels + 1)); + state.add_global_memory_reads(static_cast(num_rows) * (tree_levels + 1)); + state.add_global_memory_writes(num_rows); + + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch&) { cudf::compute_column(table, expression_tree_root); }); +} + +template +static void BM_string_compare_ast_transform(nvbench::state& state) +{ + auto const string_width = static_cast(state.get_int64("string_width")); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const tree_levels = static_cast(state.get_int64("tree_levels")); + auto const hit_rate = static_cast(state.get_int64("hit_rate")); + + CUDF_EXPECTS(tree_levels > 0, "benchmarks require 1 or more comparisons"); + + // Create table data + auto const num_cols = tree_levels * 2; + std::vector> columns; + std::for_each( + thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_cols), [&](size_t) { + columns.emplace_back(create_string_column(num_rows, string_width, hit_rate)); + }); + + cudf::table table{std::move(columns)}; + cudf::table_view const table_view = table.view(); + + int64_t const chars_size = std::accumulate( + table_view.begin(), + table_view.end(), + static_cast(0), + [](int64_t size, auto& column) -> int64_t { + return size + cudf::strings_column_view{column}.chars_size(cudf::get_default_stream()); + }); + + // Create column references + auto column_refs = std::vector(); + std::transform(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_cols), + std::back_inserter(column_refs), + [](auto const& column_id) { return cudf::ast::column_reference(column_id); }); + + // Create expression trees + std::list expressions; + + // Construct AST tree (a == b 
&& c == d && e == f && ...) + + expressions.emplace_back(cudf::ast::operation(cmp_op, column_refs[0], column_refs[1])); + + std::for_each(thrust::make_counting_iterator(1), + thrust::make_counting_iterator(tree_levels), + [&](size_t idx) { + auto const& lhs = expressions.back(); + auto const& rhs = expressions.emplace_back( + cudf::ast::operation(cmp_op, column_refs[idx * 2], column_refs[idx * 2 + 1])); + expressions.emplace_back(cudf::ast::operation(reduce_op, lhs, rhs)); + }); + + auto const& expression_tree_root = expressions.back(); + + // Use the number of bytes read from global memory + state.add_element_count(chars_size, "chars_size"); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(num_rows); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) { cudf::compute_column(table, expression_tree_root); }); @@ -100,7 +177,7 @@ static void BM_ast_transform(nvbench::state& state) NVBENCH_BENCH(name) \ .set_name(#name) \ .add_int64_axis("tree_levels", {1, 5, 10}) \ - .add_int64_axis("table_size", {100'000, 1'000'000, 10'000'000, 100'000'000}) + .add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000, 100'000'000}) AST_TRANSFORM_BENCHMARK_DEFINE( ast_int32_imbalanced_unique, int32_t, TreeType::IMBALANCED_LEFT, false, false); @@ -115,3 +192,19 @@ AST_TRANSFORM_BENCHMARK_DEFINE( ast_int32_imbalanced_reuse_nulls, int32_t, TreeType::IMBALANCED_LEFT, true, true); AST_TRANSFORM_BENCHMARK_DEFINE( ast_double_imbalanced_unique_nulls, double, TreeType::IMBALANCED_LEFT, false, true); + +#define AST_STRING_COMPARE_TRANSFORM_BENCHMARK_DEFINE(name, cmp_op, reduce_op) \ + static void name(::nvbench::state& st) \ + { \ + ::BM_string_compare_ast_transform(st); \ + } \ + NVBENCH_BENCH(name) \ + .set_name(#name) \ + .add_int64_axis("string_width", {32, 64, 128, 256}) \ + .add_int64_axis("num_rows", {32768, 262144, 2097152}) \ + .add_int64_axis("tree_levels", {1, 2, 3, 4}) \ + .add_int64_axis("hit_rate", {50, 100}) + 
+AST_STRING_COMPARE_TRANSFORM_BENCHMARK_DEFINE(ast_string_equal_logical_and, + cudf::ast::ast_operator::EQUAL, + cudf::ast::ast_operator::LOGICAL_AND); diff --git a/cpp/benchmarks/binaryop/binaryop.cpp b/cpp/benchmarks/binaryop/binaryop.cpp index 7d267a88764..75c91d270a7 100644 --- a/cpp/benchmarks/binaryop/binaryop.cpp +++ b/cpp/benchmarks/binaryop/binaryop.cpp @@ -17,12 +17,18 @@ #include #include +#include +#include #include #include +#include + #include #include +#include +#include // This set of benchmarks is designed to be a comparison for the AST benchmarks @@ -34,17 +40,18 @@ enum class TreeType { template static void BM_binaryop_transform(nvbench::state& state) { - auto const table_size{static_cast(state.get_int64("table_size"))}; + auto const num_rows{static_cast(state.get_int64("num_rows"))}; auto const tree_levels{static_cast(state.get_int64("tree_levels"))}; // Create table data auto const n_cols = reuse_columns ? 1 : tree_levels + 1; auto const source_table = create_sequence_table( - cycle_dtypes({cudf::type_to_id()}, n_cols), row_count{table_size}); + cycle_dtypes({cudf::type_to_id()}, n_cols), row_count{num_rows}); cudf::table_view table{*source_table}; // Use the number of bytes read from global memory - state.add_global_memory_reads(table_size * (tree_levels + 1)); + state.add_global_memory_reads(static_cast(num_rows) * (tree_levels + 1)); + state.add_global_memory_writes(num_rows); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) { // Execute tree that chains additions like (((a + b) + c) + d) @@ -64,15 +71,69 @@ static void BM_binaryop_transform(nvbench::state& state) }); } +template +static void BM_string_compare_binaryop_transform(nvbench::state& state) +{ + auto const string_width = static_cast(state.get_int64("string_width")); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const tree_levels = static_cast(state.get_int64("tree_levels")); + auto const hit_rate = static_cast(state.get_int64("hit_rate")); + + 
CUDF_EXPECTS(tree_levels > 0, "benchmarks require 1 or more comparisons"); + + // Create table data + auto const num_cols = tree_levels * 2; + std::vector> columns; + std::for_each( + thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_cols), [&](size_t) { + columns.emplace_back(create_string_column(num_rows, string_width, hit_rate)); + }); + + cudf::table table{std::move(columns)}; + cudf::table_view const table_view = table.view(); + + int64_t const chars_size = std::accumulate( + table_view.begin(), table_view.end(), static_cast(0), [](int64_t size, auto& column) { + return size + cudf::strings_column_view{column}.chars_size(cudf::get_default_stream()); + }); + + // Create column references + + // Use the number of bytes read from global memory + state.add_element_count(chars_size, "chars_size"); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(num_rows); + + // Construct binary operations (a == b && c == d && e == f && ...) + auto constexpr bool_type = cudf::data_type{cudf::type_id::BOOL8}; + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + rmm::cuda_stream_view stream{launch.get_stream().get_stream()}; + std::unique_ptr reduction = + cudf::binary_operation(table.get_column(0), table.get_column(1), cmp_op, bool_type, stream); + std::for_each( + thrust::make_counting_iterator(1), + thrust::make_counting_iterator(tree_levels), + [&](size_t idx) { + std::unique_ptr comparison = cudf::binary_operation( + table.get_column(idx * 2), table.get_column(idx * 2 + 1), cmp_op, bool_type, stream); + std::unique_ptr reduced = + cudf::binary_operation(*comparison, *reduction, reduce_op, bool_type, stream); + stream.synchronize(); + reduction = std::move(reduced); + }); + }); +} + #define BINARYOP_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns) \ \ static void name(::nvbench::state& st) \ { \ - BM_binaryop_transform(st); \ + ::BM_binaryop_transform(st); \ } \ NVBENCH_BENCH(name) \ 
.add_int64_axis("tree_levels", {1, 2, 5, 10}) \ - .add_int64_axis("table_size", {100'000, 1'000'000, 10'000'000, 100'000'000}) + .add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000, 100'000'000}) BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_int32_imbalanced_unique, int32_t, @@ -86,3 +147,20 @@ BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_double_imbalanced_unique, double, TreeType::IMBALANCED_LEFT, false); + +#define STRING_COMPARE_BINARYOP_TRANSFORM_BENCHMARK_DEFINE(name, cmp_op, reduce_op) \ + \ + static void name(::nvbench::state& st) \ + { \ + ::BM_string_compare_binaryop_transform(st); \ + } \ + NVBENCH_BENCH(name) \ + .set_name(#name) \ + .add_int64_axis("string_width", {32, 64, 128, 256}) \ + .add_int64_axis("num_rows", {32768, 262144, 2097152}) \ + .add_int64_axis("tree_levels", {1, 2, 3, 4}) \ + .add_int64_axis("hit_rate", {50, 100}) + +STRING_COMPARE_BINARYOP_TRANSFORM_BENCHMARK_DEFINE(string_compare_binaryop_transform, + cudf::binary_operator::EQUAL, + cudf::binary_operator::LOGICAL_AND); diff --git a/cpp/benchmarks/binaryop/compiled_binaryop.cpp b/cpp/benchmarks/binaryop/compiled_binaryop.cpp index bc0ff69bce9..426f44a4fa1 100644 --- a/cpp/benchmarks/binaryop/compiled_binaryop.cpp +++ b/cpp/benchmarks/binaryop/compiled_binaryop.cpp @@ -23,10 +23,10 @@ template void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop) { - auto const table_size = static_cast(state.get_int64("table_size")); + auto const num_rows = static_cast(state.get_int64("num_rows")); auto const source_table = create_random_table( - {cudf::type_to_id(), cudf::type_to_id()}, row_count{table_size}); + {cudf::type_to_id(), cudf::type_to_id()}, row_count{num_rows}); auto lhs = cudf::column_view(source_table->get_column(0)); auto rhs = cudf::column_view(source_table->get_column(1)); @@ -37,9 +37,9 @@ void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop) cudf::binary_operation(lhs, rhs, binop, output_dtype); // use number of bytes read and 
written to global memory - state.add_global_memory_reads(table_size); - state.add_global_memory_reads(table_size); - state.add_global_memory_reads(table_size); + state.add_global_memory_reads(num_rows); + state.add_global_memory_reads(num_rows); + state.add_global_memory_writes(num_rows); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) { cudf::binary_operation(lhs, rhs, binop, output_dtype); }); @@ -55,7 +55,7 @@ void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop) } \ NVBENCH_BENCH(name) \ .set_name("compiled_binary_op_" BM_STRINGIFY(name)) \ - .add_int64_axis("table_size", {10'000, 100'000, 1'000'000, 10'000'000, 100'000'000}) + .add_int64_axis("num_rows", {10'000, 100'000, 1'000'000, 10'000'000, 100'000'000}) #define build_name(a, b, c, d) a##_##b##_##c##_##d diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu index dc258e32dc5..bdce8a31176 100644 --- a/cpp/benchmarks/common/generate_input.cu +++ b/cpp/benchmarks/common/generate_input.cu @@ -17,13 +17,17 @@ #include "generate_input.hpp" #include "random_distribution_factory.cuh" +#include + #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -918,6 +922,58 @@ std::unique_ptr create_sequence_table(std::vector co return std::make_unique(std::move(columns)); } +std::unique_ptr create_string_column(cudf::size_type num_rows, + cudf::size_type row_width, + int32_t hit_rate) +{ + // build input table using the following data + auto raw_data = cudf::test::strings_column_wrapper( + { + "123 abc 4567890 DEFGHI 0987 5W43", // matches both patterns; + "012345 6789 01234 56789 0123 456", // the rest do not match + "abc 4567890 DEFGHI 0987 Wxyz 123", + "abcdefghijklmnopqrstuvwxyz 01234", + "", + "AbcéDEFGHIJKLMNOPQRSTUVWXYZ 01", + "9876543210,abcdefghijklmnopqrstU", + "9876543210,abcdefghijklmnopqrstU", + "123 édf 4567890 DéFG 0987 X5", + "1", + }) + .release(); + + if (row_width / 32 > 1) 
{ + std::vector columns; + for (int i = 0; i < row_width / 32; ++i) { + columns.push_back(raw_data->view()); + } + raw_data = cudf::strings::concatenate(cudf::table_view(columns)); + } + auto data_view = raw_data->view(); + + // compute number of rows in n_rows that should match + auto const num_matches = (static_cast(num_rows) * hit_rate) / 100; + + // Create a randomized gather-map to build a column out of the strings in data. + data_profile gather_profile = + data_profile_builder().cardinality(0).null_probability(0.0).distribution( + cudf::type_id::INT32, distribution_id::UNIFORM, 1, data_view.size() - 1); + auto gather_table = + create_random_table({cudf::type_id::INT32}, row_count{num_rows}, gather_profile); + gather_table->get_column(0).set_null_mask(rmm::device_buffer{}, 0); + + // Create scatter map by placing 0-index values throughout the gather-map + auto scatter_data = cudf::sequence(num_matches, + cudf::numeric_scalar(0), + cudf::numeric_scalar(num_rows / num_matches)); + auto zero_scalar = cudf::numeric_scalar(0); + auto table = cudf::scatter({zero_scalar}, scatter_data->view(), gather_table->view()); + auto gather_map = table->view().column(0); + table = cudf::gather(cudf::table_view({data_view}), gather_map); + + return std::move(table->release().front()); +} + std::pair create_random_null_mask( cudf::size_type size, std::optional null_probability, unsigned seed) { diff --git a/cpp/benchmarks/common/generate_input.hpp b/cpp/benchmarks/common/generate_input.hpp index 68d3dc492f5..57834fd11d2 100644 --- a/cpp/benchmarks/common/generate_input.hpp +++ b/cpp/benchmarks/common/generate_input.hpp @@ -670,6 +670,18 @@ std::unique_ptr create_random_column(cudf::type_id dtype_id, data_profile const& data_params = data_profile{}, unsigned seed = 1); +/** + * @brief Deterministically generates a large string column filled with data with the given + * parameters. 
+ * + * @param num_rows Number of rows in the output column + * @param row_width Width of each string in the column + * @param hit_rate The hit rate percentage, ranging from 0 to 100 + */ +std::unique_ptr create_string_column(cudf::size_type num_rows, + cudf::size_type row_width, + int32_t hit_rate); + /** * @brief Generate sequence columns starting with value 0 in first row and increasing by 1 in * subsequent rows. diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index fe24fb58728..45b46005c47 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -186,7 +186,7 @@ std::string exec_cmd(std::string_view cmd) std::fflush(nullptr); // Switch stderr and stdout to only capture stderr auto const redirected_cmd = std::string{"( "}.append(cmd).append(" 3>&2 2>&1 1>&3) 2>/dev/null"); - std::unique_ptr pipe(popen(redirected_cmd.c_str(), "r"), pclose); + std::unique_ptr pipe(popen(redirected_cmd.c_str(), "r"), pclose); CUDF_EXPECTS(pipe != nullptr, "popen() failed"); std::array buffer; diff --git a/cpp/benchmarks/ndsh/q01.cpp b/cpp/benchmarks/ndsh/q01.cpp index ef709926ae9..485e8e5497c 100644 --- a/cpp/benchmarks/ndsh/q01.cpp +++ b/cpp/benchmarks/ndsh/q01.cpp @@ -104,7 +104,7 @@ } void run_ndsh_q1(nvbench::state& state, - std::unordered_map& sources) + std::unordered_map& sources) { // Define the column projections and filter predicate for `lineitem` table std::vector const lineitem_cols = {"l_returnflag", @@ -124,8 +124,8 @@ void run_ndsh_q1(nvbench::state& state, cudf::ast::ast_operator::LESS_EQUAL, shipdate_ref, shipdate_upper_literal); // Read out the `lineitem` table from parquet file - auto lineitem = - read_parquet(sources["lineitem"].make_source_info(), lineitem_cols, std::move(lineitem_pred)); + auto lineitem = read_parquet( + sources.at("lineitem").make_source_info(), lineitem_cols, std::move(lineitem_pred)); // Calculate the discount price and charge columns and append to lineitem table auto 
disc_price = @@ -170,7 +170,7 @@ void ndsh_q1(nvbench::state& state) { // Generate the required parquet files in device buffers double const scale_factor = state.get_float64("scale_factor"); - std::unordered_map sources; + std::unordered_map sources; generate_parquet_data_sources(scale_factor, {"lineitem"}, sources); auto stream = cudf::get_default_stream(); diff --git a/cpp/benchmarks/ndsh/q05.cpp b/cpp/benchmarks/ndsh/q05.cpp index 522bc4789c2..1c2d657913e 100644 --- a/cpp/benchmarks/ndsh/q05.cpp +++ b/cpp/benchmarks/ndsh/q05.cpp @@ -89,7 +89,7 @@ } void run_ndsh_q5(nvbench::state& state, - std::unordered_map& sources) + std::unordered_map& sources) { // Define the column projection and filter predicate for the `orders` table std::vector const orders_cols = {"o_custkey", "o_orderkey", "o_orderdate"}; @@ -120,17 +120,17 @@ void run_ndsh_q5(nvbench::state& state, // Read out the tables from parquet files // while pushing down the column projections and filter predicates auto const customer = - read_parquet(sources["customer"].make_source_info(), {"c_custkey", "c_nationkey"}); + read_parquet(sources.at("customer").make_source_info(), {"c_custkey", "c_nationkey"}); auto const orders = - read_parquet(sources["orders"].make_source_info(), orders_cols, std::move(orders_pred)); - auto const lineitem = read_parquet(sources["lineitem"].make_source_info(), + read_parquet(sources.at("orders").make_source_info(), orders_cols, std::move(orders_pred)); + auto const lineitem = read_parquet(sources.at("lineitem").make_source_info(), {"l_orderkey", "l_suppkey", "l_extendedprice", "l_discount"}); auto const supplier = - read_parquet(sources["supplier"].make_source_info(), {"s_suppkey", "s_nationkey"}); + read_parquet(sources.at("supplier").make_source_info(), {"s_suppkey", "s_nationkey"}); auto const nation = - read_parquet(sources["nation"].make_source_info(), {"n_nationkey", "n_regionkey", "n_name"}); + read_parquet(sources.at("nation").make_source_info(), {"n_nationkey", 
"n_regionkey", "n_name"}); auto const region = - read_parquet(sources["region"].make_source_info(), region_cols, std::move(region_pred)); + read_parquet(sources.at("region").make_source_info(), region_cols, std::move(region_pred)); // Perform the joins auto const join_a = apply_inner_join(region, nation, {"r_regionkey"}, {"n_regionkey"}); @@ -165,7 +165,7 @@ void ndsh_q5(nvbench::state& state) { // Generate the required parquet files in device buffers double const scale_factor = state.get_float64("scale_factor"); - std::unordered_map sources; + std::unordered_map sources; generate_parquet_data_sources( scale_factor, {"customer", "orders", "lineitem", "supplier", "nation", "region"}, sources); diff --git a/cpp/benchmarks/ndsh/q06.cpp b/cpp/benchmarks/ndsh/q06.cpp index 04078547973..e1e56c3622e 100644 --- a/cpp/benchmarks/ndsh/q06.cpp +++ b/cpp/benchmarks/ndsh/q06.cpp @@ -64,7 +64,7 @@ } void run_ndsh_q6(nvbench::state& state, - std::unordered_map& sources) + std::unordered_map& sources) { // Read out the `lineitem` table from parquet file std::vector const lineitem_cols = { @@ -83,8 +83,8 @@ void run_ndsh_q6(nvbench::state& state, cudf::ast::operation(cudf::ast::ast_operator::LESS, shipdate_ref, shipdate_upper_literal); auto const lineitem_pred = std::make_unique( cudf::ast::ast_operator::LOGICAL_AND, shipdate_pred_a, shipdate_pred_b); - auto lineitem = - read_parquet(sources["lineitem"].make_source_info(), lineitem_cols, std::move(lineitem_pred)); + auto lineitem = read_parquet( + sources.at("lineitem").make_source_info(), lineitem_cols, std::move(lineitem_pred)); // Cast the discount and quantity columns to float32 and append to lineitem table auto discout_float = @@ -134,7 +134,7 @@ void ndsh_q6(nvbench::state& state) { // Generate the required parquet files in device buffers double const scale_factor = state.get_float64("scale_factor"); - std::unordered_map sources; + std::unordered_map sources; generate_parquet_data_sources(scale_factor, {"lineitem"}, sources); 
auto stream = cudf::get_default_stream(); diff --git a/cpp/benchmarks/ndsh/q09.cpp b/cpp/benchmarks/ndsh/q09.cpp index 59218ab8912..2e9a69d9ee2 100644 --- a/cpp/benchmarks/ndsh/q09.cpp +++ b/cpp/benchmarks/ndsh/q09.cpp @@ -112,20 +112,21 @@ } void run_ndsh_q9(nvbench::state& state, - std::unordered_map& sources) + std::unordered_map& sources) { // Read out the table from parquet files auto const lineitem = read_parquet( - sources["lineitem"].make_source_info(), + sources.at("lineitem").make_source_info(), {"l_suppkey", "l_partkey", "l_orderkey", "l_extendedprice", "l_discount", "l_quantity"}); - auto const nation = read_parquet(sources["nation"].make_source_info(), {"n_nationkey", "n_name"}); + auto const nation = + read_parquet(sources.at("nation").make_source_info(), {"n_nationkey", "n_name"}); auto const orders = - read_parquet(sources["orders"].make_source_info(), {"o_orderkey", "o_orderdate"}); - auto const part = read_parquet(sources["part"].make_source_info(), {"p_partkey", "p_name"}); - auto const partsupp = read_parquet(sources["partsupp"].make_source_info(), + read_parquet(sources.at("orders").make_source_info(), {"o_orderkey", "o_orderdate"}); + auto const part = read_parquet(sources.at("part").make_source_info(), {"p_partkey", "p_name"}); + auto const partsupp = read_parquet(sources.at("partsupp").make_source_info(), {"ps_suppkey", "ps_partkey", "ps_supplycost"}); auto const supplier = - read_parquet(sources["supplier"].make_source_info(), {"s_suppkey", "s_nationkey"}); + read_parquet(sources.at("supplier").make_source_info(), {"s_suppkey", "s_nationkey"}); // Generating the `profit` table // Filter the part table using `p_name like '%green%'` @@ -178,7 +179,7 @@ void ndsh_q9(nvbench::state& state) { // Generate the required parquet files in device buffers double const scale_factor = state.get_float64("scale_factor"); - std::unordered_map sources; + std::unordered_map sources; generate_parquet_data_sources( scale_factor, {"part", "supplier", "lineitem", 
"partsupp", "orders", "nation"}, sources); diff --git a/cpp/benchmarks/ndsh/q10.cpp b/cpp/benchmarks/ndsh/q10.cpp index a520480020a..72edd15083d 100644 --- a/cpp/benchmarks/ndsh/q10.cpp +++ b/cpp/benchmarks/ndsh/q10.cpp @@ -94,7 +94,7 @@ } void run_ndsh_q10(nvbench::state& state, - std::unordered_map& sources) + std::unordered_map& sources) { // Define the column projection and filter predicate for the `orders` table std::vector const orders_cols = {"o_custkey", "o_orderkey", "o_orderdate"}; @@ -122,15 +122,16 @@ void run_ndsh_q10(nvbench::state& state, // Read out the tables from parquet files // while pushing down the column projections and filter predicates auto const customer = read_parquet( - sources["customer"].make_source_info(), + sources.at("customer").make_source_info(), {"c_custkey", "c_name", "c_nationkey", "c_acctbal", "c_address", "c_phone", "c_comment"}); auto const orders = - read_parquet(sources["orders"].make_source_info(), orders_cols, std::move(orders_pred)); + read_parquet(sources.at("orders").make_source_info(), orders_cols, std::move(orders_pred)); auto const lineitem = - read_parquet(sources["lineitem"].make_source_info(), + read_parquet(sources.at("lineitem").make_source_info(), {"l_extendedprice", "l_discount", "l_orderkey", "l_returnflag"}, std::move(lineitem_pred)); - auto const nation = read_parquet(sources["nation"].make_source_info(), {"n_name", "n_nationkey"}); + auto const nation = + read_parquet(sources.at("nation").make_source_info(), {"n_name", "n_nationkey"}); // Perform the joins auto const join_a = apply_inner_join(customer, nation, {"c_nationkey"}, {"n_nationkey"}); @@ -163,7 +164,7 @@ void ndsh_q10(nvbench::state& state) { // Generate the required parquet files in device buffers double const scale_factor = state.get_float64("scale_factor"); - std::unordered_map sources; + std::unordered_map sources; generate_parquet_data_sources( scale_factor, {"customer", "orders", "lineitem", "nation"}, sources); diff --git 
a/cpp/benchmarks/ndsh/utilities.cpp b/cpp/benchmarks/ndsh/utilities.cpp index 62116ddf661..9f9849860c9 100644 --- a/cpp/benchmarks/ndsh/utilities.cpp +++ b/cpp/benchmarks/ndsh/utilities.cpp @@ -17,6 +17,8 @@ #include "utilities.hpp" #include "common/ndsh_data_generator/ndsh_data_generator.hpp" +#include "common/table_utilities.hpp" +#include "cudf/detail/utilities/integer_utils.hpp" #include #include @@ -30,8 +32,15 @@ #include #include +#include +#include +#include + +#include #include #include +#include +#include namespace { @@ -85,6 +94,15 @@ std::vector const NATION_SCHEMA = { "n_nationkey", "n_name", "n_regionkey", "n_comment"}; std::vector const REGION_SCHEMA = {"r_regionkey", "r_name", "r_comment"}; +std::unordered_map const> const SCHEMAS = { + {"orders", ORDERS_SCHEMA}, + {"lineitem", LINEITEM_SCHEMA}, + {"part", PART_SCHEMA}, + {"partsupp", PARTSUPP_SCHEMA}, + {"supplier", SUPPLIER_SCHEMA}, + {"customer", CUSTOMER_SCHEMA}, + {"nation", NATION_SCHEMA}, + {"region", REGION_SCHEMA}}; } // namespace cudf::table_view table_with_names::table() const { return tbl->view(); } @@ -337,7 +355,7 @@ int32_t days_since_epoch(int year, int month, int day) void write_to_parquet_device_buffer(std::unique_ptr const& table, std::vector const& col_names, - parquet_device_buffer& source) + cuio_source_sink_pair& source) { CUDF_FUNC_RANGE(); auto const stream = cudf::get_default_stream(); @@ -351,55 +369,124 @@ void write_to_parquet_device_buffer(std::unique_ptr const& table, metadata.schema_info = col_name_infos; auto const table_input_metadata = cudf::io::table_input_metadata{metadata}; - // Declare a host and device buffer - std::vector h_buffer; - + auto est_size = static_cast(estimate_size(table->view())); + constexpr auto PQ_MAX_TABLE_BYTES = 8ul << 30; // 8GB + // TODO: best to get this limit from percent_of_free_device_memory(50) of device memory resource. 
+ if (est_size > PQ_MAX_TABLE_BYTES) { + auto builder = cudf::io::chunked_parquet_writer_options::builder(source.make_sink_info()); + builder.metadata(table_input_metadata); + auto const options = builder.build(); + auto num_splits = static_cast( + std::ceil(static_cast(est_size) / (PQ_MAX_TABLE_BYTES))); + std::vector splits(num_splits - 1); + auto num_rows = table->num_rows(); + auto num_row_per_chunk = cudf::util::div_rounding_up_safe(num_rows, num_splits); + std::generate_n(splits.begin(), splits.size(), [num_row_per_chunk, i = 0]() mutable { + return (i += num_row_per_chunk); + }); + std::vector split_tables = cudf::split(table->view(), splits, stream); + auto writer = cudf::io::parquet_chunked_writer(options, stream); + for (auto const& chunk_table : split_tables) { + writer.write(chunk_table); + } + writer.close(); + return; + } // Write parquet data to host buffer - auto builder = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&h_buffer), table->view()); + auto builder = cudf::io::parquet_writer_options::builder(source.make_sink_info(), table->view()); builder.metadata(table_input_metadata); auto const options = builder.build(); - cudf::io::write_parquet(options); + cudf::io::write_parquet(options, stream); +} - // Copy host buffer to device buffer - source.d_buffer.resize(h_buffer.size(), stream); - CUDF_CUDA_TRY(cudaMemcpyAsync( - source.d_buffer.data(), h_buffer.data(), h_buffer.size(), cudaMemcpyDefault, stream.value())); +inline auto make_managed_pool() +{ + return rmm::mr::make_owning_wrapper( + std::make_shared(), rmm::percent_of_free_device_memory(50)); } void generate_parquet_data_sources(double scale_factor, std::vector const& table_names, - std::unordered_map& sources) + std::unordered_map& sources) { CUDF_FUNC_RANGE(); - std::for_each(table_names.begin(), table_names.end(), [&](auto const& table_name) { - sources[table_name] = parquet_device_buffer(); - }); - auto [orders, lineitem, part] = 
cudf::datagen::generate_orders_lineitem_part( - scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + // Set the memory resource to the managed pool + auto old_mr = cudf::get_current_device_resource(); + // if already managed pool or managed, don't create new one. + using managed_pool_mr_t = decltype(make_managed_pool()); + managed_pool_mr_t managed_pool_mr; + bool const is_managed = + dynamic_cast*>(old_mr) or + dynamic_cast(old_mr); + if (!is_managed) { + std::cout << "Creating managed pool just for data generation\n"; + managed_pool_mr = make_managed_pool(); + cudf::set_current_device_resource(managed_pool_mr.get()); + // drawback: if already pool takes 50% of free memory, we are left with 50% of 50% of free + // memory. + } - auto partsupp = cudf::datagen::generate_partsupp( - scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + std::unordered_set const requested_table_names = [&table_names]() { + if (table_names.empty()) { + return std::unordered_set{ + "orders", "lineitem", "part", "partsupp", "supplier", "customer", "nation", "region"}; + } + return std::unordered_set(table_names.begin(), table_names.end()); + }(); + std::for_each( + requested_table_names.begin(), requested_table_names.end(), [&](auto const& table_name) { + sources.emplace(table_name, cuio_source_sink_pair(io_type::HOST_BUFFER)); + }); + std::unordered_map> tables; + + if (sources.count("orders") or sources.count("lineitem") or sources.count("part")) { + auto [orders, lineitem, part] = cudf::datagen::generate_orders_lineitem_part( + scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + if (sources.count("orders")) { + write_to_parquet_device_buffer(orders, SCHEMAS.at("orders"), sources.at("orders")); + orders = {}; + } + if (sources.count("part")) { + write_to_parquet_device_buffer(part, SCHEMAS.at("part"), sources.at("part")); + part = {}; + } + if (sources.count("lineitem")) { + 
write_to_parquet_device_buffer(lineitem, SCHEMAS.at("lineitem"), sources.at("lineitem")); + lineitem = {}; + } + } + + if (sources.count("partsupp")) { + auto partsupp = cudf::datagen::generate_partsupp( + scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + write_to_parquet_device_buffer(partsupp, SCHEMAS.at("partsupp"), sources.at("partsupp")); + } - auto supplier = cudf::datagen::generate_supplier( - scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + if (sources.count("supplier")) { + auto supplier = cudf::datagen::generate_supplier( + scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + write_to_parquet_device_buffer(supplier, SCHEMAS.at("supplier"), sources.at("supplier")); + } - auto customer = cudf::datagen::generate_customer( - scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + if (sources.count("customer")) { + auto customer = cudf::datagen::generate_customer( + scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + write_to_parquet_device_buffer(customer, SCHEMAS.at("customer"), sources.at("customer")); + } - auto nation = cudf::datagen::generate_nation(cudf::get_default_stream(), - cudf::get_current_device_resource_ref()); + if (sources.count("nation")) { + auto nation = cudf::datagen::generate_nation(cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + write_to_parquet_device_buffer(nation, SCHEMAS.at("nation"), sources.at("nation")); + } - auto region = cudf::datagen::generate_region(cudf::get_default_stream(), - cudf::get_current_device_resource_ref()); + if (sources.count("region")) { + auto region = cudf::datagen::generate_region(cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + write_to_parquet_device_buffer(region, SCHEMAS.at("region"), sources.at("region")); + } - write_to_parquet_device_buffer(std::move(orders), ORDERS_SCHEMA, 
sources["orders"]); - write_to_parquet_device_buffer(std::move(lineitem), LINEITEM_SCHEMA, sources["lineitem"]); - write_to_parquet_device_buffer(std::move(part), PART_SCHEMA, sources["part"]); - write_to_parquet_device_buffer(std::move(partsupp), PARTSUPP_SCHEMA, sources["partsupp"]); - write_to_parquet_device_buffer(std::move(customer), CUSTOMER_SCHEMA, sources["customer"]); - write_to_parquet_device_buffer(std::move(supplier), SUPPLIER_SCHEMA, sources["supplier"]); - write_to_parquet_device_buffer(std::move(nation), NATION_SCHEMA, sources["nation"]); - write_to_parquet_device_buffer(std::move(region), REGION_SCHEMA, sources["region"]); + // Restore the original memory resource + if (!is_managed) { cudf::set_current_device_resource(old_mr); } } diff --git a/cpp/benchmarks/ndsh/utilities.hpp b/cpp/benchmarks/ndsh/utilities.hpp index 762e43deccf..cae07f86a98 100644 --- a/cpp/benchmarks/ndsh/utilities.hpp +++ b/cpp/benchmarks/ndsh/utilities.hpp @@ -14,6 +14,8 @@ * limitations under the License. 
*/ +#include "io/cuio_common.hpp" + #include #include #include @@ -196,24 +198,15 @@ std::tm make_tm(int year, int month, int day); int32_t days_since_epoch(int year, int month, int day); /** - * @brief Struct representing a parquet device buffer - */ -struct parquet_device_buffer { - parquet_device_buffer() : d_buffer{0, cudf::get_default_stream()} {}; - cudf::io::source_info make_source_info() { return cudf::io::source_info(d_buffer); } - rmm::device_uvector d_buffer; -}; - -/** - * @brief Write a `cudf::table` to a parquet device buffer + * @brief Write a `cudf::table` to a parquet cuio sink * * @param table The `cudf::table` to write * @param col_names The column names of the table - * @param parquet_device_buffer The parquet device buffer to write the table to + * @param source The source sink pair to write the table to */ void write_to_parquet_device_buffer(std::unique_ptr const& table, std::vector const& col_names, - parquet_device_buffer& source); + cuio_source_sink_pair& source); /** * @brief Generate NDS-H tables and write to parquet device buffers @@ -224,4 +217,4 @@ void write_to_parquet_device_buffer(std::unique_ptr const& table, */ void generate_parquet_data_sources(double scale_factor, std::vector const& table_names, - std::unordered_map& sources); + std::unordered_map& sources); diff --git a/cpp/benchmarks/string/combine.cpp b/cpp/benchmarks/string/combine.cpp index 7acfb1ffb0d..d6ccfae63e8 100644 --- a/cpp/benchmarks/string/combine.cpp +++ b/cpp/benchmarks/string/combine.cpp @@ -14,57 +14,41 @@ * limitations under the License. 
*/ -#include "string_bench_args.hpp" - #include -#include -#include #include #include #include #include -class StringCombine : public cudf::benchmark {}; +#include -static void BM_combine(benchmark::State& state) +static void bench_combine(nvbench::state& state) { - cudf::size_type const n_rows{static_cast(state.range(0))}; - cudf::size_type const max_str_length{static_cast(state.range(1))}; - data_profile const table_profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + + data_profile const profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); auto const table = create_random_table( - {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{n_rows}, table_profile); + {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{num_rows}, profile); cudf::strings_column_view input1(table->view().column(0)); cudf::strings_column_view input2(table->view().column(1)); cudf::string_scalar separator("+"); - for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::get_default_stream()); - cudf::strings::concatenate(table->view(), separator); - } - - state.SetBytesProcessed(state.iterations() * (input1.chars_size(cudf::get_default_stream()) + - input2.chars_size(cudf::get_default_stream()))); -} + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + auto chars_size = + input1.chars_size(stream) + input2.chars_size(stream) + (num_rows * separator.size()); + state.add_global_memory_reads(chars_size); // all bytes are read; + state.add_global_memory_writes(chars_size); -static void generate_bench_args(benchmark::internal::Benchmark* b) -{ - int const min_rows = 1 << 12; - int const max_rows = 1 << 24; - int const row_mult = 8; - int const min_rowlen = 1 
<< 4; - int const max_rowlen = 1 << 11; - int const len_mult = 4; - generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = cudf::strings::concatenate(table->view(), separator); + }); } -#define STRINGS_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(StringCombine, name) \ - (::benchmark::State & st) { BM_combine(st); } \ - BENCHMARK_REGISTER_F(StringCombine, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -STRINGS_BENCHMARK_DEFINE(concat) +NVBENCH_BENCH(bench_combine) + .set_name("concat") + .add_int64_axis("row_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}); diff --git a/cpp/benchmarks/string/contains.cpp b/cpp/benchmarks/string/contains.cpp index ae6c8b844c8..a73017dda18 100644 --- a/cpp/benchmarks/string/contains.cpp +++ b/cpp/benchmarks/string/contains.cpp @@ -17,10 +17,6 @@ #include #include -#include - -#include -#include #include #include #include @@ -28,57 +24,6 @@ #include -std::unique_ptr build_input_column(cudf::size_type n_rows, - cudf::size_type row_width, - int32_t hit_rate) -{ - // build input table using the following data - auto raw_data = cudf::test::strings_column_wrapper( - { - "123 abc 4567890 DEFGHI 0987 5W43", // matches both patterns; - "012345 6789 01234 56789 0123 456", // the rest do not match - "abc 4567890 DEFGHI 0987 Wxyz 123", - "abcdefghijklmnopqrstuvwxyz 01234", - "", - "AbcéDEFGHIJKLMNOPQRSTUVWXYZ 01", - "9876543210,abcdefghijklmnopqrstU", - "9876543210,abcdefghijklmnopqrstU", - "123 édf 4567890 DéFG 0987 X5", - "1", - }) - .release(); - - if (row_width / 32 > 1) { - std::vector columns; - for (int i = 0; i < row_width / 32; ++i) { - columns.push_back(raw_data->view()); - } - raw_data = cudf::strings::concatenate(cudf::table_view(columns)); - } - auto data_view = raw_data->view(); - - // compute number of rows in n_rows 
that should match - auto matches = static_cast(n_rows * hit_rate) / 100; - - // Create a randomized gather-map to build a column out of the strings in data. - data_profile gather_profile = - data_profile_builder().cardinality(0).null_probability(0.0).distribution( - cudf::type_id::INT32, distribution_id::UNIFORM, 1, data_view.size() - 1); - auto gather_table = - create_random_table({cudf::type_id::INT32}, row_count{n_rows}, gather_profile); - gather_table->get_column(0).set_null_mask(rmm::device_buffer{}, 0); - - // Create scatter map by placing 0-index values throughout the gather-map - auto scatter_data = cudf::sequence( - matches, cudf::numeric_scalar(0), cudf::numeric_scalar(n_rows / matches)); - auto zero_scalar = cudf::numeric_scalar(0); - auto table = cudf::scatter({zero_scalar}, scatter_data->view(), gather_table->view()); - auto gather_map = table->view().column(0); - table = cudf::gather(cudf::table_view({data_view}), gather_map); - - return std::move(table->release().front()); -} - // longer pattern lengths demand more working memory per string std::string patterns[] = {"^\\d+ [a-z]+", "[A-Z ]+\\d+ +\\d+[A-Z]+\\d+$", "5W43"}; @@ -94,7 +39,7 @@ static void bench_contains(nvbench::state& state) state.skip("Skip benchmarks greater than size_type limit"); } - auto col = build_input_column(n_rows, row_width, hit_rate); + auto col = create_string_column(n_rows, row_width, hit_rate); auto input = cudf::strings_column_view(col->view()); auto pattern = patterns[pattern_index]; diff --git a/cpp/benchmarks/string/find.cpp b/cpp/benchmarks/string/find.cpp index a9c620e4bf0..996bdcf0332 100644 --- a/cpp/benchmarks/string/find.cpp +++ b/cpp/benchmarks/string/find.cpp @@ -19,7 +19,6 @@ #include -#include #include #include #include @@ -29,10 +28,6 @@ #include -std::unique_ptr build_input_column(cudf::size_type n_rows, - cudf::size_type row_width, - int32_t hit_rate); - static void bench_find_string(nvbench::state& state) { auto const n_rows = 
static_cast(state.get_int64("num_rows")); @@ -46,7 +41,7 @@ static void bench_find_string(nvbench::state& state) } auto const stream = cudf::get_default_stream(); - auto const col = build_input_column(n_rows, row_width, hit_rate); + auto const col = create_string_column(n_rows, row_width, hit_rate); auto const input = cudf::strings_column_view(col->view()); std::vector h_targets({"5W", "5W43", "0987 5W43"}); diff --git a/cpp/benchmarks/string/like.cpp b/cpp/benchmarks/string/like.cpp index 99cef640dc3..105ae65cbe8 100644 --- a/cpp/benchmarks/string/like.cpp +++ b/cpp/benchmarks/string/like.cpp @@ -18,68 +18,12 @@ #include -#include -#include -#include #include #include #include #include -namespace { -std::unique_ptr build_input_column(cudf::size_type n_rows, - cudf::size_type row_width, - int32_t hit_rate) -{ - // build input table using the following data - auto raw_data = cudf::test::strings_column_wrapper( - { - "123 abc 4567890 DEFGHI 0987 5W43", // matches always; - "012345 6789 01234 56789 0123 456", // the rest do not match - "abc 4567890 DEFGHI 0987 Wxyz 123", - "abcdefghijklmnopqrstuvwxyz 01234", - "", - "AbcéDEFGHIJKLMNOPQRSTUVWXYZ 01", - "9876543210,abcdefghijklmnopqrstU", - "9876543210,abcdefghijklmnopqrstU", - "123 édf 4567890 DéFG 0987 X5", - "1", - }) - .release(); - if (row_width / 32 > 1) { - std::vector columns; - for (int i = 0; i < row_width / 32; ++i) { - columns.push_back(raw_data->view()); - } - raw_data = cudf::strings::concatenate(cudf::table_view(columns)); - } - auto data_view = raw_data->view(); - - // compute number of rows in n_rows that should match - auto matches = static_cast(n_rows * hit_rate) / 100; - - // Create a randomized gather-map to build a column out of the strings in data. 
- data_profile gather_profile = - data_profile_builder().cardinality(0).null_probability(0.0).distribution( - cudf::type_id::INT32, distribution_id::UNIFORM, 1, data_view.size() - 1); - auto gather_table = - create_random_table({cudf::type_id::INT32}, row_count{n_rows}, gather_profile); - gather_table->get_column(0).set_null_mask(rmm::device_buffer{}, 0); - - // Create scatter map by placing 0-index values throughout the gather-map - auto scatter_data = cudf::sequence( - matches, cudf::numeric_scalar(0), cudf::numeric_scalar(n_rows / matches)); - auto zero_scalar = cudf::numeric_scalar(0); - auto table = cudf::scatter({zero_scalar}, scatter_data->view(), gather_table->view()); - auto gather_map = table->view().column(0); - table = cudf::gather(cudf::table_view({data_view}), gather_map); - - return std::move(table->release().front()); -} - -} // namespace - static void bench_like(nvbench::state& state) { auto const n_rows = static_cast(state.get_int64("num_rows")); @@ -91,7 +35,7 @@ static void bench_like(nvbench::state& state) state.skip("Skip benchmarks greater than size_type limit"); } - auto col = build_input_column(n_rows, row_width, hit_rate); + auto col = create_string_column(n_rows, row_width, hit_rate); auto input = cudf::strings_column_view(col->view()); // This pattern forces reading the entire target string (when matched expected) diff --git a/cpp/benchmarks/text/ngrams.cpp b/cpp/benchmarks/text/ngrams.cpp index 8e48f8e9a05..43d57201b20 100644 --- a/cpp/benchmarks/text/ngrams.cpp +++ b/cpp/benchmarks/text/ngrams.cpp @@ -15,58 +15,45 @@ */ #include -#include -#include -#include #include #include #include -class TextNGrams : public cudf::benchmark {}; +#include -enum class ngrams_type { tokens, characters }; - -static void BM_ngrams(benchmark::State& state, ngrams_type nt) +static void bench_ngrams(nvbench::state& state) { - auto const n_rows = static_cast(state.range(0)); - auto const max_str_length = static_cast(state.range(1)); + auto const num_rows = 
static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const ngram_type = state.get_string("type"); + data_profile const profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); cudf::strings_column_view input(column->view()); auto const separator = cudf::string_scalar("_"); - for (auto _ : state) { - cuda_event_timer raii(state, true); - switch (nt) { - case ngrams_type::tokens: nvtext::generate_ngrams(input, 2, separator); break; - case ngrams_type::characters: nvtext::generate_character_ngrams(input); break; - } - } + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); - state.SetBytesProcessed(state.iterations() * input.chars_size(cudf::get_default_stream())); -} + auto chars_size = input.chars_size(cudf::get_default_stream()); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(chars_size * 2); -static void generate_bench_args(benchmark::internal::Benchmark* b) -{ - int const min_rows = 1 << 12; - int const max_rows = 1 << 24; - int const row_mult = 8; - int const min_rowlen = 5; - int const max_rowlen = 40; - int const len_mult = 2; - generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); + if (ngram_type == "chars") { + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = nvtext::generate_character_ngrams(input); + }); + } else { + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = nvtext::generate_ngrams(input, 2, separator); + }); + } } -#define NVTEXT_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(TextNGrams, name) \ - 
(::benchmark::State & st) { BM_ngrams(st, ngrams_type::name); } \ - BENCHMARK_REGISTER_F(TextNGrams, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -NVTEXT_BENCHMARK_DEFINE(tokens) -NVTEXT_BENCHMARK_DEFINE(characters) +NVBENCH_BENCH(bench_ngrams) + .set_name("ngrams") + .add_int64_axis("num_rows", {131072, 262144, 524288, 1048578}) + .add_int64_axis("row_width", {10, 20, 40, 100}) + .add_string_axis("type", {"chars", "tokens"}); diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md index fce8adb4c06..1c1052487f2 100644 --- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md +++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md @@ -370,7 +370,7 @@ any type that cudf supports. For example, a `list_scalar` representing a list of |Value type|Scalar class|Notes| |-|-|-| |fixed-width|`fixed_width_scalar`| `T` can be any fixed-width type| -|numeric|`numeric_scalar` | `T` can be `int8_t`, `int16_t`, `int32_t`, `int_64_t`, `float` or `double`| +|numeric|`numeric_scalar` | `T` can be `int8_t`, `int16_t`, `int32_t`, `int64_t`, `float` or `double`| |fixed-point|`fixed_point_scalar` | `T` can be `numeric::decimal32` or `numeric::decimal64`| |timestamp|`timestamp_scalar` | `T` can be `timestamp_D`, `timestamp_s`, etc.| |duration|`duration_scalar` | `T` can be `duration_D`, `duration_s`, etc.| @@ -1483,6 +1483,17 @@ struct, and therefore `cudf::struct_view` is the data type of a `cudf::column` o `cudf::type_dispatcher` dispatches to the `struct_view` data type when invoked on a `STRUCT` column. +# Empty Columns + +The libcudf columns support empty, typed content. These columns have no data and no validity mask. +Empty strings or lists columns may or may not contain a child offsets column. +It is undefined behavior (UB) to access the offsets child of an empty strings or lists column. 
+Nested columns like lists and structs may require other children columns to provide the +nested structure of the empty types. + +Use `cudf::make_empty_column()` to create fixed-width and strings columns. +Use `cudf::empty_like()` to create an empty column from an existing `cudf::column_view`. + # cuIO: file reading and writing cuIO is a component of libcudf that provides GPU-accelerated reading and writing of data file diff --git a/cpp/include/cudf/ast/detail/expression_parser.hpp b/cpp/include/cudf/ast/detail/expression_parser.hpp index a254171ef11..f4cce8e6da6 100644 --- a/cpp/include/cudf/ast/detail/expression_parser.hpp +++ b/cpp/include/cudf/ast/detail/expression_parser.hpp @@ -17,12 +17,8 @@ #include #include -#include #include #include -#include - -#include #include #include diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index 6bbe32de134..e72661ce49a 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -24,8 +24,6 @@ #include -#include - namespace CUDF_EXPORT cudf { /** * @addtogroup column_factories diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index 48f89b8be25..6db5c8b3c7b 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -16,7 +16,6 @@ #pragma once #include -#include #include #include #include diff --git a/cpp/include/cudf/detail/aggregation/device_aggregators.cuh b/cpp/include/cudf/detail/aggregation/device_aggregators.cuh index 10be5e1d36f..204eee49a2a 100644 --- a/cpp/include/cudf/detail/aggregation/device_aggregators.cuh +++ b/cpp/include/cudf/detail/aggregation/device_aggregators.cuh @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #pragma once #include @@ -29,12 +28,31 @@ #include namespace cudf::detail { +/// Checks if an aggregation kind needs to operate on the underlying storage type +template +__device__ constexpr bool uses_underlying_type() +{ + return k == aggregation::MIN or k == aggregation::MAX or k == aggregation::SUM; +} + +/// Gets the underlying target type for the given source type and aggregation kind +template +using underlying_target_t = + cuda::std::conditional_t(), + cudf::device_storage_type_t>, + cudf::detail::target_type_t>; + +/// Gets the underlying source type for the given source type and aggregation kind +template +using underlying_source_t = + cuda::std::conditional_t(), cudf::device_storage_type_t, Source>; + template struct update_target_element { - __device__ void operator()(mutable_column_device_view target, - size_type target_index, - column_device_view source, - size_type source_index) const noexcept + __device__ void operator()(mutable_column_device_view, + size_type, + column_device_view, + size_type) const noexcept { CUDF_UNREACHABLE("Invalid source type and aggregation combination."); } @@ -51,8 +69,6 @@ struct update_target_element< column_device_view source, size_type source_index) const noexcept { - if (source.is_null(source_index)) { return; } - using Target = target_type_t; cudf::detail::atomic_min(&target.element(target_index), static_cast(source.element(source_index))); @@ -72,8 +88,6 @@ struct update_target_element< column_device_view source, size_type source_index) const noexcept { - if (source.is_null(source_index)) { return; } - using Target = target_type_t; using DeviceTarget = device_storage_type_t; using DeviceSource = device_storage_type_t; @@ -96,8 +110,6 @@ struct update_target_element< column_device_view source, size_type source_index) const noexcept { - if (source.is_null(source_index)) { return; } - using Target = target_type_t; cudf::detail::atomic_max(&target.element(target_index), static_cast(source.element(source_index))); @@ 
-117,8 +129,6 @@ struct update_target_element< column_device_view source, size_type source_index) const noexcept { - if (source.is_null(source_index)) { return; } - using Target = target_type_t; using DeviceTarget = device_storage_type_t; using DeviceSource = device_storage_type_t; @@ -141,8 +151,6 @@ struct update_target_element< column_device_view source, size_type source_index) const noexcept { - if (source.is_null(source_index)) { return; } - using Target = target_type_t; cudf::detail::atomic_add(&target.element(target_index), static_cast(source.element(source_index))); @@ -162,8 +170,6 @@ struct update_target_element< column_device_view source, size_type source_index) const noexcept { - if (source.is_null(source_index)) { return; } - using Target = target_type_t; using DeviceTarget = device_storage_type_t; using DeviceSource = device_storage_type_t; @@ -197,10 +203,10 @@ struct update_target_from_dictionary { template ()>* = nullptr> - __device__ void operator()(mutable_column_device_view target, - size_type target_index, - column_device_view source, - size_type source_index) const noexcept + __device__ void operator()(mutable_column_device_view, + size_type, + column_device_view, + size_type) const noexcept { } }; @@ -227,8 +233,6 @@ struct update_target_element< column_device_view source, size_type source_index) const noexcept { - if (source.is_null(source_index)) { return; } - dispatch_type_and_aggregation( source.child(cudf::dictionary_column_view::keys_column_index).type(), k, @@ -249,8 +253,6 @@ struct update_target_element; auto value = static_cast(source.element(source_index)); cudf::detail::atomic_add(&target.element(target_index), value * value); @@ -267,8 +269,6 @@ struct update_target_element; cudf::detail::atomic_mul(&target.element(target_index), static_cast(source.element(source_index))); @@ -286,8 +286,6 @@ struct update_target_element< column_device_view source, size_type source_index) const noexcept { - if (source.is_null(source_index)) { 
return; } - using Target = target_type_t; cudf::detail::atomic_add(&target.element(target_index), Target{1}); @@ -323,8 +321,6 @@ struct update_target_element< column_device_view source, size_type source_index) const noexcept { - if (source.is_null(source_index)) { return; } - using Target = target_type_t; auto old = cudf::detail::atomic_cas( &target.element(target_index), ARGMAX_SENTINEL, source_index); @@ -349,8 +345,6 @@ struct update_target_element< column_device_view source, size_type source_index) const noexcept { - if (source.is_null(source_index)) { return; } - using Target = target_type_t; auto old = cudf::detail::atomic_cas( &target.element(target_index), ARGMIN_SENTINEL, source_index); @@ -376,6 +370,9 @@ struct elementwise_aggregator { column_device_view source, size_type source_index) const noexcept { + if constexpr (k != cudf::aggregation::COUNT_ALL) { + if (source.is_null(source_index)) { return; } + } update_target_element{}(target, target_index, source, source_index); } }; diff --git a/cpp/include/cudf/detail/aggregation/result_cache.hpp b/cpp/include/cudf/detail/aggregation/result_cache.hpp index ec5a511bb7c..486808ebe18 100644 --- a/cpp/include/cudf/detail/aggregation/result_cache.hpp +++ b/cpp/include/cudf/detail/aggregation/result_cache.hpp @@ -19,7 +19,6 @@ #include #include #include -#include #include diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh index dfb646c66c4..4159e324472 100644 --- a/cpp/include/cudf/detail/copy_if.cuh +++ b/cpp/include/cudf/detail/copy_if.cuh @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -36,7 +37,6 @@ #include #include -#include #include #include @@ -256,7 +256,7 @@ struct scatter_gather_functor { cudf::detail::grid_1d grid{input.size(), block_size, per_thread}; - rmm::device_scalar null_count{0, stream}; + cudf::detail::device_scalar null_count{0, stream}; if (output.nullable()) { // Have to initialize the output mask to all zeros because 
we may update // it with atomicOr(). diff --git a/cpp/include/cudf/detail/copy_if_else.cuh b/cpp/include/cudf/detail/copy_if_else.cuh index a70cd5a0661..5dc75b1a3fb 100644 --- a/cpp/include/cudf/detail/copy_if_else.cuh +++ b/cpp/include/cudf/detail/copy_if_else.cuh @@ -19,12 +19,11 @@ #include #include #include +#include #include #include #include -#include - #include #include @@ -171,7 +170,7 @@ std::unique_ptr copy_if_else(bool nullable, // if we have validity in the output if (nullable) { - rmm::device_scalar valid_count{0, stream}; + cudf::detail::device_scalar valid_count{0, stream}; // call the kernel copy_if_else_kernel diff --git a/cpp/include/cudf/detail/copy_range.cuh b/cpp/include/cudf/detail/copy_range.cuh index 3aa136d630b..fcb80fe45f7 100644 --- a/cpp/include/cudf/detail/copy_range.cuh +++ b/cpp/include/cudf/detail/copy_range.cuh @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -27,7 +28,6 @@ #include #include -#include #include #include @@ -154,7 +154,7 @@ void copy_range(SourceValueIterator source_value_begin, auto grid = cudf::detail::grid_1d{num_items, block_size, 1}; if (target.nullable()) { - rmm::device_scalar null_count(target.null_count(), stream); + cudf::detail::device_scalar null_count(target.null_count(), stream); auto kernel = copy_range_kernel; diff --git a/cpp/include/cudf/detail/device_scalar.hpp b/cpp/include/cudf/detail/device_scalar.hpp new file mode 100644 index 00000000000..16ca06c6561 --- /dev/null +++ b/cpp/include/cudf/detail/device_scalar.hpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace CUDF_EXPORT cudf { +namespace detail { + +template +class device_scalar : public rmm::device_scalar { + public: +#ifdef __CUDACC__ +#pragma nv_exec_check_disable +#endif + ~device_scalar() = default; + +// Implementation is the same as what compiler should generate +// Could not use default move constructor as 11.8 compiler fails to generate it +#ifdef __CUDACC__ +#pragma nv_exec_check_disable +#endif + device_scalar(device_scalar&& other) noexcept + : rmm::device_scalar{std::move(other)}, bounce_buffer{std::move(other.bounce_buffer)} + { + } + device_scalar& operator=(device_scalar&&) noexcept = default; + + device_scalar(device_scalar const&) = delete; + device_scalar& operator=(device_scalar const&) = delete; + + device_scalar() = delete; + + explicit device_scalar( + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) + : rmm::device_scalar(stream, mr), bounce_buffer{make_host_vector(1, stream)} + { + } + + explicit device_scalar( + T const& initial_value, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) + : rmm::device_scalar(stream, mr), bounce_buffer{make_host_vector(1, stream)} + { + bounce_buffer[0] = initial_value; + cuda_memcpy_async(device_span{this->data(), 1}, bounce_buffer, stream); + } + + device_scalar(device_scalar const& other, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr = 
cudf::get_current_device_resource_ref()) + : rmm::device_scalar(other, stream, mr), bounce_buffer{make_host_vector(1, stream)} + { + } + + [[nodiscard]] T value(rmm::cuda_stream_view stream) const + { + cuda_memcpy(bounce_buffer, device_span{this->data(), 1}, stream); + return bounce_buffer[0]; + } + + void set_value_async(T const& value, rmm::cuda_stream_view stream) + { + bounce_buffer[0] = value; + cuda_memcpy_async(device_span{this->data(), 1}, bounce_buffer, stream); + } + + void set_value_async(T&& value, rmm::cuda_stream_view stream) + { + bounce_buffer[0] = std::move(value); + cuda_memcpy_async(device_span{this->data(), 1}, bounce_buffer, stream); + } + + void set_value_to_zero_async(rmm::cuda_stream_view stream) { set_value_async(T{}, stream); } + + private: + mutable cudf::detail::host_vector bounce_buffer; +}; + +} // namespace detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/is_element_valid.hpp b/cpp/include/cudf/detail/is_element_valid.hpp index 4b74d12f306..26b1bec2ced 100644 --- a/cpp/include/cudf/detail/is_element_valid.hpp +++ b/cpp/include/cudf/detail/is_element_valid.hpp @@ -17,7 +17,6 @@ #pragma once #include -#include #include #include diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 4349e1b70fd..30f36d6a5da 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -38,18 +38,19 @@ #include #include +#include +#include #include #include #include #include -#include - namespace cudf { namespace detail { /** * @brief Convenience wrapper for creating a `thrust::transform_iterator` over a - * `thrust::counting_iterator`. + * `thrust::counting_iterator` within the range [0, INT_MAX]. + * * * Example: * @code{.cpp} @@ -62,14 +63,21 @@ namespace detail { * iter[n] == n * n * @endcode * - * @param start The starting value of the counting iterator + * @param start The starting value of the counting iterator (must be size_type or smaller type). 
* @param f The unary function to apply to the counting iterator. * @return A transform iterator that applies `f` to a counting iterator */ -template -CUDF_HOST_DEVICE inline auto make_counting_transform_iterator(cudf::size_type start, +template +CUDF_HOST_DEVICE inline auto make_counting_transform_iterator(CountingIterType start, UnaryFunction f) { + // Check if the `start` for counting_iterator is of size_type or a smaller integral type + static_assert( + cuda::std::is_integral_v and + cuda::std::numeric_limits::digits <= + cuda::std::numeric_limits::digits, + "The `start` for the counting_transform_iterator must be size_type or smaller type"); + return thrust::make_transform_iterator(thrust::make_counting_iterator(start), f); } diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh index 327c732716c..025e2ccc3ec 100644 --- a/cpp/include/cudf/detail/null_mask.cuh +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include #include @@ -25,7 +26,6 @@ #include #include -#include #include #include @@ -165,17 +165,10 @@ size_type inplace_bitmask_binop(Binop op, "Mask pointer cannot be null"); rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref(); - rmm::device_scalar d_counter{0, stream, mr}; - rmm::device_uvector d_masks(masks.size(), stream, mr); - rmm::device_uvector d_begin_bits(masks_begin_bits.size(), stream, mr); - - CUDF_CUDA_TRY(cudaMemcpyAsync( - d_masks.data(), masks.data(), masks.size_bytes(), cudaMemcpyDefault, stream.value())); - CUDF_CUDA_TRY(cudaMemcpyAsync(d_begin_bits.data(), - masks_begin_bits.data(), - masks_begin_bits.size_bytes(), - cudaMemcpyDefault, - stream.value())); + cudf::detail::device_scalar d_counter{0, stream, mr}; + + auto d_masks = cudf::detail::make_device_uvector_async(masks, stream, mr); + auto d_begin_bits = cudf::detail::make_device_uvector_async(masks_begin_bits, stream, mr); auto constexpr block_size = 256; 
cudf::detail::grid_1d config(dest_mask.size(), block_size); diff --git a/cpp/include/cudf/detail/unary.hpp b/cpp/include/cudf/detail/unary.hpp index 18b1e9b2d2e..0f852db0c54 100644 --- a/cpp/include/cudf/detail/unary.hpp +++ b/cpp/include/cudf/detail/unary.hpp @@ -59,7 +59,7 @@ std::unique_ptr true_if(InputIterator begin, auto output_mutable_view = output->mutable_view(); auto output_data = output_mutable_view.data(); - thrust::transform(rmm::exec_policy(stream), begin, end, output_data, p); + thrust::transform(rmm::exec_policy_nosync(stream), begin, end, output_data, p); return output; } diff --git a/cpp/include/cudf/detail/utilities/batched_memset.hpp b/cpp/include/cudf/detail/utilities/batched_memset.hpp index 75f738f7529..78be5b91248 100644 --- a/cpp/include/cudf/detail/utilities/batched_memset.hpp +++ b/cpp/include/cudf/detail/utilities/batched_memset.hpp @@ -53,8 +53,8 @@ void batched_memset(std::vector> const& bufs, cudf::detail::make_device_uvector_async(bufs, stream, cudf::get_current_device_resource_ref()); // get a vector with the sizes of all buffers - auto sizes = cudf::detail::make_counting_transform_iterator( - static_cast(0), + auto sizes = thrust::make_transform_iterator( + thrust::counting_iterator(0), cuda::proclaim_return_type( [gpu_bufs = gpu_bufs.data()] __device__(std::size_t i) { return gpu_bufs[i].size(); })); diff --git a/cpp/include/cudf/detail/valid_if.cuh b/cpp/include/cudf/detail/valid_if.cuh index cfb2e70bfed..af182b69c3a 100644 --- a/cpp/include/cudf/detail/valid_if.cuh +++ b/cpp/include/cudf/detail/valid_if.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -25,7 +26,6 @@ #include #include -#include #include @@ -101,7 +101,7 @@ std::pair valid_if(InputIterator begin, size_type null_count{0}; if (size > 0) { - rmm::device_scalar valid_count{0, stream}; + cudf::detail::device_scalar valid_count{0, stream}; constexpr size_type block_size{256}; grid_1d grid{size, block_size}; diff --git 
a/cpp/include/cudf/dictionary/dictionary_column_view.hpp b/cpp/include/cudf/dictionary/dictionary_column_view.hpp index 5596f78a90b..0a799f27d00 100644 --- a/cpp/include/cudf/dictionary/dictionary_column_view.hpp +++ b/cpp/include/cudf/dictionary/dictionary_column_view.hpp @@ -15,7 +15,6 @@ */ #pragma once -#include #include /** diff --git a/cpp/include/cudf/fixed_point/floating_conversion.hpp b/cpp/include/cudf/fixed_point/detail/floating_conversion.hpp similarity index 99% rename from cpp/include/cudf/fixed_point/floating_conversion.hpp rename to cpp/include/cudf/fixed_point/detail/floating_conversion.hpp index f0d50edccd1..fce08b4a5c4 100644 --- a/cpp/include/cudf/fixed_point/floating_conversion.hpp +++ b/cpp/include/cudf/fixed_point/detail/floating_conversion.hpp @@ -26,14 +26,6 @@ #include namespace CUDF_EXPORT numeric { - -/** - * @addtogroup floating_conversion - * @{ - * @file - * @brief fixed_point <--> floating-point conversion functions. - */ - namespace detail { /** @@ -1141,6 +1133,4 @@ CUDF_HOST_DEVICE inline FloatingType convert_integral_to_floating(Rep const& val } } // namespace detail - -/** @} */ // end of group } // namespace CUDF_EXPORT numeric diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp index 0c5327edb91..307a52cd242 100644 --- a/cpp/include/cudf/hashing.hpp +++ b/cpp/include/cudf/hashing.hpp @@ -22,26 +22,27 @@ namespace CUDF_EXPORT cudf { -/** - * @addtogroup column_hash - * @{ - * @file - */ - /** * @brief Type of hash value - * + * @ingroup column_hash */ using hash_value_type = uint32_t; /** * @brief The default seed value for hash functions + * @ingroup column_hash */ static constexpr uint32_t DEFAULT_HASH_SEED = 0; //! 
Hash APIs namespace hashing { +/** + * @addtogroup column_hash + * @{ + * @file + */ + /** * @brief Computes the MurmurHash3 32-bit hash value of each row in the given table * @@ -183,7 +184,8 @@ std::unique_ptr xxhash_64( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); +/** @} */ // end of group + } // namespace hashing -/** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/config_utils.hpp b/cpp/include/cudf/io/config_utils.hpp index 1827ba0e3e6..13a76d50346 100644 --- a/cpp/include/cudf/io/config_utils.hpp +++ b/cpp/include/cudf/io/config_utils.hpp @@ -18,7 +18,8 @@ #include namespace CUDF_EXPORT cudf { -namespace io::cufile_integration { +namespace io { +namespace cufile_integration { /** * @brief Returns true if cuFile and its compatibility mode are enabled. @@ -35,9 +36,15 @@ bool is_gds_enabled(); */ bool is_kvikio_enabled(); -} // namespace io::cufile_integration +/** + * @brief Set kvikIO thread pool size according to the environment variable KVIKIO_NTHREADS. If + * KVIKIO_NTHREADS is not set, use 8 threads by default. + */ +void set_thread_pool_nthreads_from_env(); + +} // namespace cufile_integration -namespace io::nvcomp_integration { +namespace nvcomp_integration { /** * @brief Returns true if all nvCOMP uses are enabled. 
@@ -49,5 +56,6 @@ bool is_all_enabled(); */ bool is_stable_enabled(); -} // namespace io::nvcomp_integration +} // namespace nvcomp_integration +} // namespace io } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/datasource.hpp b/cpp/include/cudf/io/datasource.hpp index 7d2cc4ad493..7bec40893fd 100644 --- a/cpp/include/cudf/io/datasource.hpp +++ b/cpp/include/cudf/io/datasource.hpp @@ -79,7 +79,7 @@ class datasource { template static std::unique_ptr create(Container&& data_owner) { - return std::make_unique>(std::move(data_owner)); + return std::make_unique>(std::forward(data_owner)); } }; @@ -335,13 +335,19 @@ class datasource { template class owning_buffer : public buffer { public: + // Require that the argument passed to the constructor be an rvalue (Container&& being an rvalue + // reference). + static_assert(std::is_rvalue_reference_v, + "The container argument passed to the constructor must be an rvalue."); + /** * @brief Moves the input container into the newly created object. * - * @param data_owner The container to construct the buffer from (ownership is transferred) + * @param moved_data_owner The container to construct the buffer from. Callers should explicitly + * pass std::move(data_owner) to this function to transfer the ownership. */ - owning_buffer(Container&& data_owner) - : _data(std::move(data_owner)), _data_ptr(_data.data()), _size(_data.size()) + owning_buffer(Container&& moved_data_owner) + : _data(std::move(moved_data_owner)), _data_ptr(_data.data()), _size(_data.size()) { } @@ -349,12 +355,13 @@ class datasource { * @brief Moves the input container into the newly created object, and exposes a subspan of the * buffer. * - * @param data_owner The container to construct the buffer from (ownership is transferred) + * @param moved_data_owner The container to construct the buffer from. Callers should explicitly + * pass std::move(data_owner) to this function to transfer the ownership. 
* @param data_ptr Pointer to the start of the subspan * @param size The size of the subspan */ - owning_buffer(Container&& data_owner, uint8_t const* data_ptr, size_t size) - : _data(std::move(data_owner)), _data_ptr(data_ptr), _size(size) + owning_buffer(Container&& moved_data_owner, uint8_t const* data_ptr, size_t size) + : _data(std::move(moved_data_owner)), _data_ptr(data_ptr), _size(size) { } diff --git a/cpp/include/cudf/io/text/detail/bgzip_utils.hpp b/cpp/include/cudf/io/text/detail/bgzip_utils.hpp index 11eb4518210..5659f86b0c4 100644 --- a/cpp/include/cudf/io/text/detail/bgzip_utils.hpp +++ b/cpp/include/cudf/io/text/detail/bgzip_utils.hpp @@ -16,16 +16,10 @@ #pragma once -#include #include #include -#include - -#include -#include #include -#include namespace CUDF_EXPORT cudf { namespace io::text::detail::bgzip { diff --git a/cpp/include/cudf/partitioning.hpp b/cpp/include/cudf/partitioning.hpp index 385da993262..f9a68e4fffc 100644 --- a/cpp/include/cudf/partitioning.hpp +++ b/cpp/include/cudf/partitioning.hpp @@ -70,6 +70,7 @@ enum class hash_id { * @param partition_map Non-nullable column of integer values that map each row * in `t` to it's partition. 
* @param num_partitions The total number of partitions + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table's device memory * @return Pair containing the reordered table and vector of `num_partitions + * 1` offsets to each partition such that the size of partition `i` is @@ -79,6 +80,7 @@ std::pair, std::vector> partition( table_view const& t, column_view const& partition_map, size_type num_partitions, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @@ -242,6 +244,7 @@ std::pair, std::vector> hash_partition( * @param[in] input The input table to be round-robin partitioned * @param[in] num_partitions Number of partitions for the table * @param[in] start_partition Index of the 1st partition + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned table's device memory * * @return A std::pair consisting of a unique_ptr to the partitioned table @@ -251,6 +254,7 @@ std::pair, std::vector> round_robi table_view const& input, cudf::size_type num_partitions, cudf::size_type start_partition = 0, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group diff --git a/cpp/include/cudf/round.hpp b/cpp/include/cudf/round.hpp index ba56ff34b97..158e6df7e5f 100644 --- a/cpp/include/cudf/round.hpp +++ b/cpp/include/cudf/round.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include @@ -66,6 +67,7 @@ enum class rounding_method : int32_t { HALF_UP, HALF_EVEN }; * @param decimal_places Number of decimal places to round to (default 0). If negative, this * specifies the number of positions to the left of the decimal point. 
* @param method Rounding method + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * * @return Column with each of the values rounded @@ -74,6 +76,7 @@ std::unique_ptr round( column_view const& input, int32_t decimal_places = 0, rounding_method method = rounding_method::HALF_UP, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index 66be2a12fbe..360dde11fc0 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include #include @@ -94,8 +95,8 @@ class scalar { [[nodiscard]] bool const* validity_data() const; protected: - data_type _type{type_id::EMPTY}; ///< Logical type of value in the scalar - rmm::device_scalar _is_valid; ///< Device bool signifying validity + data_type _type{type_id::EMPTY}; ///< Logical type of value in the scalar + cudf::detail::device_scalar _is_valid; ///< Device bool signifying validity /** * @brief Move constructor for scalar. 
diff --git a/cpp/include/cudf/strings/detail/strings_children.cuh b/cpp/include/cudf/strings/detail/strings_children.cuh index fb0b25cf9f1..de2f1770e28 100644 --- a/cpp/include/cudf/strings/detail/strings_children.cuh +++ b/cpp/include/cudf/strings/detail/strings_children.cuh @@ -65,19 +65,20 @@ rmm::device_uvector make_chars_buffer(column_view const& offsets, auto chars_data = rmm::device_uvector(chars_size, stream, mr); auto const d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets); - auto const src_ptrs = cudf::detail::make_counting_transform_iterator( - 0u, cuda::proclaim_return_type([begin] __device__(uint32_t idx) { + auto const src_ptrs = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + cuda::proclaim_return_type([begin] __device__(uint32_t idx) { // Due to a bug in cub (https://github.com/NVIDIA/cccl/issues/586), // we have to use `const_cast` to remove `const` qualifier from the source pointer. // This should be fine as long as we only read but not write anything to the source. 
return reinterpret_cast(const_cast(begin[idx].first)); })); - auto const src_sizes = cudf::detail::make_counting_transform_iterator( - 0u, cuda::proclaim_return_type([begin] __device__(uint32_t idx) { - return begin[idx].second; - })); - auto const dst_ptrs = cudf::detail::make_counting_transform_iterator( - 0u, + auto const src_sizes = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + cuda::proclaim_return_type( + [begin] __device__(uint32_t idx) { return begin[idx].second; })); + auto const dst_ptrs = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), cuda::proclaim_return_type([offsets = d_offsets, output = chars_data.data()] __device__( uint32_t idx) { return output + offsets[idx]; })); diff --git a/cpp/include/cudf/unary.hpp b/cpp/include/cudf/unary.hpp index 53e0f3a15d2..046e9745a71 100644 --- a/cpp/include/cudf/unary.hpp +++ b/cpp/include/cudf/unary.hpp @@ -16,8 +16,8 @@ #pragma once +#include #include -#include #include #include #include diff --git a/cpp/include/cudf/utilities/default_stream.hpp b/cpp/include/cudf/utilities/default_stream.hpp index 97a42243250..3e740b81cc9 100644 --- a/cpp/include/cudf/utilities/default_stream.hpp +++ b/cpp/include/cudf/utilities/default_stream.hpp @@ -16,10 +16,8 @@ #pragma once -#include #include -#include #include namespace CUDF_EXPORT cudf { diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index d558cfb5e85..21ee4fa9e9b 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -425,21 +425,21 @@ class base_2dspan { * * @return A pointer to the first element of the span */ - constexpr auto data() const noexcept { return _flat.data(); } + [[nodiscard]] constexpr auto data() const noexcept { return _flat.data(); } /** * @brief Returns the size in the span as pair. 
* * @return pair representing rows and columns size of the span */ - constexpr auto size() const noexcept { return _size; } + [[nodiscard]] constexpr auto size() const noexcept { return _size; } /** * @brief Returns the number of elements in the span. * * @return Number of elements in the span */ - constexpr auto count() const noexcept { return _flat.size(); } + [[nodiscard]] constexpr auto count() const noexcept { return _flat.size(); } /** * @brief Checks if the span is empty. @@ -467,7 +467,7 @@ class base_2dspan { * * @return A flattened span of the 2D span */ - constexpr RowType flat_view() const { return _flat; } + [[nodiscard]] constexpr RowType flat_view() const { return _flat; } /** * @brief Construct a 2D span from another 2D span of convertible type diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index 3f37ae02151..cf8413b597f 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -22,8 +22,6 @@ #include #include -#include - namespace CUDF_EXPORT cudf { /** diff --git a/cpp/include/cudf/utilities/type_dispatcher.hpp b/cpp/include/cudf/utilities/type_dispatcher.hpp index 15b5f921c1b..6351a84e38f 100644 --- a/cpp/include/cudf/utilities/type_dispatcher.hpp +++ b/cpp/include/cudf/utilities/type_dispatcher.hpp @@ -16,7 +16,6 @@ #pragma once -#include #include #include #include diff --git a/cpp/include/nvtext/minhash.hpp b/cpp/include/nvtext/minhash.hpp index 7c909f1a948..42124461cdf 100644 --- a/cpp/include/nvtext/minhash.hpp +++ b/cpp/include/nvtext/minhash.hpp @@ -41,6 +41,8 @@ namespace CUDF_EXPORT nvtext { * * This function uses MurmurHash3_x86_32 for the hash algorithm. 
* + * @deprecated Deprecated in 24.12 + * * @throw std::invalid_argument if the width < 2 * * @param input Strings column to compute minhash @@ -51,7 +53,7 @@ namespace CUDF_EXPORT nvtext { * @param mr Device memory resource used to allocate the returned column's device memory * @return Minhash values for each string in input */ -std::unique_ptr minhash( +[[deprecated]] std::unique_ptr minhash( cudf::strings_column_view const& input, cudf::numeric_scalar seed = 0, cudf::size_type width = 4, @@ -71,6 +73,8 @@ std::unique_ptr minhash( * * Any null row entries result in corresponding null output rows. * + * @deprecated Deprecated in 24.12 - to be replaced in a future release + * * @throw std::invalid_argument if the width < 2 * @throw std::invalid_argument if seeds is empty * @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit @@ -83,7 +87,7 @@ std::unique_ptr minhash( * @param mr Device memory resource used to allocate the returned column's device memory * @return List column of minhash values for each string per seed */ -std::unique_ptr minhash( +[[deprecated]] std::unique_ptr minhash( cudf::strings_column_view const& input, cudf::device_span seeds, cudf::size_type width = 4, @@ -102,6 +106,8 @@ std::unique_ptr minhash( * The hash function returns 2 uint64 values but only the first value * is used with the minhash calculation. 
* + * @deprecated Deprecated in 24.12 + * * @throw std::invalid_argument if the width < 2 * * @param input Strings column to compute minhash @@ -112,7 +118,7 @@ std::unique_ptr minhash( * @param mr Device memory resource used to allocate the returned column's device memory * @return Minhash values as UINT64 for each string in input */ -std::unique_ptr minhash64( +[[deprecated]] std::unique_ptr minhash64( cudf::strings_column_view const& input, cudf::numeric_scalar seed = 0, cudf::size_type width = 4, @@ -132,6 +138,8 @@ std::unique_ptr minhash64( * * Any null row entries result in corresponding null output rows. * + * @deprecated Deprecated in 24.12 - to be replaced in a future release + * * @throw std::invalid_argument if the width < 2 * @throw std::invalid_argument if seeds is empty * @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit @@ -144,7 +152,7 @@ std::unique_ptr minhash64( * @param mr Device memory resource used to allocate the returned column's device memory * @return List column of minhash values for each string per seed */ -std::unique_ptr minhash64( +[[deprecated]] std::unique_ptr minhash64( cudf::strings_column_view const& input, cudf::device_span seeds, cudf::size_type width = 4, @@ -164,6 +172,8 @@ std::unique_ptr minhash64( * * Any null row entries result in corresponding null output rows. 
* + * @deprecated Deprecated in 24.12 + * * @throw std::invalid_argument if seeds is empty * @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit * @@ -173,7 +183,7 @@ std::unique_ptr minhash64( * @param mr Device memory resource used to allocate the returned column's device memory * @return List column of minhash values for each string per seed */ -std::unique_ptr word_minhash( +[[deprecated]] std::unique_ptr word_minhash( cudf::lists_column_view const& input, cudf::device_span seeds, rmm::cuda_stream_view stream = cudf::get_default_stream(), @@ -193,6 +203,8 @@ std::unique_ptr word_minhash( * * Any null row entries result in corresponding null output rows. * + * @deprecated Deprecated in 24.12 + * * @throw std::invalid_argument if seeds is empty * @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit * @@ -202,7 +214,7 @@ std::unique_ptr word_minhash( * @param mr Device memory resource used to allocate the returned column's device memory * @return List column of minhash values for each string per seed */ -std::unique_ptr word_minhash64( +[[deprecated]] std::unique_ptr word_minhash64( cudf::lists_column_view const& input, cudf::device_span seeds, rmm::cuda_stream_view stream = cudf::get_default_stream(), diff --git a/cpp/include/nvtext/replace.hpp b/cpp/include/nvtext/replace.hpp index bbd0503379b..822edcbdb43 100644 --- a/cpp/include/nvtext/replace.hpp +++ b/cpp/include/nvtext/replace.hpp @@ -82,7 +82,7 @@ namespace CUDF_EXPORT nvtext { * The default of empty string will identify tokens using whitespace. 
* @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory - * @return New strings columns of with replaced strings + * @return New strings column with replaced strings */ std::unique_ptr replace_tokens( cudf::strings_column_view const& input, @@ -131,7 +131,7 @@ std::unique_ptr replace_tokens( * The default of empty string will identify tokens using whitespace. * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory - * @return New strings columns of with replaced strings + * @return New strings column of filtered strings */ std::unique_ptr filter_tokens( cudf::strings_column_view const& input, diff --git a/cpp/include/nvtext/stemmer.hpp b/cpp/include/nvtext/stemmer.hpp index 55a4124bfd0..e5b2a4cc21b 100644 --- a/cpp/include/nvtext/stemmer.hpp +++ b/cpp/include/nvtext/stemmer.hpp @@ -51,7 +51,7 @@ enum class letter_type { * * @code{.pseudo} * Example: - * st = ["trouble", "toy", "sygyzy"] + * st = ["trouble", "toy", "syzygy"] * b1 = is_letter(st, VOWEL, 1) * b1 is now [false, true, true] * @endcode @@ -62,7 +62,7 @@ enum class letter_type { * * @code{.pseudo} * Example: - * st = ["trouble", "toy", "sygyzy"] + * st = ["trouble", "toy", "syzygy"] * b2 = is_letter(st, CONSONANT, -1) // last letter checked in each string * b2 is now [false, true, false] * @endcode @@ -99,7 +99,7 @@ std::unique_ptr is_letter( * * @code{.pseudo} * Example: - * st = ["trouble", "toy", "sygyzy"] + * st = ["trouble", "toy", "syzygy"] * ix = [3, 1, 4] * b1 = is_letter(st, VOWEL, ix) * b1 is now [true, true, false] @@ -111,7 +111,7 @@ std::unique_ptr is_letter( * * @code{.pseudo} * Example: - * st = ["trouble", "toy", "sygyzy"] + * st = ["trouble", "toy", "syzygy"] * ix = [3, -2, 4] // 2nd to last character in st[1] is checked * b2 = is_letter(st, CONSONANT, ix) * b2 is 
now [false, false, true] diff --git a/cpp/include/nvtext/tokenize.hpp b/cpp/include/nvtext/tokenize.hpp index e61601c6fea..e345587f88b 100644 --- a/cpp/include/nvtext/tokenize.hpp +++ b/cpp/include/nvtext/tokenize.hpp @@ -292,7 +292,7 @@ std::unique_ptr load_vocabulary( * @throw cudf::logic_error if `delimiter` is invalid * * @param input Strings column to tokenize - * @param vocabulary Used to lookup tokens within + * @param vocabulary Used to lookup tokens within `input` * @param delimiter Used to identify tokens within `input` * @param default_id The token id to be used for tokens not found in the `vocabulary`; * Default is -1 diff --git a/cpp/src/ast/expression_parser.cpp b/cpp/src/ast/expression_parser.cpp index 3b650d791aa..5815ce33e33 100644 --- a/cpp/src/ast/expression_parser.cpp +++ b/cpp/src/ast/expression_parser.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,9 +16,6 @@ #include #include #include -#include -#include -#include #include #include #include diff --git a/cpp/src/ast/expressions.cpp b/cpp/src/ast/expressions.cpp index b45b9d0c78c..4c2b56dd4f5 100644 --- a/cpp/src/ast/expressions.cpp +++ b/cpp/src/ast/expressions.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,9 +17,6 @@ #include #include #include -#include -#include -#include #include #include diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index a6c878efbbc..1b23ea12a5e 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -27,15 +27,10 @@ #include #include #include -#include #include -#include #include #include -#include -#include #include -#include #include #include diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 4ca05f9c335..e6659f76c7c 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -15,6 +15,7 @@ */ #include +#include #include #include #include @@ -32,7 +33,6 @@ #include #include -#include #include #include @@ -329,7 +329,7 @@ cudf::size_type count_set_bits(bitmask_type const* bitmask, cudf::detail::grid_1d grid(num_words, block_size); - rmm::device_scalar non_zero_count(0, stream); + cudf::detail::device_scalar non_zero_count(0, stream); count_set_bits_kernel <<>>( diff --git a/cpp/src/column/column_factories.cpp b/cpp/src/column/column_factories.cpp index 482413d0ccb..972f97e8668 100644 --- a/cpp/src/column/column_factories.cpp +++ b/cpp/src/column/column_factories.cpp @@ -15,19 +15,13 @@ */ #include -#include #include #include #include -#include #include -#include #include -#include #include -#include - namespace cudf { namespace { struct size_of_helper { diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp index 386c5ebe478..e831aa9645d 100644 --- a/cpp/src/column/column_view.cpp +++ b/cpp/src/column/column_view.cpp @@ -15,7 +15,6 @@ */ #include -#include #include #include #include @@ -27,9 +26,7 @@ #include #include -#include #include -#include #include namespace cudf { diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index b8e140f1fa5..d8419760120 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -19,6 +19,7 @@ #include #include #include +#include 
#include #include #include @@ -162,7 +163,7 @@ size_type concatenate_masks(device_span d_views, size_type output_size, rmm::cuda_stream_view stream) { - rmm::device_scalar d_valid_count(0, stream); + cudf::detail::device_scalar d_valid_count(0, stream); constexpr size_type block_size{256}; cudf::detail::grid_1d config(output_size, block_size); concatenate_masks_kernel @@ -265,7 +266,7 @@ std::unique_ptr fused_concatenate(host_span views, auto out_view = out_col->mutable_view(); auto d_out_view = mutable_column_device_view::create(out_view, stream); - rmm::device_scalar d_valid_count(0, stream); + cudf::detail::device_scalar d_valid_count(0, stream); // Launch kernel constexpr size_type block_size{256}; diff --git a/cpp/src/copying/copy.cpp b/cpp/src/copying/copy.cpp index d60fb5ce110..5e2065ba844 100644 --- a/cpp/src/copying/copy.cpp +++ b/cpp/src/copying/copy.cpp @@ -20,16 +20,11 @@ #include #include #include -#include #include -#include -#include #include #include -#include - #include namespace cudf { diff --git a/cpp/src/copying/get_element.cu b/cpp/src/copying/get_element.cu index 29a28f81d1a..80b0bd5242f 100644 --- a/cpp/src/copying/get_element.cu +++ b/cpp/src/copying/get_element.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -71,7 +72,7 @@ struct get_element_functor { auto device_col = column_device_view::create(input, stream); rmm::device_scalar temp_data(stream, mr); - rmm::device_scalar temp_valid(stream, mr); + cudf::detail::device_scalar temp_valid(stream, mr); device_single_thread( [buffer = temp_data.data(), @@ -155,8 +156,8 @@ struct get_element_functor { auto device_col = column_device_view::create(input, stream); - rmm::device_scalar temp_data(stream, mr); - rmm::device_scalar temp_valid(stream, mr); + cudf::detail::device_scalar temp_data(stream, mr); + cudf::detail::device_scalar temp_valid(stream, mr); device_single_thread( [buffer = temp_data.data(), diff --git a/cpp/src/copying/pack.cpp 
b/cpp/src/copying/pack.cpp index 1282eec6c44..a001807c82b 100644 --- a/cpp/src/copying/pack.cpp +++ b/cpp/src/copying/pack.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include diff --git a/cpp/src/copying/split.cpp b/cpp/src/copying/split.cpp index 832a72ed5b0..116e3516460 100644 --- a/cpp/src/copying/split.cpp +++ b/cpp/src/copying/split.cpp @@ -14,10 +14,8 @@ * limitations under the License. */ -#include #include #include -#include #include #include diff --git a/cpp/src/datetime/timezone.cpp b/cpp/src/datetime/timezone.cpp index 2196ee97fee..f786624680c 100644 --- a/cpp/src/datetime/timezone.cpp +++ b/cpp/src/datetime/timezone.cpp @@ -13,12 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include #include #include #include #include -#include #include #include diff --git a/cpp/src/groupby/hash/compute_groupby.cu b/cpp/src/groupby/hash/compute_groupby.cu new file mode 100644 index 00000000000..59457bea694 --- /dev/null +++ b/cpp/src/groupby/hash/compute_groupby.cu @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "compute_groupby.hpp" +#include "compute_single_pass_aggs.hpp" +#include "helpers.cuh" +#include "sparse_to_dense_results.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include + +namespace cudf::groupby::detail::hash { +template +rmm::device_uvector extract_populated_keys(SetType const& key_set, + size_type num_keys, + rmm::cuda_stream_view stream) +{ + rmm::device_uvector populated_keys(num_keys, stream); + auto const keys_end = key_set.retrieve_all(populated_keys.begin(), stream.value()); + + populated_keys.resize(std::distance(populated_keys.begin(), keys_end), stream); + return populated_keys; +} + +template +std::unique_ptr compute_groupby(table_view const& keys, + host_span requests, + bool skip_rows_with_nulls, + Equal const& d_row_equal, + Hash const& d_row_hash, + cudf::detail::result_cache* cache, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + // convert to int64_t to avoid potential overflow with large `keys` + auto const num_keys = static_cast(keys.num_rows()); + + // Cache of sparse results where the location of aggregate value in each + // column is indexed by the hash set + cudf::detail::result_cache sparse_results(requests.size()); + + auto const set = cuco::static_set{ + num_keys, + cudf::detail::CUCO_DESIRED_LOAD_FACTOR, // 50% load factor + cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL}, + d_row_equal, + probing_scheme_t{d_row_hash}, + cuco::thread_scope_device, + cuco::storage{}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()}; + + auto row_bitmask = + skip_rows_with_nulls + ? 
cudf::bitmask_and(keys, stream, cudf::get_current_device_resource_ref()).first + : rmm::device_buffer{}; + + // Compute all single pass aggs first + compute_single_pass_aggs(num_keys, + skip_rows_with_nulls, + static_cast(row_bitmask.data()), + set.ref(cuco::insert_and_find), + requests, + &sparse_results, + stream); + + // Extract the populated indices from the hash set and create a gather map. + // Gathering using this map from sparse results will give dense results. + auto gather_map = extract_populated_keys(set, keys.num_rows(), stream); + + // Compact all results from sparse_results and insert into cache + sparse_to_dense_results(requests, + &sparse_results, + cache, + gather_map, + set.ref(cuco::find), + static_cast(row_bitmask.data()), + stream, + mr); + + return cudf::detail::gather(keys, + gather_map, + out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); +} + +template rmm::device_uvector extract_populated_keys( + global_set_t const& key_set, size_type num_keys, rmm::cuda_stream_view stream); + +template rmm::device_uvector extract_populated_keys( + nullable_global_set_t const& key_set, size_type num_keys, rmm::cuda_stream_view stream); + +template std::unique_ptr
compute_groupby( + table_view const& keys, + host_span requests, + bool skip_rows_with_nulls, + row_comparator_t const& d_row_equal, + row_hash_t const& d_row_hash, + cudf::detail::result_cache* cache, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +template std::unique_ptr
compute_groupby( + table_view const& keys, + host_span requests, + bool skip_rows_with_nulls, + nullable_row_comparator_t const& d_row_equal, + row_hash_t const& d_row_hash, + cudf::detail::result_cache* cache, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/compute_groupby.hpp b/cpp/src/groupby/hash/compute_groupby.hpp new file mode 100644 index 00000000000..7bb3a60ff07 --- /dev/null +++ b/cpp/src/groupby/hash/compute_groupby.hpp @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +namespace cudf::groupby::detail::hash { +/** + * @brief Computes and returns a device vector containing all populated keys in + * `key_set`. + * + * @tparam SetType Type of key hash set + * + * @param key_set Key hash set + * @param num_keys Number of input keys + * @param stream CUDA stream used for device memory operations and kernel launches + * @return An array of unique keys contained in `key_set` + */ +template +rmm::device_uvector extract_populated_keys(SetType const& key_set, + size_type num_keys, + rmm::cuda_stream_view stream); + +/** + * @brief Computes groupby using hash table. + * + * First, we create a hash table that stores the indices of unique rows in + * `keys`. 
The upper limit on the number of values in this map is the number + * of rows in `keys`. + * + * To store the results of aggregations, we create temporary sparse columns + * which have the same size as input value columns. Using the hash map, we + * determine the location within the sparse column to write the result of the + * aggregation into. + * + * The sparse column results of all aggregations are stored into the cache + * `sparse_results`. This enables the use of previously calculated results in + * other aggregations. + * + * All the aggregations which can be computed in a single pass are computed + * first, in a combined kernel. Then using these results, aggregations that + * require multiple passes, will be computed. + * + * Finally, using the hash map, we generate a vector of indices of populated + * values in sparse result columns. Then, for each aggregation originally + * requested in `requests`, we gather sparse results into a column of dense + * results using the aforementioned index vector. Dense results are stored into + * the in/out parameter `cache`. 
+ * + * @tparam Equal Device row comparator type + * @tparam Hash Device row hasher type + * + * @param keys Table whose rows act as the groupby keys + * @param requests The set of columns to aggregate and the aggregations to perform + * @param skip_rows_with_nulls Flag indicating whether to ignore nulls or not + * @param d_row_equal Device row comparator + * @param d_row_hash Device row hasher + * @param cache Dense aggregation results + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned table + * @return Table of unique keys + */ +template +std::unique_ptr compute_groupby(table_view const& keys, + host_span requests, + bool skip_rows_with_nulls, + Equal const& d_row_equal, + Hash const& d_row_hash, + cudf::detail::result_cache* cache, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/compute_mapping_indices.cu b/cpp/src/groupby/hash/compute_mapping_indices.cu new file mode 100644 index 00000000000..519d7cd2f1c --- /dev/null +++ b/cpp/src/groupby/hash/compute_mapping_indices.cu @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "compute_mapping_indices.cuh" +#include "compute_mapping_indices.hpp" + +namespace cudf::groupby::detail::hash { +template cudf::size_type max_occupancy_grid_size>( + cudf::size_type n); + +template void compute_mapping_indices>( + cudf::size_type grid_size, + cudf::size_type num, + hash_set_ref_t global_set, + bitmask_type const* row_bitmask, + bool skip_rows_with_nulls, + cudf::size_type* local_mapping_index, + cudf::size_type* global_mapping_index, + cudf::size_type* block_cardinality, + cuda::std::atomic_flag* needs_global_memory_fallback, + rmm::cuda_stream_view stream); +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/compute_mapping_indices.cuh b/cpp/src/groupby/hash/compute_mapping_indices.cuh new file mode 100644 index 00000000000..d353830780f --- /dev/null +++ b/cpp/src/groupby/hash/compute_mapping_indices.cuh @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include "compute_mapping_indices.hpp" +#include "helpers.cuh" + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include + +namespace cudf::groupby::detail::hash { +template +__device__ void find_local_mapping(cooperative_groups::thread_block const& block, + cudf::size_type idx, + cudf::size_type num_input_rows, + SetType shared_set, + bitmask_type const* row_bitmask, + bool skip_rows_with_nulls, + cudf::size_type* cardinality, + cudf::size_type* local_mapping_index, + cudf::size_type* shared_set_indices) +{ + auto const is_valid_input = + idx < num_input_rows and (not skip_rows_with_nulls or cudf::bit_is_set(row_bitmask, idx)); + auto const [result_idx, inserted] = [&]() { + if (is_valid_input) { + auto const result = shared_set.insert_and_find(idx); + auto const matched_idx = *result.first; + auto const inserted = result.second; + // inserted a new element + if (result.second) { + auto const shared_set_index = atomicAdd(cardinality, 1); + shared_set_indices[shared_set_index] = idx; + local_mapping_index[idx] = shared_set_index; + } + return cuda::std::pair{matched_idx, inserted}; + } + return cuda::std::pair{0, false}; // dummy values + }(); + // Syncing the thread block is needed so that updates in `local_mapping_index` are visible to all + // threads in the thread block. 
+ block.sync(); + if (is_valid_input) { + // element was already in set + if (!inserted) { local_mapping_index[idx] = local_mapping_index[result_idx]; } + } +} + +template +__device__ void find_global_mapping(cooperative_groups::thread_block const& block, + cudf::size_type cardinality, + SetRef global_set, + cudf::size_type* shared_set_indices, + cudf::size_type* global_mapping_index) +{ + // for all unique keys in shared memory hash set, stores their matches in + // global hash set to `global_mapping_index` + for (auto idx = block.thread_rank(); idx < cardinality; idx += block.num_threads()) { + auto const input_idx = shared_set_indices[idx]; + global_mapping_index[block.group_index().x * GROUPBY_SHM_MAX_ELEMENTS + idx] = + *global_set.insert_and_find(input_idx).first; + } +} + +/* + * @brief Inserts keys into the shared memory hash set, and stores the block-wise rank for a given + * row index in `local_mapping_index`. If the number of unique keys found in a threadblock exceeds + * `GROUPBY_CARDINALITY_THRESHOLD`, the threads in that block will exit without updating + * `global_set` or setting `global_mapping_index`. Else, we insert the unique keys found to the + * global hash set, and save the row index of the global sparse table in `global_mapping_index`. 
+ */ +template +CUDF_KERNEL void mapping_indices_kernel(cudf::size_type num_input_rows, + SetRef global_set, + bitmask_type const* row_bitmask, + bool skip_rows_with_nulls, + cudf::size_type* local_mapping_index, + cudf::size_type* global_mapping_index, + cudf::size_type* block_cardinality, + cuda::std::atomic_flag* needs_global_memory_fallback) +{ + __shared__ cudf::size_type shared_set_indices[GROUPBY_SHM_MAX_ELEMENTS]; + + // Shared set initialization + __shared__ cuco::window windows[window_extent.value()]; + + auto raw_set = cuco::static_set_ref{ + cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL}, + global_set.key_eq(), + probing_scheme_t{global_set.hash_function()}, + cuco::thread_scope_block, + cuco::aow_storage_ref{ + window_extent, windows}}; + auto shared_set = raw_set.rebind_operators(cuco::insert_and_find); + + auto const block = cooperative_groups::this_thread_block(); + shared_set.initialize(block); + + __shared__ cudf::size_type cardinality; + if (block.thread_rank() == 0) { cardinality = 0; } + block.sync(); + + auto const stride = cudf::detail::grid_1d::grid_stride(); + + for (auto idx = cudf::detail::grid_1d::global_thread_id(); + idx - block.thread_rank() < num_input_rows; + idx += stride) { + find_local_mapping(block, + idx, + num_input_rows, + shared_set, + row_bitmask, + skip_rows_with_nulls, + &cardinality, + local_mapping_index, + shared_set_indices); + + block.sync(); + + if (cardinality >= GROUPBY_CARDINALITY_THRESHOLD) { + if (block.thread_rank() == 0) { needs_global_memory_fallback->test_and_set(); } + break; + } + } + + // Insert unique keys from shared to global hash set if block-cardinality + // doesn't exceed the threshold upper-limit + if (cardinality < GROUPBY_CARDINALITY_THRESHOLD) { + find_global_mapping(block, cardinality, global_set, shared_set_indices, global_mapping_index); + } + + if (block.thread_rank() == 0) { block_cardinality[block.group_index().x] = cardinality; } +} + +template +cudf::size_type 
max_occupancy_grid_size(cudf::size_type n) +{ + cudf::size_type max_active_blocks{-1}; + CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &max_active_blocks, mapping_indices_kernel, GROUPBY_BLOCK_SIZE, 0)); + auto const grid_size = max_active_blocks * cudf::detail::num_multiprocessors(); + auto const num_blocks = cudf::util::div_rounding_up_safe(n, GROUPBY_BLOCK_SIZE); + return std::min(grid_size, num_blocks); +} + +template +void compute_mapping_indices(cudf::size_type grid_size, + cudf::size_type num, + SetRef global_set, + bitmask_type const* row_bitmask, + bool skip_rows_with_nulls, + cudf::size_type* local_mapping_index, + cudf::size_type* global_mapping_index, + cudf::size_type* block_cardinality, + cuda::std::atomic_flag* needs_global_memory_fallback, + rmm::cuda_stream_view stream) +{ + mapping_indices_kernel<<>>( + num, + global_set, + row_bitmask, + skip_rows_with_nulls, + local_mapping_index, + global_mapping_index, + block_cardinality, + needs_global_memory_fallback); +} +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/compute_mapping_indices.hpp b/cpp/src/groupby/hash/compute_mapping_indices.hpp new file mode 100644 index 00000000000..473ad99e650 --- /dev/null +++ b/cpp/src/groupby/hash/compute_mapping_indices.hpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include + +#include + +#include + +namespace cudf::groupby::detail::hash { +/* + * @brief Computes the maximum number of active blocks of the given kernel that can be executed on + * the underlying device + */ +template +[[nodiscard]] cudf::size_type max_occupancy_grid_size(cudf::size_type n); + +template +void compute_mapping_indices(cudf::size_type grid_size, + cudf::size_type num, + SetRef global_set, + bitmask_type const* row_bitmask, + bool skip_rows_with_nulls, + cudf::size_type* local_mapping_index, + cudf::size_type* global_mapping_index, + cudf::size_type* block_cardinality, + cuda::std::atomic_flag* needs_global_memory_fallback, + rmm::cuda_stream_view stream); +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/compute_mapping_indices_null.cu b/cpp/src/groupby/hash/compute_mapping_indices_null.cu new file mode 100644 index 00000000000..81c3c9e456f --- /dev/null +++ b/cpp/src/groupby/hash/compute_mapping_indices_null.cu @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "compute_mapping_indices.cuh" +#include "compute_mapping_indices.hpp" + +namespace cudf::groupby::detail::hash { +template cudf::size_type +max_occupancy_grid_size>(cudf::size_type n); + +template void compute_mapping_indices>( + cudf::size_type grid_size, + cudf::size_type num, + nullable_hash_set_ref_t global_set, + bitmask_type const* row_bitmask, + bool skip_rows_with_nulls, + cudf::size_type* local_mapping_index, + cudf::size_type* global_mapping_index, + cudf::size_type* block_cardinality, + cuda::std::atomic_flag* needs_global_memory_fallback, + rmm::cuda_stream_view stream); +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/compute_shared_memory_aggs.cu b/cpp/src/groupby/hash/compute_shared_memory_aggs.cu new file mode 100644 index 00000000000..12c02a1865e --- /dev/null +++ b/cpp/src/groupby/hash/compute_shared_memory_aggs.cu @@ -0,0 +1,323 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "compute_shared_memory_aggs.hpp" +#include "global_memory_aggregator.cuh" +#include "helpers.cuh" +#include "shared_memory_aggregator.cuh" +#include "single_pass_functors.cuh" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +namespace cudf::groupby::detail::hash { +namespace { +/// Functor used by type dispatcher returning the size of the underlying C++ type +struct size_of_functor { + template + __device__ constexpr cudf::size_type operator()() + { + return sizeof(T); + } +}; + +/// Shared memory data alignment +CUDF_HOST_DEVICE cudf::size_type constexpr ALIGNMENT = 8; + +// Prepares shared memory data required by each output column, exits if +// no enough memory space to perform the shared memory aggregation for the +// current output column +__device__ void calculate_columns_to_aggregate(cudf::size_type& col_start, + cudf::size_type& col_end, + cudf::mutable_table_device_view output_values, + cudf::size_type output_size, + cudf::size_type* shmem_agg_res_offsets, + cudf::size_type* shmem_agg_mask_offsets, + cudf::size_type cardinality, + cudf::size_type total_agg_size) +{ + col_start = col_end; + cudf::size_type bytes_allocated = 0; + + auto const valid_col_size = + cudf::util::round_up_safe(static_cast(sizeof(bool) * cardinality), ALIGNMENT); + + while (bytes_allocated < total_agg_size && col_end < output_size) { + auto const col_idx = col_end; + auto const next_col_size = + cudf::util::round_up_safe(cudf::type_dispatcher( + output_values.column(col_idx).type(), size_of_functor{}) * + cardinality, + ALIGNMENT); + auto const next_col_total_size = next_col_size + valid_col_size; + + if (bytes_allocated + next_col_total_size > total_agg_size) { + CUDF_UNREACHABLE("Not enough memory for shared memory aggregations"); + } + + shmem_agg_res_offsets[col_end] = bytes_allocated; + shmem_agg_mask_offsets[col_end] = bytes_allocated + next_col_size; + + bytes_allocated += next_col_total_size; + 
++col_end; + } +} + +// Each block initialize its own shared memory aggregation results +__device__ void initialize_shmem_aggregations(cooperative_groups::thread_block const& block, + cudf::size_type col_start, + cudf::size_type col_end, + cudf::mutable_table_device_view output_values, + cuda::std::byte* shmem_agg_storage, + cudf::size_type* shmem_agg_res_offsets, + cudf::size_type* shmem_agg_mask_offsets, + cudf::size_type cardinality, + cudf::aggregation::Kind const* d_agg_kinds) +{ + for (auto col_idx = col_start; col_idx < col_end; col_idx++) { + for (auto idx = block.thread_rank(); idx < cardinality; idx += block.num_threads()) { + auto target = + reinterpret_cast(shmem_agg_storage + shmem_agg_res_offsets[col_idx]); + auto target_mask = + reinterpret_cast(shmem_agg_storage + shmem_agg_mask_offsets[col_idx]); + cudf::detail::dispatch_type_and_aggregation(output_values.column(col_idx).type(), + d_agg_kinds[col_idx], + initialize_shmem{}, + target, + target_mask, + idx); + } + } + block.sync(); +} + +__device__ void compute_pre_aggregrations(cudf::size_type col_start, + cudf::size_type col_end, + bitmask_type const* row_bitmask, + bool skip_rows_with_nulls, + cudf::table_device_view source, + cudf::size_type num_input_rows, + cudf::size_type* local_mapping_index, + cuda::std::byte* shmem_agg_storage, + cudf::size_type* shmem_agg_res_offsets, + cudf::size_type* shmem_agg_mask_offsets, + cudf::aggregation::Kind const* d_agg_kinds) +{ + // Aggregates global memory sources to shared memory targets + for (auto source_idx = cudf::detail::grid_1d::global_thread_id(); source_idx < num_input_rows; + source_idx += cudf::detail::grid_1d::grid_stride()) { + if (not skip_rows_with_nulls or cudf::bit_is_set(row_bitmask, source_idx)) { + auto const target_idx = local_mapping_index[source_idx]; + for (auto col_idx = col_start; col_idx < col_end; col_idx++) { + auto const source_col = source.column(col_idx); + + cuda::std::byte* target = + reinterpret_cast(shmem_agg_storage + 
shmem_agg_res_offsets[col_idx]); + bool* target_mask = + reinterpret_cast(shmem_agg_storage + shmem_agg_mask_offsets[col_idx]); + + cudf::detail::dispatch_type_and_aggregation(source_col.type(), + d_agg_kinds[col_idx], + shmem_element_aggregator{}, + target, + target_mask, + target_idx, + source_col, + source_idx); + } + } + } +} + +__device__ void compute_final_aggregations(cooperative_groups::thread_block const& block, + cudf::size_type col_start, + cudf::size_type col_end, + cudf::table_device_view input_values, + cudf::mutable_table_device_view target, + cudf::size_type cardinality, + cudf::size_type* global_mapping_index, + cuda::std::byte* shmem_agg_storage, + cudf::size_type* agg_res_offsets, + cudf::size_type* agg_mask_offsets, + cudf::aggregation::Kind const* d_agg_kinds) +{ + // Aggregates shared memory sources to global memory targets + for (auto idx = block.thread_rank(); idx < cardinality; idx += block.num_threads()) { + auto const target_idx = + global_mapping_index[block.group_index().x * GROUPBY_SHM_MAX_ELEMENTS + idx]; + for (auto col_idx = col_start; col_idx < col_end; col_idx++) { + auto target_col = target.column(col_idx); + + cuda::std::byte* source = + reinterpret_cast(shmem_agg_storage + agg_res_offsets[col_idx]); + bool* source_mask = reinterpret_cast(shmem_agg_storage + agg_mask_offsets[col_idx]); + + cudf::detail::dispatch_type_and_aggregation(input_values.column(col_idx).type(), + d_agg_kinds[col_idx], + gmem_element_aggregator{}, + target_col, + target_idx, + input_values.column(col_idx), + source, + source_mask, + idx); + } + } + block.sync(); +} + +/* Takes the local_mapping_index and global_mapping_index to compute + * pre (shared) and final (global) aggregates*/ +CUDF_KERNEL void single_pass_shmem_aggs_kernel(cudf::size_type num_rows, + bitmask_type const* row_bitmask, + bool skip_rows_with_nulls, + cudf::size_type* local_mapping_index, + cudf::size_type* global_mapping_index, + cudf::size_type* block_cardinality, + 
cudf::table_device_view input_values, + cudf::mutable_table_device_view output_values, + cudf::aggregation::Kind const* d_agg_kinds, + cudf::size_type total_agg_size, + cudf::size_type offsets_size) +{ + auto const block = cooperative_groups::this_thread_block(); + auto const cardinality = block_cardinality[block.group_index().x]; + if (cardinality >= GROUPBY_CARDINALITY_THRESHOLD) { return; } + + auto const num_cols = output_values.num_columns(); + + __shared__ cudf::size_type col_start; + __shared__ cudf::size_type col_end; + extern __shared__ cuda::std::byte shmem_agg_storage[]; + + cudf::size_type* shmem_agg_res_offsets = + reinterpret_cast(shmem_agg_storage + total_agg_size); + cudf::size_type* shmem_agg_mask_offsets = + reinterpret_cast(shmem_agg_storage + total_agg_size + offsets_size); + + if (block.thread_rank() == 0) { + col_start = 0; + col_end = 0; + } + block.sync(); + + while (col_end < num_cols) { + if (block.thread_rank() == 0) { + calculate_columns_to_aggregate(col_start, + col_end, + output_values, + num_cols, + shmem_agg_res_offsets, + shmem_agg_mask_offsets, + cardinality, + total_agg_size); + } + block.sync(); + + initialize_shmem_aggregations(block, + col_start, + col_end, + output_values, + shmem_agg_storage, + shmem_agg_res_offsets, + shmem_agg_mask_offsets, + cardinality, + d_agg_kinds); + + compute_pre_aggregrations(col_start, + col_end, + row_bitmask, + skip_rows_with_nulls, + input_values, + num_rows, + local_mapping_index, + shmem_agg_storage, + shmem_agg_res_offsets, + shmem_agg_mask_offsets, + d_agg_kinds); + block.sync(); + + compute_final_aggregations(block, + col_start, + col_end, + input_values, + output_values, + cardinality, + global_mapping_index, + shmem_agg_storage, + shmem_agg_res_offsets, + shmem_agg_mask_offsets, + d_agg_kinds); + } +} +} // namespace + +std::size_t available_shared_memory_size(cudf::size_type grid_size) +{ + auto const active_blocks_per_sm = + cudf::util::div_rounding_up_safe(grid_size, 
cudf::detail::num_multiprocessors()); + + size_t dynamic_shmem_size = 0; + CUDF_CUDA_TRY(cudaOccupancyAvailableDynamicSMemPerBlock( + &dynamic_shmem_size, single_pass_shmem_aggs_kernel, active_blocks_per_sm, GROUPBY_BLOCK_SIZE)); + return cudf::util::round_down_safe(static_cast(0.5 * dynamic_shmem_size), + ALIGNMENT); +} + +void compute_shared_memory_aggs(cudf::size_type grid_size, + std::size_t available_shmem_size, + cudf::size_type num_input_rows, + bitmask_type const* row_bitmask, + bool skip_rows_with_nulls, + cudf::size_type* local_mapping_index, + cudf::size_type* global_mapping_index, + cudf::size_type* block_cardinality, + cudf::table_device_view input_values, + cudf::mutable_table_device_view output_values, + cudf::aggregation::Kind const* d_agg_kinds, + rmm::cuda_stream_view stream) +{ + // For each aggregation, need one offset determining where the aggregation is + // performed, another indicating the validity of the aggregation + auto const shmem_offsets_size = output_values.num_columns() * sizeof(cudf::size_type); + // The rest of shmem is utilized for the actual arrays in shmem + CUDF_EXPECTS(available_shmem_size > shmem_offsets_size * 2, + "No enough space for shared memory aggregations"); + auto const shmem_agg_size = available_shmem_size - shmem_offsets_size * 2; + single_pass_shmem_aggs_kernel<<>>( + num_input_rows, + row_bitmask, + skip_rows_with_nulls, + local_mapping_index, + global_mapping_index, + block_cardinality, + input_values, + output_values, + d_agg_kinds, + shmem_agg_size, + shmem_offsets_size); +} +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/compute_shared_memory_aggs.hpp b/cpp/src/groupby/hash/compute_shared_memory_aggs.hpp new file mode 100644 index 00000000000..653821fd53b --- /dev/null +++ b/cpp/src/groupby/hash/compute_shared_memory_aggs.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +#include + +namespace cudf::groupby::detail::hash { + +std::size_t available_shared_memory_size(cudf::size_type grid_size); + +void compute_shared_memory_aggs(cudf::size_type grid_size, + std::size_t available_shmem_size, + cudf::size_type num_input_rows, + bitmask_type const* row_bitmask, + bool skip_rows_with_nulls, + cudf::size_type* local_mapping_index, + cudf::size_type* global_mapping_index, + cudf::size_type* block_cardinality, + cudf::table_device_view input_values, + cudf::mutable_table_device_view output_values, + cudf::aggregation::Kind const* d_agg_kinds, + rmm::cuda_stream_view stream); + +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/compute_single_pass_aggs.cu b/cpp/src/groupby/hash/compute_single_pass_aggs.cu new file mode 100644 index 00000000000..e292543e6e9 --- /dev/null +++ b/cpp/src/groupby/hash/compute_single_pass_aggs.cu @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "compute_single_pass_aggs.hpp" +#include "create_sparse_results_table.hpp" +#include "flatten_single_pass_aggs.hpp" +#include "helpers.cuh" +#include "single_pass_functors.cuh" +#include "var_hash_functor.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +namespace cudf::groupby::detail::hash { +/** + * @brief Computes all aggregations from `requests` that require a single pass + * over the data and stores the results in `sparse_results` + */ +template +void compute_single_pass_aggs(int64_t num_keys, + bool skip_rows_with_nulls, + bitmask_type const* row_bitmask, + SetType set, + host_span requests, + cudf::detail::result_cache* sparse_results, + rmm::cuda_stream_view stream) +{ + // flatten the aggs to a table that can be operated on by aggregate_row + auto const [flattened_values, agg_kinds, aggs] = flatten_single_pass_aggs(requests); + + // make table that will hold sparse results + table sparse_table = create_sparse_results_table(flattened_values, agg_kinds, stream); + // prepare to launch kernel to do the actual aggregation + auto d_sparse_table = mutable_table_device_view::create(sparse_table, stream); + auto d_values = table_device_view::create(flattened_values, stream); + auto const d_aggs = cudf::detail::make_device_uvector_async( + agg_kinds, stream, cudf::get_current_device_resource_ref()); + + thrust::for_each_n( + rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(0), + num_keys, + 
hash::compute_single_pass_aggs_fn{ + set, *d_values, *d_sparse_table, d_aggs.data(), row_bitmask, skip_rows_with_nulls}); + // Add results back to sparse_results cache + auto sparse_result_cols = sparse_table.release(); + for (size_t i = 0; i < aggs.size(); i++) { + // Note that the cache will make a copy of this temporary aggregation + sparse_results->add_result( + flattened_values.column(i), *aggs[i], std::move(sparse_result_cols[i])); + } +} + +template void compute_single_pass_aggs>( + int64_t num_keys, + bool skip_rows_with_nulls, + bitmask_type const* row_bitmask, + hash_set_ref_t set, + host_span requests, + cudf::detail::result_cache* sparse_results, + rmm::cuda_stream_view stream); + +template void compute_single_pass_aggs>( + int64_t num_keys, + bool skip_rows_with_nulls, + bitmask_type const* row_bitmask, + nullable_hash_set_ref_t set, + host_span requests, + cudf::detail::result_cache* sparse_results, + rmm::cuda_stream_view stream); +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/compute_single_pass_aggs.hpp b/cpp/src/groupby/hash/compute_single_pass_aggs.hpp new file mode 100644 index 00000000000..a7434bdf61a --- /dev/null +++ b/cpp/src/groupby/hash/compute_single_pass_aggs.hpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include +#include +#include + +#include + +namespace cudf::groupby::detail::hash { +/** + * @brief Computes all aggregations from `requests` that require a single pass + * over the data and stores the results in `sparse_results` + */ +template +void compute_single_pass_aggs(int64_t num_keys, + bool skip_rows_with_nulls, + bitmask_type const* row_bitmask, + SetType set, + cudf::host_span requests, + cudf::detail::result_cache* sparse_results, + rmm::cuda_stream_view stream); +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/create_sparse_results_table.cu b/cpp/src/groupby/hash/create_sparse_results_table.cu new file mode 100644 index 00000000000..22fa4fc584c --- /dev/null +++ b/cpp/src/groupby/hash/create_sparse_results_table.cu @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "create_sparse_results_table.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +namespace cudf::groupby::detail::hash { +// make table that will hold sparse results +cudf::table create_sparse_results_table(table_view const& flattened_values, + std::vector aggs, + rmm::cuda_stream_view stream) +{ + // TODO single allocation - room for performance improvement + std::vector> sparse_columns; + sparse_columns.reserve(flattened_values.num_columns()); + std::transform( + flattened_values.begin(), + flattened_values.end(), + aggs.begin(), + std::back_inserter(sparse_columns), + [stream](auto const& col, auto const& agg) { + bool nullable = + (agg == aggregation::COUNT_VALID or agg == aggregation::COUNT_ALL) + ? false + : (col.has_nulls() or agg == aggregation::VARIANCE or agg == aggregation::STD); + auto mask_flag = (nullable) ? mask_state::ALL_NULL : mask_state::UNALLOCATED; + + auto col_type = cudf::is_dictionary(col.type()) + ? cudf::dictionary_column_view(col).keys().type() + : col.type(); + + return make_fixed_width_column( + cudf::detail::target_type(col_type, agg), col.size(), mask_flag, stream); + }); + + table sparse_table(std::move(sparse_columns)); + mutable_table_view table_view = sparse_table.mutable_view(); + cudf::detail::initialize_with_identity(table_view, aggs, stream); + return sparse_table; +} +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/create_sparse_results_table.hpp b/cpp/src/groupby/hash/create_sparse_results_table.hpp new file mode 100644 index 00000000000..c1d4e0d3f20 --- /dev/null +++ b/cpp/src/groupby/hash/create_sparse_results_table.hpp @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include + +#include + +#include + +namespace cudf::groupby::detail::hash { +// make table that will hold sparse results +cudf::table create_sparse_results_table(table_view const& flattened_values, + std::vector aggs_kinds, + rmm::cuda_stream_view stream); +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/flatten_single_pass_aggs.hpp b/cpp/src/groupby/hash/flatten_single_pass_aggs.hpp index 2bf983e5e90..dfad51f27d4 100644 --- a/cpp/src/groupby/hash/flatten_single_pass_aggs.hpp +++ b/cpp/src/groupby/hash/flatten_single_pass_aggs.hpp @@ -17,7 +17,6 @@ #include #include -#include #include #include diff --git a/cpp/src/groupby/hash/global_memory_aggregator.cuh b/cpp/src/groupby/hash/global_memory_aggregator.cuh new file mode 100644 index 00000000000..50e89c727ff --- /dev/null +++ b/cpp/src/groupby/hash/global_memory_aggregator.cuh @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +namespace cudf::groupby::detail::hash { +template +struct update_target_element_gmem { + __device__ void operator()(cudf::mutable_column_device_view, + cudf::size_type, + cudf::column_device_view, + cuda::std::byte*, + cudf::size_type) const noexcept + { + CUDF_UNREACHABLE("Invalid source type and aggregation combination."); + } +}; + +template +struct update_target_element_gmem< + Source, + cudf::aggregation::MIN, + cuda::std::enable_if_t() && cudf::has_atomic_support()>> { + __device__ void operator()(cudf::mutable_column_device_view target, + cudf::size_type target_index, + cudf::column_device_view source_column, + cuda::std::byte* source, + cudf::size_type source_index) const noexcept + { + using DeviceType = cudf::detail::underlying_target_t; + DeviceType* source_casted = reinterpret_cast(source); + cudf::detail::atomic_min(&target.element(target_index), + static_cast(source_casted[source_index])); + + if (target.is_null(target_index)) { target.set_valid(target_index); } + } +}; + +template +struct update_target_element_gmem< + Source, + cudf::aggregation::MAX, + cuda::std::enable_if_t() && cudf::has_atomic_support()>> { + __device__ void operator()(cudf::mutable_column_device_view target, + cudf::size_type target_index, + cudf::column_device_view source_column, + cuda::std::byte* source, + cudf::size_type source_index) const noexcept + { + using DeviceType = cudf::detail::underlying_target_t; + DeviceType* source_casted = reinterpret_cast(source); + cudf::detail::atomic_max(&target.element(target_index), + static_cast(source_casted[source_index])); + + if (target.is_null(target_index)) { target.set_valid(target_index); } + } +}; + +template +struct update_target_element_gmem< + Source, + cudf::aggregation::SUM, + cuda::std::enable_if_t() && cudf::has_atomic_support() && + !cudf::is_timestamp()>> { + __device__ void operator()(cudf::mutable_column_device_view 
target, + cudf::size_type target_index, + cudf::column_device_view source_column, + cuda::std::byte* source, + cudf::size_type source_index) const noexcept + { + using DeviceType = cudf::detail::underlying_target_t; + DeviceType* source_casted = reinterpret_cast(source); + cudf::detail::atomic_add(&target.element(target_index), + static_cast(source_casted[source_index])); + + if (target.is_null(target_index)) { target.set_valid(target_index); } + } +}; + +// The shared memory will already have it squared +template +struct update_target_element_gmem< + Source, + cudf::aggregation::SUM_OF_SQUARES, + cuda::std::enable_if_t()>> { + __device__ void operator()(cudf::mutable_column_device_view target, + cudf::size_type target_index, + cudf::column_device_view source_column, + cuda::std::byte* source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + + Target* source_casted = reinterpret_cast(source); + Target value = static_cast(source_casted[source_index]); + + cudf::detail::atomic_add(&target.element(target_index), value); + + if (target.is_null(target_index)) { target.set_valid(target_index); } + } +}; + +template +struct update_target_element_gmem< + Source, + cudf::aggregation::PRODUCT, + cuda::std::enable_if_t()>> { + __device__ void operator()(cudf::mutable_column_device_view target, + cudf::size_type target_index, + cudf::column_device_view source_column, + cuda::std::byte* source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + + Target* source_casted = reinterpret_cast(source); + cudf::detail::atomic_mul(&target.element(target_index), + static_cast(source_casted[source_index])); + + if (target.is_null(target_index)) { target.set_valid(target_index); } + } +}; + +// Assuming that the target column of COUNT_VALID, COUNT_ALL would be using fixed_width column and +// non-fixed point column +template +struct update_target_element_gmem< + Source, + 
cudf::aggregation::COUNT_VALID, + cuda::std::enable_if_t< + cudf::detail::is_valid_aggregation()>> { + __device__ void operator()(cudf::mutable_column_device_view target, + cudf::size_type target_index, + cudf::column_device_view source_column, + cuda::std::byte* source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + + Target* source_casted = reinterpret_cast(source); + cudf::detail::atomic_add(&target.element(target_index), + static_cast(source_casted[source_index])); + + // It is assumed the output for COUNT_VALID is initialized to be all valid + } +}; + +template +struct update_target_element_gmem< + Source, + cudf::aggregation::COUNT_ALL, + cuda::std::enable_if_t< + cudf::detail::is_valid_aggregation()>> { + __device__ void operator()(cudf::mutable_column_device_view target, + cudf::size_type target_index, + cudf::column_device_view source_column, + cuda::std::byte* source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + + Target* source_casted = reinterpret_cast(source); + cudf::detail::atomic_add(&target.element(target_index), + static_cast(source_casted[source_index])); + + // It is assumed the output for COUNT_ALL is initialized to be all valid + } +}; + +template +struct update_target_element_gmem< + Source, + cudf::aggregation::ARGMAX, + cuda::std::enable_if_t() and + cudf::is_relationally_comparable()>> { + __device__ void operator()(cudf::mutable_column_device_view target, + cudf::size_type target_index, + cudf::column_device_view source_column, + cuda::std::byte* source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + Target* source_casted = reinterpret_cast(source); + auto source_argmax_index = source_casted[source_index]; + auto old = cudf::detail::atomic_cas( + &target.element(target_index), cudf::detail::ARGMAX_SENTINEL, source_argmax_index); + if (old != cudf::detail::ARGMAX_SENTINEL) { + while 
(source_column.element(source_argmax_index) > + source_column.element(old)) { + old = + cudf::detail::atomic_cas(&target.element(target_index), old, source_argmax_index); + } + } + + if (target.is_null(target_index)) { target.set_valid(target_index); } + } +}; +template +struct update_target_element_gmem< + Source, + cudf::aggregation::ARGMIN, + cuda::std::enable_if_t() and + cudf::is_relationally_comparable()>> { + __device__ void operator()(cudf::mutable_column_device_view target, + cudf::size_type target_index, + cudf::column_device_view source_column, + cuda::std::byte* source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + Target* source_casted = reinterpret_cast(source); + auto source_argmin_index = source_casted[source_index]; + auto old = cudf::detail::atomic_cas( + &target.element(target_index), cudf::detail::ARGMIN_SENTINEL, source_argmin_index); + if (old != cudf::detail::ARGMIN_SENTINEL) { + while (source_column.element(source_argmin_index) < + source_column.element(old)) { + old = + cudf::detail::atomic_cas(&target.element(target_index), old, source_argmin_index); + } + } + + if (target.is_null(target_index)) { target.set_valid(target_index); } + } +}; + +/** + * @brief A functor that updates a single element in the target column stored in global memory by + * applying an aggregation operation to a corresponding element from a source column in shared + * memory. + * + * This functor can NOT be used for dictionary columns. + * + * This is a redundant copy replicating the behavior of `elementwise_aggregator` from + * `cudf/detail/aggregation/device_aggregators.cuh`. The key difference is that this functor accepts + * a pointer to raw bytes as the source, as `column_device_view` cannot yet be constructed from + * shared memory. 
+ */ +struct gmem_element_aggregator { + template + __device__ void operator()(cudf::mutable_column_device_view target, + cudf::size_type target_index, + cudf::column_device_view source_column, + cuda::std::byte* source, + bool* source_mask, + cudf::size_type source_index) const noexcept + { + // Early exit for all aggregation kinds since shared memory aggregation of + // `COUNT_ALL` is always valid + if (!source_mask[source_index]) { return; } + + update_target_element_gmem{}( + target, target_index, source_column, source, source_index); + } +}; +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 0432b9d120a..30e1d52fdbf 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -14,60 +14,32 @@ * limitations under the License. */ -#include "flatten_single_pass_aggs.hpp" +#include "compute_groupby.hpp" #include "groupby/common/utils.hpp" -#include "groupby_kernels.cuh" -#include "var_hash_functor.cuh" +#include "helpers.cuh" #include -#include -#include -#include -#include -#include #include -#include -#include -#include #include -#include -#include -#include +#include #include #include -#include #include #include -#include #include #include -#include #include #include #include -#include -#include -#include - +#include #include -#include #include +#include -namespace cudf { -namespace groupby { -namespace detail { -namespace hash { +namespace cudf::groupby::detail::hash { namespace { - -// TODO: similar to `contains_table`, using larger CG size like 2 or 4 for nested -// types and `cg_size = 1`for flat data to improve performance -using probing_scheme_type = cuco::linear_probing< - 1, ///< Number of threads used to handle each input key - cudf::experimental::row::hash::device_row_hasher>; - /** * @brief List of aggregation operations that can be computed with a hash-based * implementation. 
@@ -112,413 +84,33 @@ bool constexpr is_hash_aggregation(aggregation::Kind t) return array_contains(hash_aggregations, t); } -template -class hash_compound_agg_finalizer final : public cudf::detail::aggregation_finalizer { - column_view col; - data_type result_type; - cudf::detail::result_cache* sparse_results; - cudf::detail::result_cache* dense_results; - device_span gather_map; - SetType set; - bitmask_type const* __restrict__ row_bitmask; - rmm::cuda_stream_view stream; - rmm::device_async_resource_ref mr; - - public: - using cudf::detail::aggregation_finalizer::visit; - - hash_compound_agg_finalizer(column_view col, - cudf::detail::result_cache* sparse_results, - cudf::detail::result_cache* dense_results, - device_span gather_map, - SetType set, - bitmask_type const* row_bitmask, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) - : col(col), - sparse_results(sparse_results), - dense_results(dense_results), - gather_map(gather_map), - set(set), - row_bitmask(row_bitmask), - stream(stream), - mr(mr) - { - result_type = cudf::is_dictionary(col.type()) ? cudf::dictionary_column_view(col).keys().type() - : col.type(); - } - - auto to_dense_agg_result(cudf::aggregation const& agg) - { - auto s = sparse_results->get_result(col, agg); - auto dense_result_table = cudf::detail::gather(table_view({std::move(s)}), - gather_map, - out_of_bounds_policy::DONT_CHECK, - cudf::detail::negative_index_policy::NOT_ALLOWED, - stream, - mr); - return std::move(dense_result_table->release()[0]); - } - - // Enables conversion of ARGMIN/ARGMAX into MIN/MAX - auto gather_argminmax(aggregation const& agg) - { - auto arg_result = to_dense_agg_result(agg); - // We make a view of ARG(MIN/MAX) result without a null mask and gather - // using this map. The values in data buffer of ARG(MIN/MAX) result - // corresponding to null values was initialized to ARG(MIN/MAX)_SENTINEL - // which is an out of bounds index value (-1) and causes the gathered - // value to be null. 
- column_view null_removed_map( - data_type(type_to_id()), - arg_result->size(), - static_cast(arg_result->view().template data()), - nullptr, - 0); - auto gather_argminmax = - cudf::detail::gather(table_view({col}), - null_removed_map, - arg_result->nullable() ? cudf::out_of_bounds_policy::NULLIFY - : cudf::out_of_bounds_policy::DONT_CHECK, - cudf::detail::negative_index_policy::NOT_ALLOWED, - stream, - mr); - return std::move(gather_argminmax->release()[0]); - } - - // Declare overloads for each kind of aggregation to dispatch - void visit(cudf::aggregation const& agg) override - { - if (dense_results->has_result(col, agg)) return; - dense_results->add_result(col, agg, to_dense_agg_result(agg)); - } - - void visit(cudf::detail::min_aggregation const& agg) override - { - if (dense_results->has_result(col, agg)) return; - if (result_type.id() == type_id::STRING) { - auto transformed_agg = make_argmin_aggregation(); - dense_results->add_result(col, agg, gather_argminmax(*transformed_agg)); - } else { - dense_results->add_result(col, agg, to_dense_agg_result(agg)); - } - } - - void visit(cudf::detail::max_aggregation const& agg) override - { - if (dense_results->has_result(col, agg)) return; - - if (result_type.id() == type_id::STRING) { - auto transformed_agg = make_argmax_aggregation(); - dense_results->add_result(col, agg, gather_argminmax(*transformed_agg)); - } else { - dense_results->add_result(col, agg, to_dense_agg_result(agg)); - } - } - - void visit(cudf::detail::mean_aggregation const& agg) override - { - if (dense_results->has_result(col, agg)) return; - - auto sum_agg = make_sum_aggregation(); - auto count_agg = make_count_aggregation(); - this->visit(*sum_agg); - this->visit(*count_agg); - column_view sum_result = dense_results->get_result(col, *sum_agg); - column_view count_result = dense_results->get_result(col, *count_agg); - - auto result = - cudf::detail::binary_operation(sum_result, - count_result, - binary_operator::DIV, - 
cudf::detail::target_type(result_type, aggregation::MEAN), - stream, - mr); - dense_results->add_result(col, agg, std::move(result)); - } - - void visit(cudf::detail::var_aggregation const& agg) override - { - if (dense_results->has_result(col, agg)) return; - - auto sum_agg = make_sum_aggregation(); - auto count_agg = make_count_aggregation(); - this->visit(*sum_agg); - this->visit(*count_agg); - column_view sum_result = sparse_results->get_result(col, *sum_agg); - column_view count_result = sparse_results->get_result(col, *count_agg); - - auto values_view = column_device_view::create(col, stream); - auto sum_view = column_device_view::create(sum_result, stream); - auto count_view = column_device_view::create(count_result, stream); - - auto var_result = make_fixed_width_column( - cudf::detail::target_type(result_type, agg.kind), col.size(), mask_state::ALL_NULL, stream); - auto var_result_view = mutable_column_device_view::create(var_result->mutable_view(), stream); - mutable_table_view var_table_view{{var_result->mutable_view()}}; - cudf::detail::initialize_with_identity(var_table_view, {agg.kind}, stream); - - thrust::for_each_n( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - col.size(), - var_hash_functor{ - set, row_bitmask, *var_result_view, *values_view, *sum_view, *count_view, agg._ddof}); - sparse_results->add_result(col, agg, std::move(var_result)); - dense_results->add_result(col, agg, to_dense_agg_result(agg)); - } - - void visit(cudf::detail::std_aggregation const& agg) override - { - if (dense_results->has_result(col, agg)) return; - auto var_agg = make_variance_aggregation(agg._ddof); - this->visit(*dynamic_cast(var_agg.get())); - column_view variance = dense_results->get_result(col, *var_agg); - - auto result = cudf::detail::unary_operation(variance, unary_operator::SQRT, stream, mr); - dense_results->add_result(col, agg, std::move(result)); - } -}; - -/** - * @brief Gather sparse results into dense using `gather_map` and add to 
- * `dense_cache` - * - * @see groupby_null_templated() - */ -template -void sparse_to_dense_results(table_view const& keys, - host_span requests, - cudf::detail::result_cache* sparse_results, - cudf::detail::result_cache* dense_results, - device_span gather_map, - SetType set, - bool keys_have_nulls, - null_policy include_null_keys, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto row_bitmask = - cudf::detail::bitmask_and(keys, stream, cudf::get_current_device_resource_ref()).first; - bool skip_key_rows_with_nulls = keys_have_nulls and include_null_keys == null_policy::EXCLUDE; - bitmask_type const* row_bitmask_ptr = - skip_key_rows_with_nulls ? static_cast(row_bitmask.data()) : nullptr; - - for (auto const& request : requests) { - auto const& agg_v = request.aggregations; - auto const& col = request.values; - - // Given an aggregation, this will get the result from sparse_results and - // convert and return dense, compacted result - auto finalizer = hash_compound_agg_finalizer( - col, sparse_results, dense_results, gather_map, set, row_bitmask_ptr, stream, mr); - for (auto&& agg : agg_v) { - agg->finalize(finalizer); - } - } -} - -// make table that will hold sparse results -auto create_sparse_results_table(table_view const& flattened_values, - std::vector aggs, - rmm::cuda_stream_view stream) -{ - // TODO single allocation - room for performance improvement - std::vector> sparse_columns; - std::transform( - flattened_values.begin(), - flattened_values.end(), - aggs.begin(), - std::back_inserter(sparse_columns), - [stream](auto const& col, auto const& agg) { - bool nullable = - (agg == aggregation::COUNT_VALID or agg == aggregation::COUNT_ALL) - ? false - : (col.has_nulls() or agg == aggregation::VARIANCE or agg == aggregation::STD); - auto mask_flag = (nullable) ? mask_state::ALL_NULL : mask_state::UNALLOCATED; - - auto col_type = cudf::is_dictionary(col.type()) - ? 
cudf::dictionary_column_view(col).keys().type() - : col.type(); - - return make_fixed_width_column( - cudf::detail::target_type(col_type, agg), col.size(), mask_flag, stream); - }); - - table sparse_table(std::move(sparse_columns)); - mutable_table_view table_view = sparse_table.mutable_view(); - cudf::detail::initialize_with_identity(table_view, aggs, stream); - return sparse_table; -} - -/** - * @brief Computes all aggregations from `requests` that require a single pass - * over the data and stores the results in `sparse_results` - */ -template -void compute_single_pass_aggs(table_view const& keys, - host_span requests, - cudf::detail::result_cache* sparse_results, - SetType set, - bool keys_have_nulls, - null_policy include_null_keys, - rmm::cuda_stream_view stream) -{ - // flatten the aggs to a table that can be operated on by aggregate_row - auto const [flattened_values, agg_kinds, aggs] = flatten_single_pass_aggs(requests); - - // make table that will hold sparse results - table sparse_table = create_sparse_results_table(flattened_values, agg_kinds, stream); - // prepare to launch kernel to do the actual aggregation - auto d_sparse_table = mutable_table_device_view::create(sparse_table, stream); - auto d_values = table_device_view::create(flattened_values, stream); - auto const d_aggs = cudf::detail::make_device_uvector_async( - agg_kinds, stream, cudf::get_current_device_resource_ref()); - auto const skip_key_rows_with_nulls = - keys_have_nulls and include_null_keys == null_policy::EXCLUDE; - - auto row_bitmask = - skip_key_rows_with_nulls - ? 
cudf::detail::bitmask_and(keys, stream, cudf::get_current_device_resource_ref()).first - : rmm::device_buffer{}; - - thrust::for_each_n( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - keys.num_rows(), - hash::compute_single_pass_aggs_fn{set, - *d_values, - *d_sparse_table, - d_aggs.data(), - static_cast(row_bitmask.data()), - skip_key_rows_with_nulls}); - // Add results back to sparse_results cache - auto sparse_result_cols = sparse_table.release(); - for (size_t i = 0; i < aggs.size(); i++) { - // Note that the cache will make a copy of this temporary aggregation - sparse_results->add_result( - flattened_values.column(i), *aggs[i], std::move(sparse_result_cols[i])); - } -} - -/** - * @brief Computes and returns a device vector containing all populated keys in - * `map`. - */ -template -rmm::device_uvector extract_populated_keys(SetType const& key_set, - size_type num_keys, - rmm::cuda_stream_view stream) -{ - rmm::device_uvector populated_keys(num_keys, stream); - auto const keys_end = key_set.retrieve_all(populated_keys.begin(), stream.value()); - - populated_keys.resize(std::distance(populated_keys.begin(), keys_end), stream); - return populated_keys; -} - -/** - * @brief Computes groupby using hash table. - * - * First, we create a hash table that stores the indices of unique rows in - * `keys`. The upper limit on the number of values in this map is the number - * of rows in `keys`. - * - * To store the results of aggregations, we create temporary sparse columns - * which have the same size as input value columns. Using the hash map, we - * determine the location within the sparse column to write the result of the - * aggregation into. - * - * The sparse column results of all aggregations are stored into the cache - * `sparse_results`. This enables the use of previously calculated results in - * other aggregations. - * - * All the aggregations which can be computed in a single pass are computed - * first, in a combined kernel. 
Then using these results, aggregations that - * require multiple passes, will be computed. - * - * Finally, using the hash map, we generate a vector of indices of populated - * values in sparse result columns. Then, for each aggregation originally - * requested in `requests`, we gather sparse results into a column of dense - * results using the aforementioned index vector. Dense results are stored into - * the in/out parameter `cache`. - */ -std::unique_ptr
groupby(table_view const& keys, - host_span requests, - cudf::detail::result_cache* cache, - bool const keys_have_nulls, - null_policy const include_null_keys, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) +std::unique_ptr
dispatch_groupby(table_view const& keys, + host_span requests, + cudf::detail::result_cache* cache, + bool const keys_have_nulls, + null_policy const include_null_keys, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { - // convert to int64_t to avoid potential overflow with large `keys` - auto const num_keys = static_cast(keys.num_rows()); - auto const null_keys_are_equal = null_equality::EQUAL; - auto const has_null = nullate::DYNAMIC{cudf::has_nested_nulls(keys)}; + auto const null_keys_are_equal = null_equality::EQUAL; + auto const has_null = nullate::DYNAMIC{cudf::has_nested_nulls(keys)}; + auto const skip_rows_with_nulls = keys_have_nulls and include_null_keys == null_policy::EXCLUDE; auto preprocessed_keys = cudf::experimental::row::hash::preprocessed_table::create(keys, stream); auto const comparator = cudf::experimental::row::equality::self_comparator{preprocessed_keys}; auto const row_hash = cudf::experimental::row::hash::row_hasher{std::move(preprocessed_keys)}; auto const d_row_hash = row_hash.device_hasher(has_null); - // Cache of sparse results where the location of aggregate value in each - // column is indexed by the hash set - cudf::detail::result_cache sparse_results(requests.size()); - - auto const comparator_helper = [&](auto const d_key_equal) { - auto const set = cuco::static_set{ - num_keys, - 0.5, // desired load factor - cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL}, - d_key_equal, - probing_scheme_type{d_row_hash}, - cuco::thread_scope_device, - cuco::storage<1>{}, - cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, - stream.value()}; - - // Compute all single pass aggs first - compute_single_pass_aggs(keys, - requests, - &sparse_results, - set.ref(cuco::insert_and_find), - keys_have_nulls, - include_null_keys, - stream); - - // Extract the populated indices from the hash set and create a gather map. - // Gathering using this map from sparse results will give dense results. 
- auto gather_map = extract_populated_keys(set, keys.num_rows(), stream); - - // Compact all results from sparse_results and insert into cache - sparse_to_dense_results(keys, - requests, - &sparse_results, - cache, - gather_map, - set.ref(cuco::find), - keys_have_nulls, - include_null_keys, - stream, - mr); - - return cudf::detail::gather(keys, - gather_map, - out_of_bounds_policy::DONT_CHECK, - cudf::detail::negative_index_policy::NOT_ALLOWED, - stream, - mr); - }; - if (cudf::detail::has_nested_columns(keys)) { - auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); - return comparator_helper(d_key_equal); + auto const d_row_equal = comparator.equal_to(has_null, null_keys_are_equal); + return compute_groupby( + keys, requests, skip_rows_with_nulls, d_row_equal, d_row_hash, cache, stream, mr); } else { - auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); - return comparator_helper(d_key_equal); + auto const d_row_equal = comparator.equal_to(has_null, null_keys_are_equal); + return compute_groupby( + keys, requests, skip_rows_with_nulls, d_row_equal, d_row_hash, cache, stream, mr); } } - } // namespace /** @@ -559,11 +151,8 @@ std::pair, std::vector> groupby( cudf::detail::result_cache cache(requests.size()); std::unique_ptr
unique_keys = - groupby(keys, requests, &cache, cudf::has_nulls(keys), include_null_keys, stream, mr); + dispatch_groupby(keys, requests, &cache, cudf::has_nulls(keys), include_null_keys, stream, mr); return std::pair(std::move(unique_keys), extract_results(requests, cache, stream, mr)); } -} // namespace hash -} // namespace detail -} // namespace groupby -} // namespace cudf +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu b/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu new file mode 100644 index 00000000000..37a61c1a22c --- /dev/null +++ b/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "hash_compound_agg_finalizer.hpp" +#include "helpers.cuh" +#include "var_hash_functor.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +namespace cudf::groupby::detail::hash { +template +hash_compound_agg_finalizer::hash_compound_agg_finalizer( + column_view col, + cudf::detail::result_cache* sparse_results, + cudf::detail::result_cache* dense_results, + device_span gather_map, + SetType set, + bitmask_type const* row_bitmask, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) + : col(col), + sparse_results(sparse_results), + dense_results(dense_results), + gather_map(gather_map), + set(set), + row_bitmask(row_bitmask), + stream(stream), + mr(mr) +{ + result_type = + cudf::is_dictionary(col.type()) ? cudf::dictionary_column_view(col).keys().type() : col.type(); +} + +template +auto hash_compound_agg_finalizer::to_dense_agg_result(cudf::aggregation const& agg) +{ + auto s = sparse_results->get_result(col, agg); + auto dense_result_table = cudf::detail::gather(table_view({std::move(s)}), + gather_map, + out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); + return std::move(dense_result_table->release()[0]); +} + +template +auto hash_compound_agg_finalizer::gather_argminmax(aggregation const& agg) +{ + auto arg_result = to_dense_agg_result(agg); + // We make a view of ARG(MIN/MAX) result without a null mask and gather + // using this map. The values in data buffer of ARG(MIN/MAX) result + // corresponding to null values was initialized to ARG(MIN/MAX)_SENTINEL + // which is an out of bounds index value (-1) and causes the gathered + // value to be null. 
+ column_view null_removed_map( + data_type(type_to_id()), + arg_result->size(), + static_cast(arg_result->view().template data()), + nullptr, + 0); + auto gather_argminmax = + cudf::detail::gather(table_view({col}), + null_removed_map, + arg_result->nullable() ? cudf::out_of_bounds_policy::NULLIFY + : cudf::out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); + return std::move(gather_argminmax->release()[0]); +} + +template +void hash_compound_agg_finalizer::visit(cudf::aggregation const& agg) +{ + if (dense_results->has_result(col, agg)) return; + dense_results->add_result(col, agg, to_dense_agg_result(agg)); +} + +template +void hash_compound_agg_finalizer::visit(cudf::detail::min_aggregation const& agg) +{ + if (dense_results->has_result(col, agg)) return; + if (result_type.id() == type_id::STRING) { + auto transformed_agg = make_argmin_aggregation(); + dense_results->add_result(col, agg, gather_argminmax(*transformed_agg)); + } else { + dense_results->add_result(col, agg, to_dense_agg_result(agg)); + } +} + +template +void hash_compound_agg_finalizer::visit(cudf::detail::max_aggregation const& agg) +{ + if (dense_results->has_result(col, agg)) return; + + if (result_type.id() == type_id::STRING) { + auto transformed_agg = make_argmax_aggregation(); + dense_results->add_result(col, agg, gather_argminmax(*transformed_agg)); + } else { + dense_results->add_result(col, agg, to_dense_agg_result(agg)); + } +} + +template +void hash_compound_agg_finalizer::visit(cudf::detail::mean_aggregation const& agg) +{ + if (dense_results->has_result(col, agg)) return; + + auto sum_agg = make_sum_aggregation(); + auto count_agg = make_count_aggregation(); + this->visit(*sum_agg); + this->visit(*count_agg); + column_view sum_result = dense_results->get_result(col, *sum_agg); + column_view count_result = dense_results->get_result(col, *count_agg); + + auto result = + cudf::detail::binary_operation(sum_result, + count_result, + 
binary_operator::DIV, + cudf::detail::target_type(result_type, aggregation::MEAN), + stream, + mr); + dense_results->add_result(col, agg, std::move(result)); +} + +template +void hash_compound_agg_finalizer::visit(cudf::detail::var_aggregation const& agg) +{ + if (dense_results->has_result(col, agg)) return; + + auto sum_agg = make_sum_aggregation(); + auto count_agg = make_count_aggregation(); + this->visit(*sum_agg); + this->visit(*count_agg); + column_view sum_result = sparse_results->get_result(col, *sum_agg); + column_view count_result = sparse_results->get_result(col, *count_agg); + + auto values_view = column_device_view::create(col, stream); + auto sum_view = column_device_view::create(sum_result, stream); + auto count_view = column_device_view::create(count_result, stream); + + auto var_result = make_fixed_width_column( + cudf::detail::target_type(result_type, agg.kind), col.size(), mask_state::ALL_NULL, stream); + auto var_result_view = mutable_column_device_view::create(var_result->mutable_view(), stream); + mutable_table_view var_table_view{{var_result->mutable_view()}}; + cudf::detail::initialize_with_identity(var_table_view, {agg.kind}, stream); + + thrust::for_each_n( + rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(0), + col.size(), + var_hash_functor{ + set, row_bitmask, *var_result_view, *values_view, *sum_view, *count_view, agg._ddof}); + sparse_results->add_result(col, agg, std::move(var_result)); + dense_results->add_result(col, agg, to_dense_agg_result(agg)); +} + +template +void hash_compound_agg_finalizer::visit(cudf::detail::std_aggregation const& agg) +{ + if (dense_results->has_result(col, agg)) return; + auto var_agg = make_variance_aggregation(agg._ddof); + this->visit(*dynamic_cast(var_agg.get())); + column_view variance = dense_results->get_result(col, *var_agg); + + auto result = cudf::detail::unary_operation(variance, unary_operator::SQRT, stream, mr); + dense_results->add_result(col, agg, std::move(result)); +} + 
+template class hash_compound_agg_finalizer>; +template class hash_compound_agg_finalizer>; +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/hash_compound_agg_finalizer.hpp b/cpp/src/groupby/hash/hash_compound_agg_finalizer.hpp new file mode 100644 index 00000000000..8bee1a92c40 --- /dev/null +++ b/cpp/src/groupby/hash/hash_compound_agg_finalizer.hpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace cudf::groupby::detail::hash { +template +class hash_compound_agg_finalizer final : public cudf::detail::aggregation_finalizer { + column_view col; + data_type result_type; + cudf::detail::result_cache* sparse_results; + cudf::detail::result_cache* dense_results; + device_span gather_map; + SetType set; + bitmask_type const* __restrict__ row_bitmask; + rmm::cuda_stream_view stream; + rmm::device_async_resource_ref mr; + + public: + using cudf::detail::aggregation_finalizer::visit; + + hash_compound_agg_finalizer(column_view col, + cudf::detail::result_cache* sparse_results, + cudf::detail::result_cache* dense_results, + device_span gather_map, + SetType set, + bitmask_type const* row_bitmask, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + + auto to_dense_agg_result(cudf::aggregation const& agg); + + // Enables conversion of ARGMIN/ARGMAX into MIN/MAX + auto gather_argminmax(cudf::aggregation const& agg); + + // Declare overloads for each kind of aggregation to dispatch + void visit(cudf::aggregation const& agg) override; + + void visit(cudf::detail::min_aggregation const& agg) override; + + void visit(cudf::detail::max_aggregation const& agg) override; + + void visit(cudf::detail::mean_aggregation const& agg) override; + + void visit(cudf::detail::var_aggregation const& agg) override; + + void visit(cudf::detail::std_aggregation const& agg) override; +}; +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/helpers.cuh b/cpp/src/groupby/hash/helpers.cuh new file mode 100644 index 00000000000..00836567b4f --- /dev/null +++ b/cpp/src/groupby/hash/helpers.cuh @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include + +#include + +namespace cudf::groupby::detail::hash { +// TODO: similar to `contains_table`, using larger CG size like 2 or 4 for nested +// types and `cg_size = 1`for flat data to improve performance +/// Number of threads to handle each input element +CUDF_HOST_DEVICE auto constexpr GROUPBY_CG_SIZE = 1; + +/// Number of slots per thread +CUDF_HOST_DEVICE auto constexpr GROUPBY_WINDOW_SIZE = 1; + +/// Thread block size +CUDF_HOST_DEVICE auto constexpr GROUPBY_BLOCK_SIZE = 128; + +/// Threshold cardinality to switch between shared memory aggregations and global memory +/// aggregations +CUDF_HOST_DEVICE auto constexpr GROUPBY_CARDINALITY_THRESHOLD = 128; + +// We add additional `block_size`, because after the number of elements in the local hash set +// exceeds the threshold, all threads in the thread block can still insert one more element. 
+/// The maximum number of elements handled per block +CUDF_HOST_DEVICE auto constexpr GROUPBY_SHM_MAX_ELEMENTS = + GROUPBY_CARDINALITY_THRESHOLD + GROUPBY_BLOCK_SIZE; + +// GROUPBY_SHM_MAX_ELEMENTS with 0.7 occupancy +/// Shared memory hash set extent type +using shmem_extent_t = + cuco::extent(static_cast(GROUPBY_SHM_MAX_ELEMENTS) * 1.43)>; + +/// Number of windows needed by each shared memory hash set +CUDF_HOST_DEVICE auto constexpr window_extent = + cuco::make_window_extent(shmem_extent_t{}); + +using row_hash_t = + cudf::experimental::row::hash::device_row_hasher; + +/// Probing scheme type used by groupby hash table +using probing_scheme_t = cuco::linear_probing; + +using row_comparator_t = cudf::experimental::row::equality::device_row_comparator< + false, + cudf::nullate::DYNAMIC, + cudf::experimental::row::equality::nan_equal_physical_equality_comparator>; + +using nullable_row_comparator_t = cudf::experimental::row::equality::device_row_comparator< + true, + cudf::nullate::DYNAMIC, + cudf::experimental::row::equality::nan_equal_physical_equality_comparator>; + +using global_set_t = cuco::static_set, + cuda::thread_scope_device, + row_comparator_t, + probing_scheme_t, + cudf::detail::cuco_allocator, + cuco::storage>; + +using nullable_global_set_t = cuco::static_set, + cuda::thread_scope_device, + nullable_row_comparator_t, + probing_scheme_t, + cudf::detail::cuco_allocator, + cuco::storage>; + +template +using hash_set_ref_t = cuco::static_set_ref< + cudf::size_type, + cuda::thread_scope_device, + row_comparator_t, + probing_scheme_t, + cuco::aow_storage_ref>, + Op>; + +template +using nullable_hash_set_ref_t = cuco::static_set_ref< + cudf::size_type, + cuda::thread_scope_device, + nullable_row_comparator_t, + probing_scheme_t, + cuco::aow_storage_ref>, + Op>; +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/shared_memory_aggregator.cuh b/cpp/src/groupby/hash/shared_memory_aggregator.cuh new file mode 100644 index 
00000000000..9cbeeb34b86 --- /dev/null +++ b/cpp/src/groupby/hash/shared_memory_aggregator.cuh @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +namespace cudf::groupby::detail::hash { +template +struct update_target_element_shmem { + __device__ void operator()( + cuda::std::byte*, bool*, cudf::size_type, cudf::column_device_view, cudf::size_type) const + { + CUDF_UNREACHABLE("Invalid source type and aggregation combination."); + } +}; + +template +struct update_target_element_shmem< + Source, + cudf::aggregation::MIN, + cuda::std::enable_if_t() && cudf::has_atomic_support()>> { + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type target_index, + cudf::column_device_view source, + cudf::size_type source_index) const noexcept + { + using DeviceTarget = cudf::detail::underlying_target_t; + using DeviceSource = cudf::detail::underlying_source_t; + + DeviceTarget* target_casted = reinterpret_cast(target); + cudf::detail::atomic_min(&target_casted[target_index], + static_cast(source.element(source_index))); + + if (!target_mask[target_index]) { target_mask[target_index] = true; } + } +}; + +template +struct update_target_element_shmem< + Source, + cudf::aggregation::MAX, + cuda::std::enable_if_t() && cudf::has_atomic_support()>> { + __device__ void 
operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type target_index, + cudf::column_device_view source, + cudf::size_type source_index) const noexcept + { + using DeviceTarget = cudf::detail::underlying_target_t; + using DeviceSource = cudf::detail::underlying_source_t; + + DeviceTarget* target_casted = reinterpret_cast(target); + cudf::detail::atomic_max(&target_casted[target_index], + static_cast(source.element(source_index))); + + if (!target_mask[target_index]) { target_mask[target_index] = true; } + } +}; + +template +struct update_target_element_shmem< + Source, + cudf::aggregation::SUM, + cuda::std::enable_if_t() && cudf::has_atomic_support() && + !cudf::is_timestamp()>> { + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type target_index, + cudf::column_device_view source, + cudf::size_type source_index) const noexcept + { + using DeviceTarget = cudf::detail::underlying_target_t; + using DeviceSource = cudf::detail::underlying_source_t; + + DeviceTarget* target_casted = reinterpret_cast(target); + cudf::detail::atomic_add(&target_casted[target_index], + static_cast(source.element(source_index))); + + if (!target_mask[target_index]) { target_mask[target_index] = true; } + } +}; + +template +struct update_target_element_shmem< + Source, + cudf::aggregation::SUM_OF_SQUARES, + cuda::std::enable_if_t()>> { + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type target_index, + cudf::column_device_view source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + Target* target_casted = reinterpret_cast(target); + auto value = static_cast(source.element(source_index)); + cudf::detail::atomic_add(&target_casted[target_index], value * value); + + if (!target_mask[target_index]) { target_mask[target_index] = true; } + } +}; + +template +struct update_target_element_shmem< + Source, + cudf::aggregation::PRODUCT, + 
cuda::std::enable_if_t()>> { + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type target_index, + cudf::column_device_view source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + Target* target_casted = reinterpret_cast(target); + cudf::detail::atomic_mul(&target_casted[target_index], + static_cast(source.element(source_index))); + + if (!target_mask[target_index]) { target_mask[target_index] = true; } + } +}; + +template +struct update_target_element_shmem< + Source, + cudf::aggregation::COUNT_VALID, + cuda::std::enable_if_t< + cudf::detail::is_valid_aggregation()>> { + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type target_index, + cudf::column_device_view source, + cudf::size_type source_index) const noexcept + { + // The nullability was checked prior to this call in the `shmem_element_aggregator` functor + using Target = cudf::detail::target_type_t; + Target* target_casted = reinterpret_cast(target); + cudf::detail::atomic_add(&target_casted[target_index], Target{1}); + } +}; + +template +struct update_target_element_shmem< + Source, + cudf::aggregation::COUNT_ALL, + cuda::std::enable_if_t< + cudf::detail::is_valid_aggregation()>> { + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type target_index, + cudf::column_device_view source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + Target* target_casted = reinterpret_cast(target); + cudf::detail::atomic_add(&target_casted[target_index], Target{1}); + + // Assumes target is already set to be valid + } +}; + +template +struct update_target_element_shmem< + Source, + cudf::aggregation::ARGMAX, + cuda::std::enable_if_t() and + cudf::is_relationally_comparable()>> { + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type target_index, + cudf::column_device_view 
source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + Target* target_casted = reinterpret_cast(target); + auto old = cudf::detail::atomic_cas( + &target_casted[target_index], cudf::detail::ARGMAX_SENTINEL, source_index); + if (old != cudf::detail::ARGMAX_SENTINEL) { + while (source.element(source_index) > source.element(old)) { + old = cudf::detail::atomic_cas(&target_casted[target_index], old, source_index); + } + } + + if (!target_mask[target_index]) { target_mask[target_index] = true; } + } +}; + +template +struct update_target_element_shmem< + Source, + cudf::aggregation::ARGMIN, + cuda::std::enable_if_t() and + cudf::is_relationally_comparable()>> { + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type target_index, + cudf::column_device_view source, + cudf::size_type source_index) const noexcept + { + using Target = cudf::detail::target_type_t; + Target* target_casted = reinterpret_cast(target); + auto old = cudf::detail::atomic_cas( + &target_casted[target_index], cudf::detail::ARGMIN_SENTINEL, source_index); + if (old != cudf::detail::ARGMIN_SENTINEL) { + while (source.element(source_index) < source.element(old)) { + old = cudf::detail::atomic_cas(&target_casted[target_index], old, source_index); + } + } + + if (!target_mask[target_index]) { target_mask[target_index] = true; } + } +}; + +/** + * @brief A functor that updates a single element in the target column stored in shared memory by + * applying an aggregation operation to a corresponding element from a source column in global + * memory. + * + * This functor can NOT be used for dictionary columns. + * + * This is a redundant copy replicating the behavior of `elementwise_aggregator` from + * `cudf/detail/aggregation/device_aggregators.cuh`. The key difference is that this functor accepts + * a pointer to raw bytes as the target, as `column_device_view` cannot yet be constructed from + * shared memory.
+ */ +struct shmem_element_aggregator { + template + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type target_index, + cudf::column_device_view source, + cudf::size_type source_index) const noexcept + { + // Check nullability for all aggregation kinds but `COUNT_ALL` + if constexpr (k != cudf::aggregation::COUNT_ALL) { + if (source.is_null(source_index)) { return; } + } + update_target_element_shmem{}( + target, target_mask, target_index, source, source_index); + } +}; +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/groupby_kernels.cuh b/cpp/src/groupby/hash/single_pass_functors.cuh similarity index 55% rename from cpp/src/groupby/hash/groupby_kernels.cuh rename to cpp/src/groupby/hash/single_pass_functors.cuh index 86f4d76487f..28a5b578e00 100644 --- a/cpp/src/groupby/hash/groupby_kernels.cuh +++ b/cpp/src/groupby/hash/single_pass_functors.cuh @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #pragma once #include @@ -21,12 +20,90 @@ #include #include -#include +#include + +namespace cudf::groupby::detail::hash { +// TODO: TO BE REMOVED issue tracked via #17171 +template +__device__ constexpr bool is_supported() +{ + return cudf::is_fixed_width() and + ((k == cudf::aggregation::SUM) or (k == cudf::aggregation::SUM_OF_SQUARES) or + (k == cudf::aggregation::MIN) or (k == cudf::aggregation::MAX) or + (k == cudf::aggregation::COUNT_VALID) or (k == cudf::aggregation::COUNT_ALL) or + (k == cudf::aggregation::ARGMIN) or (k == cudf::aggregation::ARGMAX) or + (k == cudf::aggregation::STD) or (k == cudf::aggregation::VARIANCE) or + (k == cudf::aggregation::PRODUCT) and cudf::detail::is_product_supported()); +} + +template +__device__ std::enable_if_t, void>, T> +identity_from_operator() +{ + using DeviceType = cudf::device_storage_type_t; + return cudf::detail::corresponding_operator_t::template identity(); +} + +template +__device__ std::enable_if_t, void>, T> +identity_from_operator() +{ + CUDF_UNREACHABLE("Unable to get identity/sentinel from device operator"); +} + +template +__device__ T get_identity() +{ + if ((k == cudf::aggregation::ARGMAX) or (k == cudf::aggregation::ARGMIN)) { + if constexpr (cudf::is_timestamp()) { + return k == cudf::aggregation::ARGMAX + ? T{typename T::duration(cudf::detail::ARGMAX_SENTINEL)} + : T{typename T::duration(cudf::detail::ARGMIN_SENTINEL)}; + } else { + using DeviceType = cudf::device_storage_type_t; + return k == cudf::aggregation::ARGMAX + ? 
static_cast(cudf::detail::ARGMAX_SENTINEL) + : static_cast(cudf::detail::ARGMIN_SENTINEL); + } + } + return identity_from_operator(); +} + +template +struct initialize_target_element { + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type idx) const noexcept + { + CUDF_UNREACHABLE("Invalid source type and aggregation combination."); + } +}; + +template +struct initialize_target_element()>> { + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type idx) const noexcept + { + using DeviceType = cudf::device_storage_type_t; + DeviceType* target_casted = reinterpret_cast(target); + + target_casted[idx] = get_identity(); + + target_mask[idx] = (k == cudf::aggregation::COUNT_ALL) or (k == cudf::aggregation::COUNT_VALID); + } +}; + +struct initialize_shmem { + template + __device__ void operator()(cuda::std::byte* target, + bool* target_mask, + cudf::size_type idx) const noexcept + { + initialize_target_element{}(target, target_mask, idx); + } +}; -namespace cudf { -namespace groupby { -namespace detail { -namespace hash { /** * @brief Computes single-pass aggregations and store results into a sparse `output_values` table, * and populate `set` with indices of unique keys @@ -102,8 +179,4 @@ struct compute_single_pass_aggs_fn { } } }; - -} // namespace hash -} // namespace detail -} // namespace groupby -} // namespace cudf +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/sparse_to_dense_results.cu b/cpp/src/groupby/hash/sparse_to_dense_results.cu new file mode 100644 index 00000000000..e1c2cd22309 --- /dev/null +++ b/cpp/src/groupby/hash/sparse_to_dense_results.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "hash_compound_agg_finalizer.hpp" +#include "helpers.cuh" + +#include +#include +#include +#include + +#include +#include + +namespace cudf::groupby::detail::hash { +template +void sparse_to_dense_results(host_span requests, + cudf::detail::result_cache* sparse_results, + cudf::detail::result_cache* dense_results, + device_span gather_map, + SetRef set, + bitmask_type const* row_bitmask, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + for (auto const& request : requests) { + auto const& agg_v = request.aggregations; + auto const& col = request.values; + + // Given an aggregation, this will get the result from sparse_results and + // convert and return dense, compacted result + auto finalizer = hash_compound_agg_finalizer( + col, sparse_results, dense_results, gather_map, set, row_bitmask, stream, mr); + for (auto&& agg : agg_v) { + agg->finalize(finalizer); + } + } +} + +template void sparse_to_dense_results>( + host_span requests, + cudf::detail::result_cache* sparse_results, + cudf::detail::result_cache* dense_results, + device_span gather_map, + hash_set_ref_t set, + bitmask_type const* row_bitmask, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +template void sparse_to_dense_results>( + host_span requests, + cudf::detail::result_cache* sparse_results, + cudf::detail::result_cache* dense_results, + device_span gather_map, + nullable_hash_set_ref_t set, + bitmask_type const* row_bitmask, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); +} // namespace 
cudf::groupby::detail::hash diff --git a/cpp/src/groupby/hash/sparse_to_dense_results.hpp b/cpp/src/groupby/hash/sparse_to_dense_results.hpp new file mode 100644 index 00000000000..3a2b3090b99 --- /dev/null +++ b/cpp/src/groupby/hash/sparse_to_dense_results.hpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace cudf::groupby::detail::hash { +/** + * @brief Gather sparse aggregation results into dense using `gather_map` and add to + * `dense_results` + * + * @tparam SetRef Device hash set ref type + * + * @param[in] requests The set of columns to aggregate and the aggregations to perform + * @param[in] sparse_results Sparse aggregation results + * @param[out] dense_results Dense aggregation results + * @param[in] gather_map Gather map indicating valid elements in `sparse_results` + * @param[in] set Device hash set ref + * @param[in] row_bitmask Bitmask indicating the validity of input keys + * @param[in] stream CUDA stream used for device memory operations and kernel launches + * @param[in] mr Device memory resource used to allocate the returned table + */ +template +void sparse_to_dense_results(host_span requests, + cudf::detail::result_cache* sparse_results, + cudf::detail::result_cache* dense_results, + device_span gather_map, + SetRef set, + bitmask_type const* row_bitmask, + rmm::cuda_stream_view 
stream, + rmm::device_async_resource_ref mr); +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index a9085a1f1fd..3041e261945 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/cpp/src/groupby/sort/group_quantiles.cu b/cpp/src/groupby/sort/group_quantiles.cu index 82d557b9f7e..d6c900fb689 100644 --- a/cpp/src/groupby/sort/group_quantiles.cu +++ b/cpp/src/groupby/sort/group_quantiles.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -108,7 +109,7 @@ struct quantiles_functor { auto values_view = column_device_view::create(values, stream); auto group_size_view = column_device_view::create(group_sizes, stream); auto result_view = mutable_column_device_view::create(result->mutable_view(), stream); - auto null_count = rmm::device_scalar(0, stream, mr); + auto null_count = cudf::detail::device_scalar(0, stream, mr); // For each group, calculate quantile if (!cudf::is_dictionary(values.type())) { diff --git a/cpp/src/groupby/sort/group_std.cu b/cpp/src/groupby/sort/group_std.cu index 86ee20dbbe2..c3dfac46502 100644 --- a/cpp/src/groupby/sort/group_std.cu +++ b/cpp/src/groupby/sort/group_std.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -27,7 +28,6 @@ #include #include -#include #include #include @@ -134,7 +134,7 @@ struct var_functor { // set nulls auto result_view = mutable_column_device_view::create(*result, stream); - auto null_count = rmm::device_scalar(0, stream, mr); + auto null_count = cudf::detail::device_scalar(0, stream, mr); auto d_null_count = null_count.data(); thrust::for_each_n( rmm::exec_policy(stream), diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index c7bfd4aecf4..a0c51940c87 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ 
-302,7 +302,8 @@ std::unique_ptr md5(table_view const& input, } return md5_leaf_type_check(col.type()); }), - "Unsupported column type for hash function."); + "Unsupported column type for hash function.", + cudf::data_type_error); // Digest size in bytes auto constexpr digest_size = 32; diff --git a/cpp/src/hash/sha_hash.cuh b/cpp/src/hash/sha_hash.cuh index ebaec8e2775..eb002cf9c6f 100644 --- a/cpp/src/hash/sha_hash.cuh +++ b/cpp/src/hash/sha_hash.cuh @@ -513,7 +513,8 @@ std::unique_ptr sha_hash(table_view const& input, CUDF_EXPECTS( std::all_of( input.begin(), input.end(), [](auto const& col) { return sha_leaf_type_check(col.type()); }), - "Unsupported column type for hash function."); + "Unsupported column type for hash function.", + cudf::data_type_error); // Result column allocation and creation auto begin = thrust::make_constant_iterator(Hasher::digest_size); diff --git a/cpp/src/interop/arrow_utilities.cpp b/cpp/src/interop/arrow_utilities.cpp index a99262fb3bf..c69ebe12d2c 100644 --- a/cpp/src/interop/arrow_utilities.cpp +++ b/cpp/src/interop/arrow_utilities.cpp @@ -20,11 +20,6 @@ #include #include -#include - -#include -#include - #include namespace cudf { diff --git a/cpp/src/interop/arrow_utilities.hpp b/cpp/src/interop/arrow_utilities.hpp index 1b79fbf9eda..e4bdedf6603 100644 --- a/cpp/src/interop/arrow_utilities.hpp +++ b/cpp/src/interop/arrow_utilities.hpp @@ -17,7 +17,6 @@ #pragma once #include -#include #include #include diff --git a/cpp/src/interop/dlpack.cpp b/cpp/src/interop/dlpack.cpp index a1be6aade4e..4395b741e53 100644 --- a/cpp/src/interop/dlpack.cpp +++ b/cpp/src/interop/dlpack.cpp @@ -16,11 +16,8 @@ #include #include #include -#include -#include #include #include -#include #include #include #include diff --git a/cpp/src/interop/to_arrow_device.cu b/cpp/src/interop/to_arrow_device.cu index a2874b46b06..fc1b0226a48 100644 --- a/cpp/src/interop/to_arrow_device.cu +++ b/cpp/src/interop/to_arrow_device.cu @@ -19,6 +19,7 @@ #include #include 
+#include #include #include #include @@ -35,7 +36,6 @@ #include #include -#include #include #include @@ -60,7 +60,7 @@ template struct is_device_scalar : public std::false_type {}; template -struct is_device_scalar> : public std::true_type {}; +struct is_device_scalar> : public std::true_type {}; template struct is_device_uvector : public std::false_type {}; @@ -232,10 +232,10 @@ int dispatch_to_arrow_device::operator()(cudf::column&& colum // in the offsets buffer. While some arrow implementations may accept a zero-sized // offsets buffer, best practices would be to allocate the buffer with the single value. if (nanoarrow_type == NANOARROW_TYPE_STRING) { - auto zero = std::make_unique>(0, stream, mr); + auto zero = std::make_unique>(0, stream, mr); NANOARROW_RETURN_NOT_OK(set_buffer(std::move(zero), fixed_width_data_buffer_idx, tmp.get())); } else { - auto zero = std::make_unique>(0, stream, mr); + auto zero = std::make_unique>(0, stream, mr); NANOARROW_RETURN_NOT_OK(set_buffer(std::move(zero), fixed_width_data_buffer_idx, tmp.get())); } @@ -466,10 +466,10 @@ int dispatch_to_arrow_device_view::operator()(ArrowArray* out if (column.size() == 0) { // https://github.com/rapidsai/cudf/pull/15047#discussion_r1546528552 if (nanoarrow_type == NANOARROW_TYPE_LARGE_STRING) { - auto zero = std::make_unique>(0, stream, mr); + auto zero = std::make_unique>(0, stream, mr); NANOARROW_RETURN_NOT_OK(set_buffer(std::move(zero), fixed_width_data_buffer_idx, tmp.get())); } else { - auto zero = std::make_unique>(0, stream, mr); + auto zero = std::make_unique>(0, stream, mr); NANOARROW_RETURN_NOT_OK(set_buffer(std::move(zero), fixed_width_data_buffer_idx, tmp.get())); } diff --git a/cpp/src/io/avro/avro.cpp b/cpp/src/io/avro/avro.cpp index d5caa4720ac..b3fcca62314 100644 --- a/cpp/src/io/avro/avro.cpp +++ b/cpp/src/io/avro/avro.cpp @@ -17,7 +17,6 @@ #include "avro.hpp" #include -#include #include namespace cudf { diff --git a/cpp/src/io/avro/avro.hpp b/cpp/src/io/avro/avro.hpp index 
2e992546ccc..fd2c781b8a1 100644 --- a/cpp/src/io/avro/avro.hpp +++ b/cpp/src/io/avro/avro.hpp @@ -18,11 +18,9 @@ #include "avro_common.hpp" -#include #include #include #include -#include #include #include #include diff --git a/cpp/src/io/comp/nvcomp_adapter.hpp b/cpp/src/io/comp/nvcomp_adapter.hpp index 583bd6a3523..2e1cda2d6b7 100644 --- a/cpp/src/io/comp/nvcomp_adapter.hpp +++ b/cpp/src/io/comp/nvcomp_adapter.hpp @@ -18,9 +18,7 @@ #include "gpuinflate.hpp" -#include #include -#include #include #include diff --git a/cpp/src/io/comp/uncomp.cpp b/cpp/src/io/comp/uncomp.cpp index 1af45b41d8e..fb8c308065d 100644 --- a/cpp/src/io/comp/uncomp.cpp +++ b/cpp/src/io/comp/uncomp.cpp @@ -24,8 +24,6 @@ #include #include -#include - #include // uncompress #include // memset @@ -538,8 +536,10 @@ size_t decompress_zstd(host_span src, CUDF_EXPECTS(hd_stats[0].status == compression_status::SUCCESS, "ZSTD decompression failed"); // Copy temporary output to `dst` - CUDF_CUDA_TRY(cudaMemcpyAsync( - dst.data(), d_dst.data(), hd_stats[0].bytes_written, cudaMemcpyDefault, stream.value())); + cudf::detail::cuda_memcpy_async( + dst.subspan(0, hd_stats[0].bytes_written), + device_span{d_dst.data(), hd_stats[0].bytes_written}, + stream); return hd_stats[0].bytes_written; } diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu index 8c32fc85f78..72fca75c56b 100644 --- a/cpp/src/io/csv/reader_impl.cu +++ b/cpp/src/io/csv/reader_impl.cu @@ -21,6 +21,7 @@ #include "csv_common.hpp" #include "csv_gpu.hpp" +#include "cudf/detail/utilities/cuda_memcpy.hpp" #include "io/comp/io_uncomp.hpp" #include "io/utilities/column_buffer.hpp" #include "io/utilities/hostdevice_vector.hpp" @@ -275,11 +276,10 @@ std::pair, selected_rows_offsets> load_data_and_gather auto const read_offset = byte_range_offset + input_pos + previous_data_size; auto const read_size = target_pos - input_pos - previous_data_size; if (data.has_value()) { - CUDF_CUDA_TRY(cudaMemcpyAsync(d_data.data() + 
previous_data_size, - data->data() + read_offset, - target_pos - input_pos - previous_data_size, - cudaMemcpyDefault, - stream.value())); + cudf::detail::cuda_memcpy_async( + device_span{d_data.data() + previous_data_size, read_size}, + data->subspan(read_offset, read_size), + stream); } else { if (source->is_device_read_preferred(read_size)) { source->device_read(read_offset, @@ -288,12 +288,11 @@ std::pair, selected_rows_offsets> load_data_and_gather stream); } else { auto const buffer = source->host_read(read_offset, read_size); - CUDF_CUDA_TRY(cudaMemcpyAsync(d_data.data() + previous_data_size, - buffer->data(), - buffer->size(), - cudaMemcpyDefault, - stream.value())); - stream.synchronize(); // To prevent buffer going out of scope before we copy the data. + // Use sync version to prevent buffer going out of scope before we copy the data. + cudf::detail::cuda_memcpy( + device_span{d_data.data() + previous_data_size, read_size}, + host_span{reinterpret_cast(buffer->data()), buffer->size()}, + stream); } } @@ -311,12 +310,10 @@ std::pair, selected_rows_offsets> load_data_and_gather range_end, skip_rows, stream); - CUDF_CUDA_TRY(cudaMemcpyAsync(row_ctx.host_ptr(), - row_ctx.device_ptr(), - num_blocks * sizeof(uint64_t), - cudaMemcpyDefault, - stream.value())); - stream.synchronize(); + + cudf::detail::cuda_memcpy(host_span{row_ctx}.subspan(0, num_blocks), + device_span{row_ctx}.subspan(0, num_blocks), + stream); // Sum up the rows in each character block, selecting the row count that // corresponds to the current input context. 
Also stores the now known input @@ -331,11 +328,9 @@ std::pair, selected_rows_offsets> load_data_and_gather // At least one row in range in this batch all_row_offsets.resize(total_rows - skip_rows, stream); - CUDF_CUDA_TRY(cudaMemcpyAsync(row_ctx.device_ptr(), - row_ctx.host_ptr(), - num_blocks * sizeof(uint64_t), - cudaMemcpyDefault, - stream.value())); + cudf::detail::cuda_memcpy_async(device_span{row_ctx}.subspan(0, num_blocks), + host_span{row_ctx}.subspan(0, num_blocks), + stream); // Pass 2: Output row offsets cudf::io::csv::gpu::gather_row_offsets(parse_opts.view(), @@ -352,12 +347,9 @@ std::pair, selected_rows_offsets> load_data_and_gather stream); // With byte range, we want to keep only one row out of the specified range if (range_end < data_size) { - CUDF_CUDA_TRY(cudaMemcpyAsync(row_ctx.host_ptr(), - row_ctx.device_ptr(), - num_blocks * sizeof(uint64_t), - cudaMemcpyDefault, - stream.value())); - stream.synchronize(); + cudf::detail::cuda_memcpy(host_span{row_ctx}.subspan(0, num_blocks), + device_span{row_ctx}.subspan(0, num_blocks), + stream); size_t rows_out_of_range = 0; for (uint32_t i = 0; i < num_blocks; i++) { @@ -401,12 +393,9 @@ std::pair, selected_rows_offsets> load_data_and_gather // Remove header rows and extract header auto const header_row_index = std::max(header_rows, 1) - 1; if (header_row_index + 1 < row_offsets.size()) { - CUDF_CUDA_TRY(cudaMemcpyAsync(row_ctx.host_ptr(), - row_offsets.data() + header_row_index, - 2 * sizeof(uint64_t), - cudaMemcpyDefault, - stream.value())); - stream.synchronize(); + cudf::detail::cuda_memcpy(host_span{row_ctx}.subspan(0, 2), + device_span{row_offsets.data() + header_row_index, 2}, + stream); auto const header_start = input_pos + row_ctx[0]; auto const header_end = input_pos + row_ctx[1]; diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index b84446b5f3e..2bbe05ced84 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -27,6 +27,7 @@ #include 
#include #include +#include #include #include #include @@ -405,13 +406,8 @@ void write_chunked(data_sink* out_sink, out_sink->device_write(ptr_all_bytes, total_num_bytes, stream); } else { // copy the bytes to host to write them out - thrust::host_vector h_bytes(total_num_bytes); - CUDF_CUDA_TRY(cudaMemcpyAsync(h_bytes.data(), - ptr_all_bytes, - total_num_bytes * sizeof(char), - cudaMemcpyDefault, - stream.value())); - stream.synchronize(); + auto const h_bytes = cudf::detail::make_host_vector_sync( + device_span{ptr_all_bytes, total_num_bytes}, stream); out_sink->host_write(h_bytes.data(), total_num_bytes); } diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index a8682e6a760..ceaeb5d8f85 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -32,10 +32,8 @@ #include #include #include -#include #include #include -#include #include #include diff --git a/cpp/src/io/json/host_tree_algorithms.cu b/cpp/src/io/json/host_tree_algorithms.cu index f7e8134b68d..570a00cbfc2 100644 --- a/cpp/src/io/json/host_tree_algorithms.cu +++ b/cpp/src/io/json/host_tree_algorithms.cu @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -134,12 +135,13 @@ std::vector copy_strings_to_host_sync( // build std::string vector from chars and offsets std::vector host_data; host_data.reserve(col.size()); - std::transform( - std::begin(h_offsets), - std::end(h_offsets) - 1, - std::begin(h_offsets) + 1, - std::back_inserter(host_data), - [&](auto start, auto end) { return std::string(h_chars.data() + start, end - start); }); + std::transform(std::begin(h_offsets), + std::end(h_offsets) - 1, + std::begin(h_offsets) + 1, + std::back_inserter(host_data), + [&h_chars](auto start, auto end) { + return std::string(h_chars.data() + start, end - start); + }); return host_data; }; return to_host(d_column_names->view()); @@ -170,636 +172,78 @@ rmm::device_uvector is_all_nulls_each_column(device_span rmm::device_uvector is_all_nulls(num_cols, 
stream); thrust::fill(rmm::exec_policy_nosync(stream), is_all_nulls.begin(), is_all_nulls.end(), true); - auto parse_opt = parsing_options(options, stream); - thrust::for_each_n( - rmm::exec_policy_nosync(stream), - thrust::counting_iterator(0), - num_nodes, - [options = parse_opt.view(), - data = input.data(), - column_categories = d_column_tree.node_categories.begin(), - col_ids = col_ids.begin(), - range_begin = tree.node_range_begin.begin(), - range_end = tree.node_range_end.begin(), - is_all_nulls = is_all_nulls.begin()] __device__(size_type i) { - auto const node_category = column_categories[col_ids[i]]; - if (node_category == NC_STR or node_category == NC_VAL) { - auto const is_null_literal = serialized_trie_contains( - options.trie_na, - {data + range_begin[i], static_cast(range_end[i] - range_begin[i])}); - if (!is_null_literal) is_all_nulls[col_ids[i]] = false; - } - }); - return is_all_nulls; -} - -NodeIndexT get_row_array_parent_col_id(device_span col_ids, - bool is_enabled_lines, - rmm::cuda_stream_view stream) -{ - NodeIndexT value = parent_node_sentinel; - if (!col_ids.empty()) { - auto const list_node_index = is_enabled_lines ? 
0 : 1; - CUDF_CUDA_TRY(cudaMemcpyAsync(&value, - col_ids.data() + list_node_index, - sizeof(NodeIndexT), - cudaMemcpyDefault, - stream.value())); - stream.synchronize(); - } - return value; -} -/** - * @brief Holds member data pointers of `d_json_column` - * - */ -struct json_column_data { - using row_offset_t = json_column::row_offset_t; - row_offset_t* string_offsets; - row_offset_t* string_lengths; - row_offset_t* child_offsets; - bitmask_type* validity; -}; - -using hashmap_of_device_columns = - std::unordered_map>; - -std::pair, hashmap_of_device_columns> build_tree( - device_json_column& root, - host_span is_str_column_all_nulls, - tree_meta_t& d_column_tree, - device_span d_unique_col_ids, - device_span d_max_row_offsets, - std::vector const& column_names, - NodeIndexT row_array_parent_col_id, - bool is_array_of_arrays, - cudf::io::json_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); -void scatter_offsets(tree_meta_t const& tree, - device_span col_ids, - device_span row_offsets, - device_span node_ids, - device_span sorted_col_ids, // Reuse this for parent_col_ids - tree_meta_t const& d_column_tree, - host_span ignore_vals, - hashmap_of_device_columns const& columns, - rmm::cuda_stream_view stream); - -/** - * @brief Constructs `d_json_column` from node tree representation - * Newly constructed columns are inserted into `root`'s children. - * `root` must be a list type. 
- * - * @param input Input JSON string device data - * @param tree Node tree representation of the JSON string - * @param col_ids Column ids of the nodes in the tree - * @param row_offsets Row offsets of the nodes in the tree - * @param root Root node of the `d_json_column` tree - * @param is_array_of_arrays Whether the tree is an array of arrays - * @param options Parsing options specifying the parsing behaviour - * options affecting behaviour are - * is_enabled_lines: Whether the input is a line-delimited JSON - * is_enabled_mixed_types_as_string: Whether to enable reading mixed types as string - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the device memory - * of child_offets and validity members of `d_json_column` - */ -void make_device_json_column(device_span input, - tree_meta_t const& tree, - device_span col_ids, - device_span row_offsets, - device_json_column& root, - bool is_array_of_arrays, - cudf::io::json_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - bool const is_enabled_lines = options.is_enabled_lines(); - bool const is_enabled_mixed_types_as_string = options.is_enabled_mixed_types_as_string(); - // make a copy - auto sorted_col_ids = cudf::detail::make_device_uvector_async( - col_ids, stream, cudf::get_current_device_resource_ref()); - - // sort by {col_id} on {node_ids} stable - rmm::device_uvector node_ids(col_ids.size(), stream); - thrust::sequence(rmm::exec_policy_nosync(stream), node_ids.begin(), node_ids.end()); - thrust::stable_sort_by_key(rmm::exec_policy_nosync(stream), - sorted_col_ids.begin(), - sorted_col_ids.end(), - node_ids.begin()); - - NodeIndexT const row_array_parent_col_id = - get_row_array_parent_col_id(col_ids, is_enabled_lines, stream); - - // 1. gather column information. 
- auto [d_column_tree, d_unique_col_ids, d_max_row_offsets] = - reduce_to_column_tree(tree, - col_ids, - sorted_col_ids, - node_ids, - row_offsets, - is_array_of_arrays, - row_array_parent_col_id, - stream); - auto num_columns = d_unique_col_ids.size(); - std::vector column_names = copy_strings_to_host_sync( - input, d_column_tree.node_range_begin, d_column_tree.node_range_end, stream); - // array of arrays column names - if (is_array_of_arrays) { - auto const unique_col_ids = cudf::detail::make_host_vector_async(d_unique_col_ids, stream); - auto const column_parent_ids = - cudf::detail::make_host_vector_async(d_column_tree.parent_node_ids, stream); - TreeDepthT const row_array_children_level = is_enabled_lines ? 1 : 2; - auto values_column_indices = - get_values_column_indices(row_array_children_level, tree, col_ids, num_columns, stream); - auto h_values_column_indices = - cudf::detail::make_host_vector_sync(values_column_indices, stream); - std::transform(unique_col_ids.begin(), - unique_col_ids.end(), - column_names.cbegin(), - column_names.begin(), - [&h_values_column_indices, &column_parent_ids, row_array_parent_col_id]( - auto col_id, auto name) mutable { - return column_parent_ids[col_id] == row_array_parent_col_id - ? 
std::to_string(h_values_column_indices[col_id]) - : name; - }); - } - - auto const is_str_column_all_nulls = [&, &column_tree = d_column_tree]() { - if (is_enabled_mixed_types_as_string) { - return cudf::detail::make_std_vector_sync( - is_all_nulls_each_column(input, column_tree, tree, col_ids, options, stream), stream); - } - return std::vector(); - }(); - auto const [ignore_vals, columns] = build_tree(root, - is_str_column_all_nulls, - d_column_tree, - d_unique_col_ids, - d_max_row_offsets, - column_names, - row_array_parent_col_id, - is_array_of_arrays, - options, - stream, - mr); - - scatter_offsets(tree, - col_ids, - row_offsets, - node_ids, - sorted_col_ids, - d_column_tree, - ignore_vals, - columns, - stream); -} - -std::pair, hashmap_of_device_columns> build_tree( - device_json_column& root, - host_span is_str_column_all_nulls, - tree_meta_t& d_column_tree, - device_span d_unique_col_ids, - device_span d_max_row_offsets, - std::vector const& column_names, - NodeIndexT row_array_parent_col_id, - bool is_array_of_arrays, - cudf::io::json_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - bool const is_enabled_mixed_types_as_string = options.is_enabled_mixed_types_as_string(); - auto unique_col_ids = cudf::detail::make_host_vector_async(d_unique_col_ids, stream); - auto column_categories = - cudf::detail::make_host_vector_async(d_column_tree.node_categories, stream); - auto const column_parent_ids = - cudf::detail::make_host_vector_async(d_column_tree.parent_node_ids, stream); - auto column_range_beg = - cudf::detail::make_host_vector_async(d_column_tree.node_range_begin, stream); - auto const max_row_offsets = cudf::detail::make_host_vector_async(d_max_row_offsets, stream); - auto num_columns = d_unique_col_ids.size(); - stream.synchronize(); - - auto to_json_col_type = [](auto category) { - switch (category) { - case NC_STRUCT: return json_col_t::StructColumn; - case NC_LIST: return 
json_col_t::ListColumn; - case NC_STR: [[fallthrough]]; - case NC_VAL: return json_col_t::StringColumn; - default: return json_col_t::Unknown; - } - }; - auto init_to_zero = [stream](auto& v) { - thrust::uninitialized_fill(rmm::exec_policy_nosync(stream), v.begin(), v.end(), 0); - }; - - auto initialize_json_columns = [&](auto i, auto& col, auto column_category) { - if (column_category == NC_ERR || column_category == NC_FN) { - return; - } else if (column_category == NC_VAL || column_category == NC_STR) { - col.string_offsets.resize(max_row_offsets[i] + 1, stream); - col.string_lengths.resize(max_row_offsets[i] + 1, stream); - init_to_zero(col.string_offsets); - init_to_zero(col.string_lengths); - } else if (column_category == NC_LIST) { - col.child_offsets.resize(max_row_offsets[i] + 2, stream); - init_to_zero(col.child_offsets); - } - col.num_rows = max_row_offsets[i] + 1; - col.validity = - cudf::detail::create_null_mask(col.num_rows, cudf::mask_state::ALL_NULL, stream, mr); - col.type = to_json_col_type(column_category); - }; - - auto reinitialize_as_string = [&](auto i, auto& col) { - col.string_offsets.resize(max_row_offsets[i] + 1, stream); - col.string_lengths.resize(max_row_offsets[i] + 1, stream); - init_to_zero(col.string_offsets); - init_to_zero(col.string_lengths); - col.num_rows = max_row_offsets[i] + 1; - col.validity = - cudf::detail::create_null_mask(col.num_rows, cudf::mask_state::ALL_NULL, stream, mr); - col.type = json_col_t::StringColumn; - // destroy references of all child columns after this step, by calling remove_child_columns - }; - - path_from_tree tree_path{column_categories, - column_parent_ids, - column_names, - is_array_of_arrays, - row_array_parent_col_id}; - - // 2. generate nested columns tree and its device_memory - // reorder unique_col_ids w.r.t. column_range_begin for order of column to be in field order. 
- auto h_range_col_id_it = - thrust::make_zip_iterator(column_range_beg.begin(), unique_col_ids.begin()); - std::sort(h_range_col_id_it, h_range_col_id_it + num_columns, [](auto const& a, auto const& b) { - return thrust::get<0>(a) < thrust::get<0>(b); - }); - - // use hash map because we may skip field name's col_ids - hashmap_of_device_columns columns; - // map{parent_col_id, child_col_name}> = child_col_id, used for null value column tracking - std::map, NodeIndexT> mapped_columns; - // find column_ids which are values, but should be ignored in validity - auto ignore_vals = cudf::detail::make_host_vector(num_columns, stream); - std::fill(ignore_vals.begin(), ignore_vals.end(), false); - std::vector is_mixed_type_column(num_columns, 0); - std::vector is_pruned(num_columns, 0); - // for columns that are not mixed type but have been forced as string - std::vector forced_as_string_column(num_columns); - columns.try_emplace(parent_node_sentinel, std::ref(root)); - - std::function remove_child_columns = - [&](NodeIndexT this_col_id, device_json_column& col) { - for (auto const& col_name : col.column_order) { - auto child_id = mapped_columns[{this_col_id, col_name}]; - is_mixed_type_column[child_id] = 1; - remove_child_columns(child_id, col.child_columns.at(col_name)); - mapped_columns.erase({this_col_id, col_name}); - columns.erase(child_id); - } - col.child_columns.clear(); // their references are deleted above. 
- col.column_order.clear(); - }; - - auto name_and_parent_index = [&is_array_of_arrays, - &row_array_parent_col_id, - &column_parent_ids, - &column_categories, - &column_names](auto this_col_id) { - std::string name = ""; - auto parent_col_id = column_parent_ids[this_col_id]; - if (parent_col_id == parent_node_sentinel || column_categories[parent_col_id] == NC_LIST) { - if (is_array_of_arrays && parent_col_id == row_array_parent_col_id) { - name = column_names[this_col_id]; - } else { - name = list_child_name; - } - } else if (column_categories[parent_col_id] == NC_FN) { - auto field_name_col_id = parent_col_id; - parent_col_id = column_parent_ids[parent_col_id]; - name = column_names[field_name_col_id]; - } else { - CUDF_FAIL("Unexpected parent column category"); - } - return std::pair{name, parent_col_id}; - }; - - // Prune columns that are not required to be parsed. - if (options.is_enabled_prune_columns()) { - for (auto const this_col_id : unique_col_ids) { - if (column_categories[this_col_id] == NC_ERR || column_categories[this_col_id] == NC_FN) { - continue; - } - // Struct, List, String, Value - auto [name, parent_col_id] = name_and_parent_index(this_col_id); - // get path of this column, and get its dtype if present in options - auto const nt = tree_path.get_path(this_col_id); - std::optional const user_dtype = get_path_data_type(nt, options); - if (!user_dtype.has_value() and parent_col_id != parent_node_sentinel) { - is_pruned[this_col_id] = 1; - continue; - } else { - // make sure all its parents are not pruned. - while (parent_col_id != parent_node_sentinel and is_pruned[parent_col_id] == 1) { - is_pruned[parent_col_id] = 0; - parent_col_id = column_parent_ids[parent_col_id]; - } - } - } - } - - // Build the column tree, also, handles mixed types. 
- for (auto const this_col_id : unique_col_ids) { - if (column_categories[this_col_id] == NC_ERR || column_categories[this_col_id] == NC_FN) { - continue; - } - // Struct, List, String, Value - auto [name, parent_col_id] = name_and_parent_index(this_col_id); - - // if parent is mixed type column or this column is pruned or if parent - // has been forced as string, ignore this column. - if (parent_col_id != parent_node_sentinel && - (is_mixed_type_column[parent_col_id] || is_pruned[this_col_id]) || - forced_as_string_column[parent_col_id]) { - ignore_vals[this_col_id] = true; - if (is_mixed_type_column[parent_col_id]) { is_mixed_type_column[this_col_id] = 1; } - if (forced_as_string_column[parent_col_id]) { forced_as_string_column[this_col_id] = true; } - continue; - } - - // If the child is already found, - // replace if this column is a nested column and the existing was a value column - // ignore this column if this column is a value column and the existing was a nested column - auto it = columns.find(parent_col_id); - CUDF_EXPECTS(it != columns.end(), "Parent column not found"); - auto& parent_col = it->second.get(); - bool replaced = false; - if (mapped_columns.count({parent_col_id, name}) > 0) { - auto const old_col_id = mapped_columns[{parent_col_id, name}]; - // If mixed type as string is enabled, make both of them strings and merge them. - // All child columns will be ignored when parsing. - if (is_enabled_mixed_types_as_string) { - bool const is_mixed_type = [&]() { - // If new or old is STR and they are all not null, make it mixed type, else ignore. 
- if (column_categories[this_col_id] == NC_VAL || - column_categories[this_col_id] == NC_STR) { - if (is_str_column_all_nulls[this_col_id]) return false; - } - if (column_categories[old_col_id] == NC_VAL || column_categories[old_col_id] == NC_STR) { - if (is_str_column_all_nulls[old_col_id]) return false; - } - return true; - }(); - if (is_mixed_type) { - is_mixed_type_column[this_col_id] = 1; - is_mixed_type_column[old_col_id] = 1; - // if old col type (not cat) is list or struct, replace with string. - auto& col = columns.at(old_col_id).get(); - if (col.type == json_col_t::ListColumn or col.type == json_col_t::StructColumn) { - reinitialize_as_string(old_col_id, col); - remove_child_columns(old_col_id, col); - // all its children (which are already inserted) are ignored later. - } - col.forced_as_string_column = true; - columns.try_emplace(this_col_id, columns.at(old_col_id)); - continue; - } - } - - if (column_categories[this_col_id] == NC_VAL || column_categories[this_col_id] == NC_STR) { - ignore_vals[this_col_id] = true; - continue; - } - if (column_categories[old_col_id] == NC_VAL || column_categories[old_col_id] == NC_STR) { - // remap - ignore_vals[old_col_id] = true; - mapped_columns.erase({parent_col_id, name}); - columns.erase(old_col_id); - parent_col.child_columns.erase(name); - replaced = true; // to skip duplicate name in column_order - } else { - // If this is a nested column but we're trying to insert either (a) a list node into a - // struct column or (b) a struct node into a list column, we fail - CUDF_EXPECTS(not((column_categories[old_col_id] == NC_LIST and - column_categories[this_col_id] == NC_STRUCT) or - (column_categories[old_col_id] == NC_STRUCT and - column_categories[this_col_id] == NC_LIST)), - "A mix of lists and structs within the same column is not supported"); - } - } - - auto this_column_category = column_categories[this_col_id]; - // get path of this column, check if it is a struct/list forced as string, and enforce it - auto 
const nt = tree_path.get_path(this_col_id); - std::optional const user_dtype = get_path_data_type(nt, options); - if ((column_categories[this_col_id] == NC_STRUCT or - column_categories[this_col_id] == NC_LIST) and - user_dtype.has_value() and user_dtype.value().id() == type_id::STRING) { - this_column_category = NC_STR; - } - - CUDF_EXPECTS(parent_col.child_columns.count(name) == 0, "duplicate column name: " + name); - // move into parent - device_json_column col(stream, mr); - initialize_json_columns(this_col_id, col, this_column_category); - if ((column_categories[this_col_id] == NC_STRUCT or - column_categories[this_col_id] == NC_LIST) and - user_dtype.has_value() and user_dtype.value().id() == type_id::STRING) { - col.forced_as_string_column = true; - forced_as_string_column[this_col_id] = true; - } - - auto inserted = parent_col.child_columns.try_emplace(name, std::move(col)).second; - CUDF_EXPECTS(inserted, "child column insertion failed, duplicate column name in the parent"); - if (not replaced) parent_col.column_order.push_back(name); - columns.try_emplace(this_col_id, std::ref(parent_col.child_columns.at(name))); - mapped_columns.try_emplace(std::make_pair(parent_col_id, name), this_col_id); - } - - if (is_enabled_mixed_types_as_string) { - // ignore all children of mixed type columns - for (auto const this_col_id : unique_col_ids) { - auto parent_col_id = column_parent_ids[this_col_id]; - if (parent_col_id != parent_node_sentinel and is_mixed_type_column[parent_col_id] == 1) { - is_mixed_type_column[this_col_id] = 1; - ignore_vals[this_col_id] = true; - columns.erase(this_col_id); - } - // Convert only mixed type columns as string (so to copy), but not its children - if (parent_col_id != parent_node_sentinel and is_mixed_type_column[parent_col_id] == 0 and - is_mixed_type_column[this_col_id] == 1) - column_categories[this_col_id] = NC_STR; - } - cudf::detail::cuda_memcpy_async( - d_column_tree.node_categories, column_categories, stream); - } - - // 
ignore all children of columns forced as string - for (auto const this_col_id : unique_col_ids) { - auto parent_col_id = column_parent_ids[this_col_id]; - if (parent_col_id != parent_node_sentinel and forced_as_string_column[parent_col_id]) { - forced_as_string_column[this_col_id] = true; - ignore_vals[this_col_id] = true; - } - // Convert only mixed type columns as string (so to copy), but not its children - if (parent_col_id != parent_node_sentinel and not forced_as_string_column[parent_col_id] and - forced_as_string_column[this_col_id]) - column_categories[this_col_id] = NC_STR; - } - cudf::detail::cuda_memcpy_async(d_column_tree.node_categories, column_categories, stream); - - // restore unique_col_ids order - std::sort(h_range_col_id_it, h_range_col_id_it + num_columns, [](auto const& a, auto const& b) { - return thrust::get<1>(a) < thrust::get<1>(b); - }); - return {ignore_vals, columns}; -} - -void scatter_offsets(tree_meta_t const& tree, - device_span col_ids, - device_span row_offsets, - device_span node_ids, - device_span sorted_col_ids, // Reuse this for parent_col_ids - tree_meta_t const& d_column_tree, - host_span ignore_vals, - hashmap_of_device_columns const& columns, - rmm::cuda_stream_view stream) -{ - auto const num_nodes = col_ids.size(); - auto const num_columns = d_column_tree.node_categories.size(); - // move columns data to device. 
- auto columns_data = cudf::detail::make_host_vector(num_columns, stream); - for (auto& [col_id, col_ref] : columns) { - if (col_id == parent_node_sentinel) continue; - auto& col = col_ref.get(); - columns_data[col_id] = json_column_data{col.string_offsets.data(), - col.string_lengths.data(), - col.child_offsets.data(), - static_cast(col.validity.data())}; - } - - auto d_ignore_vals = cudf::detail::make_device_uvector_async( - ignore_vals, stream, cudf::get_current_device_resource_ref()); - auto d_columns_data = cudf::detail::make_device_uvector_async( - columns_data, stream, cudf::get_current_device_resource_ref()); - - // 3. scatter string offsets to respective columns, set validity bits - thrust::for_each_n( - rmm::exec_policy_nosync(stream), - thrust::counting_iterator(0), - num_nodes, - [column_categories = d_column_tree.node_categories.begin(), - col_ids = col_ids.begin(), - row_offsets = row_offsets.begin(), - range_begin = tree.node_range_begin.begin(), - range_end = tree.node_range_end.begin(), - d_ignore_vals = d_ignore_vals.begin(), - d_columns_data = d_columns_data.begin()] __device__(size_type i) { - if (d_ignore_vals[col_ids[i]]) return; - auto const node_category = column_categories[col_ids[i]]; - switch (node_category) { - case NC_STRUCT: set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); break; - case NC_LIST: set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); break; - case NC_STR: [[fallthrough]]; - case NC_VAL: - if (d_ignore_vals[col_ids[i]]) break; - set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); - d_columns_data[col_ids[i]].string_offsets[row_offsets[i]] = range_begin[i]; - d_columns_data[col_ids[i]].string_lengths[row_offsets[i]] = range_end[i] - range_begin[i]; - break; - default: break; - } - }); - - // 4. scatter List offset - // copy_if only node's whose parent is list, (node_id, parent_col_id) - // stable_sort by parent_col_id of {node_id}. 
- // For all unique parent_node_id of (i==0, i-1!=i), write start offset. - // (i==last, i+1!=i), write end offset. - // unique_copy_by_key {parent_node_id} {row_offset} to - // col[parent_col_id].child_offsets[row_offset[parent_node_id]] - - auto& parent_col_ids = sorted_col_ids; // reuse sorted_col_ids - auto parent_col_id = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), - cuda::proclaim_return_type( - [col_ids = col_ids.begin(), - parent_node_ids = tree.parent_node_ids.begin()] __device__(size_type node_id) { - return parent_node_ids[node_id] == parent_node_sentinel ? parent_node_sentinel - : col_ids[parent_node_ids[node_id]]; - })); - auto const list_children_end = thrust::copy_if( - rmm::exec_policy_nosync(stream), - thrust::make_zip_iterator(thrust::make_counting_iterator(0), parent_col_id), - thrust::make_zip_iterator(thrust::make_counting_iterator(0), parent_col_id) + - num_nodes, - thrust::make_counting_iterator(0), - thrust::make_zip_iterator(node_ids.begin(), parent_col_ids.begin()), - [d_ignore_vals = d_ignore_vals.begin(), - parent_node_ids = tree.parent_node_ids.begin(), - column_categories = d_column_tree.node_categories.begin(), - col_ids = col_ids.begin()] __device__(size_type node_id) { - auto parent_node_id = parent_node_ids[node_id]; - return parent_node_id != parent_node_sentinel and - column_categories[col_ids[parent_node_id]] == NC_LIST and - (!d_ignore_vals[col_ids[parent_node_id]]); - }); - - auto const num_list_children = - list_children_end - thrust::make_zip_iterator(node_ids.begin(), parent_col_ids.begin()); - thrust::stable_sort_by_key(rmm::exec_policy_nosync(stream), - parent_col_ids.begin(), - parent_col_ids.begin() + num_list_children, - node_ids.begin()); + auto parse_opt = parsing_options(options, stream); thrust::for_each_n( rmm::exec_policy_nosync(stream), - thrust::make_counting_iterator(0), - num_list_children, - [node_ids = node_ids.begin(), - parent_node_ids = tree.parent_node_ids.begin(), - 
parent_col_ids = parent_col_ids.begin(), - row_offsets = row_offsets.begin(), - d_columns_data = d_columns_data.begin(), - num_list_children] __device__(size_type i) { - auto const node_id = node_ids[i]; - auto const parent_node_id = parent_node_ids[node_id]; - // scatter to list_offset - if (i == 0 or parent_node_ids[node_ids[i - 1]] != parent_node_id) { - d_columns_data[parent_col_ids[i]].child_offsets[row_offsets[parent_node_id]] = - row_offsets[node_id]; - } - // last value of list child_offset is its size. - if (i == num_list_children - 1 or parent_node_ids[node_ids[i + 1]] != parent_node_id) { - d_columns_data[parent_col_ids[i]].child_offsets[row_offsets[parent_node_id] + 1] = - row_offsets[node_id] + 1; + thrust::counting_iterator(0), + num_nodes, + [options = parse_opt.view(), + data = input.data(), + column_categories = d_column_tree.node_categories.begin(), + col_ids = col_ids.begin(), + range_begin = tree.node_range_begin.begin(), + range_end = tree.node_range_end.begin(), + is_all_nulls = is_all_nulls.begin()] __device__(size_type i) { + auto const node_category = column_categories[col_ids[i]]; + if (node_category == NC_STR or node_category == NC_VAL) { + auto const is_null_literal = serialized_trie_contains( + options.trie_na, + {data + range_begin[i], static_cast(range_end[i] - range_begin[i])}); + if (!is_null_literal) is_all_nulls[col_ids[i]] = false; } }); + return is_all_nulls; +} - // 5. scan on offsets. 
- for (auto& [id, col_ref] : columns) { - auto& col = col_ref.get(); - if (col.type == json_col_t::StringColumn) { - thrust::inclusive_scan(rmm::exec_policy_nosync(stream), - col.string_offsets.begin(), - col.string_offsets.end(), - col.string_offsets.begin(), - thrust::maximum{}); - } else if (col.type == json_col_t::ListColumn) { - thrust::inclusive_scan(rmm::exec_policy_nosync(stream), - col.child_offsets.begin(), - col.child_offsets.end(), - col.child_offsets.begin(), - thrust::maximum{}); - } - } - stream.synchronize(); +NodeIndexT get_row_array_parent_col_id(device_span col_ids, + bool is_enabled_lines, + rmm::cuda_stream_view stream) +{ + if (col_ids.empty()) { return parent_node_sentinel; } + + auto const list_node_index = is_enabled_lines ? 0 : 1; + auto const value = cudf::detail::make_host_vector_sync( + device_span{col_ids.data() + list_node_index, 1}, stream); + + return value[0]; } +/** + * @brief Holds member data pointers of `d_json_column` + * + */ +struct json_column_data { + using row_offset_t = json_column::row_offset_t; + row_offset_t* string_offsets; + row_offset_t* string_lengths; + row_offset_t* child_offsets; + bitmask_type* validity; +}; + +using hashmap_of_device_columns = + std::unordered_map>; + +std::pair, hashmap_of_device_columns> build_tree( + device_json_column& root, + host_span is_str_column_all_nulls, + tree_meta_t& d_column_tree, + device_span d_unique_col_ids, + device_span d_max_row_offsets, + std::vector const& column_names, + NodeIndexT row_array_parent_col_id, + bool is_array_of_arrays, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); -namespace experimental { +void scatter_offsets(tree_meta_t const& tree, + device_span col_ids, + device_span row_offsets, + device_span node_ids, + device_span sorted_col_ids, // Reuse this for parent_col_ids + tree_meta_t const& d_column_tree, + host_span ignore_vals, + hashmap_of_device_columns const& columns, + 
rmm::cuda_stream_view stream); std::map unified_schema(cudf::io::json_reader_options const& options) { @@ -829,19 +273,6 @@ std::map unified_schema(cudf::io::json_reader_optio options.get_dtypes()); } -std::pair, hashmap_of_device_columns> build_tree( - device_json_column& root, - host_span is_str_column_all_nulls, - tree_meta_t& d_column_tree, - device_span d_unique_col_ids, - device_span d_max_row_offsets, - std::vector const& column_names, - NodeIndexT row_array_parent_col_id, - bool is_array_of_arrays, - cudf::io::json_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - /** * @brief Constructs `d_json_column` from node tree representation * Newly constructed columns are inserted into `root`'s children. @@ -1033,7 +464,7 @@ std::pair, hashmap_of_device_columns> build_tree std::fill_n(is_pruned.begin(), num_columns, options.is_enabled_prune_columns()); // prune all children of a column, but not self. - auto ignore_all_children = [&](auto parent_col_id) { + auto ignore_all_children = [&adj, &is_pruned](auto parent_col_id) { std::deque offspring; if (adj.count(parent_col_id)) { for (auto const& child : adj[parent_col_id]) { @@ -1384,14 +815,149 @@ std::pair, hashmap_of_device_columns> build_tree column_categories.cbegin(), expected_types.begin(), [](auto exp, auto cat) { return exp == NUM_NODE_CLASSES ? 
cat : exp; }); - cudaMemcpyAsync(d_column_tree.node_categories.begin(), - expected_types.data(), - expected_types.size() * sizeof(column_categories[0]), - cudaMemcpyDefault, - stream.value()); + cudf::detail::cuda_memcpy_async(d_column_tree.node_categories, expected_types, stream); return {is_pruned, columns}; } -} // namespace experimental + +void scatter_offsets(tree_meta_t const& tree, + device_span col_ids, + device_span row_offsets, + device_span node_ids, + device_span sorted_col_ids, // Reuse this for parent_col_ids + tree_meta_t const& d_column_tree, + host_span ignore_vals, + hashmap_of_device_columns const& columns, + rmm::cuda_stream_view stream) +{ + auto const num_nodes = col_ids.size(); + auto const num_columns = d_column_tree.node_categories.size(); + // move columns data to device. + auto columns_data = cudf::detail::make_host_vector(num_columns, stream); + for (auto& [col_id, col_ref] : columns) { + if (col_id == parent_node_sentinel) continue; + auto& col = col_ref.get(); + columns_data[col_id] = json_column_data{col.string_offsets.data(), + col.string_lengths.data(), + col.child_offsets.data(), + static_cast(col.validity.data())}; + } + + auto d_ignore_vals = cudf::detail::make_device_uvector_async( + ignore_vals, stream, cudf::get_current_device_resource_ref()); + auto d_columns_data = cudf::detail::make_device_uvector_async( + columns_data, stream, cudf::get_current_device_resource_ref()); + + // 3. 
scatter string offsets to respective columns, set validity bits + thrust::for_each_n( + rmm::exec_policy_nosync(stream), + thrust::counting_iterator(0), + num_nodes, + [column_categories = d_column_tree.node_categories.begin(), + col_ids = col_ids.begin(), + row_offsets = row_offsets.begin(), + range_begin = tree.node_range_begin.begin(), + range_end = tree.node_range_end.begin(), + d_ignore_vals = d_ignore_vals.begin(), + d_columns_data = d_columns_data.begin()] __device__(size_type i) { + if (d_ignore_vals[col_ids[i]]) return; + auto const node_category = column_categories[col_ids[i]]; + switch (node_category) { + case NC_STRUCT: set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); break; + case NC_LIST: set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); break; + case NC_STR: [[fallthrough]]; + case NC_VAL: + if (d_ignore_vals[col_ids[i]]) break; + set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); + d_columns_data[col_ids[i]].string_offsets[row_offsets[i]] = range_begin[i]; + d_columns_data[col_ids[i]].string_lengths[row_offsets[i]] = range_end[i] - range_begin[i]; + break; + default: break; + } + }); + + // 4. scatter List offset + // copy_if only node's whose parent is list, (node_id, parent_col_id) + // stable_sort by parent_col_id of {node_id}. + // For all unique parent_node_id of (i==0, i-1!=i), write start offset. + // (i==last, i+1!=i), write end offset. + // unique_copy_by_key {parent_node_id} {row_offset} to + // col[parent_col_id].child_offsets[row_offset[parent_node_id]] + + auto& parent_col_ids = sorted_col_ids; // reuse sorted_col_ids + auto parent_col_id = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + cuda::proclaim_return_type( + [col_ids = col_ids.begin(), + parent_node_ids = tree.parent_node_ids.begin()] __device__(size_type node_id) { + return parent_node_ids[node_id] == parent_node_sentinel ? 
parent_node_sentinel + : col_ids[parent_node_ids[node_id]]; + })); + auto const list_children_end = thrust::copy_if( + rmm::exec_policy_nosync(stream), + thrust::make_zip_iterator(thrust::make_counting_iterator(0), parent_col_id), + thrust::make_zip_iterator(thrust::make_counting_iterator(0), parent_col_id) + + num_nodes, + thrust::make_counting_iterator(0), + thrust::make_zip_iterator(node_ids.begin(), parent_col_ids.begin()), + [d_ignore_vals = d_ignore_vals.begin(), + parent_node_ids = tree.parent_node_ids.begin(), + column_categories = d_column_tree.node_categories.begin(), + col_ids = col_ids.begin()] __device__(size_type node_id) { + auto parent_node_id = parent_node_ids[node_id]; + return parent_node_id != parent_node_sentinel and + column_categories[col_ids[parent_node_id]] == NC_LIST and + (!d_ignore_vals[col_ids[parent_node_id]]); + }); + + auto const num_list_children = + list_children_end - thrust::make_zip_iterator(node_ids.begin(), parent_col_ids.begin()); + thrust::stable_sort_by_key(rmm::exec_policy_nosync(stream), + parent_col_ids.begin(), + parent_col_ids.begin() + num_list_children, + node_ids.begin()); + thrust::for_each_n( + rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(0), + num_list_children, + [node_ids = node_ids.begin(), + parent_node_ids = tree.parent_node_ids.begin(), + parent_col_ids = parent_col_ids.begin(), + row_offsets = row_offsets.begin(), + d_columns_data = d_columns_data.begin(), + num_list_children] __device__(size_type i) { + auto const node_id = node_ids[i]; + auto const parent_node_id = parent_node_ids[node_id]; + // scatter to list_offset + if (i == 0 or parent_node_ids[node_ids[i - 1]] != parent_node_id) { + d_columns_data[parent_col_ids[i]].child_offsets[row_offsets[parent_node_id]] = + row_offsets[node_id]; + } + // last value of list child_offset is its size. 
+ if (i == num_list_children - 1 or parent_node_ids[node_ids[i + 1]] != parent_node_id) { + d_columns_data[parent_col_ids[i]].child_offsets[row_offsets[parent_node_id] + 1] = + row_offsets[node_id] + 1; + } + }); + + // 5. scan on offsets. + for (auto& [id, col_ref] : columns) { + auto& col = col_ref.get(); + if (col.type == json_col_t::StringColumn) { + thrust::inclusive_scan(rmm::exec_policy_nosync(stream), + col.string_offsets.begin(), + col.string_offsets.end(), + col.string_offsets.begin(), + thrust::maximum{}); + } else if (col.type == json_col_t::ListColumn) { + thrust::inclusive_scan(rmm::exec_policy_nosync(stream), + col.child_offsets.begin(), + col.child_offsets.end(), + col.child_offsets.begin(), + thrust::maximum{}); + } + } + stream.synchronize(); +} } // namespace cudf::io::json::detail diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 912e93d52ae..7e4d975e431 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -485,16 +485,6 @@ std::pair, std::vector> device_json_co } } -template -auto make_device_json_column_dispatch(bool experimental, Args&&... 
args) -{ - if (experimental) { - return experimental::make_device_json_column(std::forward(args)...); - } else { - return make_device_json_column(std::forward(args)...); - } -} - table_with_metadata device_parse_nested_json(device_span d_input, cudf::io::json_reader_options const& options, rmm::cuda_stream_view stream, @@ -523,16 +513,14 @@ table_with_metadata device_parse_nested_json(device_span d_input, #endif bool const is_array_of_arrays = [&]() { - std::array h_node_categories = {NC_ERR, NC_ERR}; - auto const size_to_copy = std::min(size_t{2}, gpu_tree.node_categories.size()); - CUDF_CUDA_TRY(cudaMemcpyAsync(h_node_categories.data(), - gpu_tree.node_categories.data(), - sizeof(node_t) * size_to_copy, - cudaMemcpyDefault, - stream.value())); - stream.synchronize(); + auto const size_to_copy = std::min(size_t{2}, gpu_tree.node_categories.size()); + if (size_to_copy == 0) return false; + auto const h_node_categories = cudf::detail::make_host_vector_sync( + device_span{gpu_tree.node_categories.data(), size_to_copy}, stream); + if (options.is_enabled_lines()) return h_node_categories[0] == NC_LIST; - return h_node_categories[0] == NC_LIST and h_node_categories[1] == NC_LIST; + return h_node_categories.size() >= 2 and h_node_categories[0] == NC_LIST and + h_node_categories[1] == NC_LIST; }(); auto [gpu_col_id, gpu_row_offsets] = @@ -553,16 +541,15 @@ table_with_metadata device_parse_nested_json(device_span d_input, 0); // Get internal JSON column - make_device_json_column_dispatch(options.is_enabled_experimental(), - d_input, - gpu_tree, - gpu_col_id, - gpu_row_offsets, - root_column, - is_array_of_arrays, - options, - stream, - mr); + make_device_json_column(d_input, + gpu_tree, + gpu_col_id, + gpu_row_offsets, + root_column, + is_array_of_arrays, + options, + stream, + mr); // data_root refers to the root column of the data represented by the given JSON string auto& data_root = diff --git a/cpp/src/io/json/json_normalization.cu 
b/cpp/src/io/json/json_normalization.cu index 2d435dc8e1a..34a87918e57 100644 --- a/cpp/src/io/json/json_normalization.cu +++ b/cpp/src/io/json/json_normalization.cu @@ -16,6 +16,7 @@ #include "io/fst/lookup_tables.cuh" +#include #include #include #include @@ -24,7 +25,6 @@ #include #include -#include #include #include @@ -316,7 +316,7 @@ void normalize_single_quotes(datasource::owning_buffer& inda stream); rmm::device_buffer outbuf(indata.size() * 2, stream, mr); - rmm::device_scalar outbuf_size(stream, mr); + cudf::detail::device_scalar outbuf_size(stream, mr); parser.Transduce(reinterpret_cast(indata.data()), static_cast(indata.size()), static_cast(outbuf.data()), @@ -401,7 +401,7 @@ std:: stream); rmm::device_uvector outbuf_indices(inbuf.size(), stream, mr); - rmm::device_scalar outbuf_indices_size(stream, mr); + cudf::detail::device_scalar outbuf_indices_size(stream, mr); parser.Transduce(inbuf.data(), static_cast(inbuf.size()), thrust::make_discard_iterator(), diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu index d949635c1cc..e2fe926ea19 100644 --- a/cpp/src/io/json/json_tree.cu +++ b/cpp/src/io/json/json_tree.cu @@ -264,16 +264,13 @@ tree_meta_t get_tree_representation(device_span tokens, error_count > 0) { auto const error_location = thrust::find(rmm::exec_policy(stream), tokens.begin(), tokens.end(), token_t::ErrorBegin); - SymbolOffsetT error_index; - CUDF_CUDA_TRY( - cudaMemcpyAsync(&error_index, - token_indices.data() + thrust::distance(tokens.begin(), error_location), - sizeof(SymbolOffsetT), - cudaMemcpyDefault, - stream.value())); - stream.synchronize(); + auto error_index = cudf::detail::make_host_vector_sync( + device_span{ + token_indices.data() + thrust::distance(tokens.begin(), error_location), 1}, + stream); + CUDF_FAIL("JSON Parser encountered an invalid format at location " + - std::to_string(error_index)); + std::to_string(error_index[0])); } auto const num_tokens = tokens.size(); diff --git 
a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 3d9a51833e0..7b3b04dea16 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -19,10 +19,7 @@ #include #include #include -#include -#include #include -#include #include #include @@ -405,21 +402,6 @@ void make_device_json_column(device_span input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -namespace experimental { -/** - * @copydoc cudf::io::json::detail::make_device_json_column - */ -void make_device_json_column(device_span input, - tree_meta_t const& tree, - device_span col_ids, - device_span row_offsets, - device_json_column& root, - bool is_array_of_arrays, - cudf::io::json_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); -} // namespace experimental - /** * @brief Retrieves the parse_options to be used for type inference and type casting * diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu index 69a51fab5dc..60e78f4763d 100644 --- a/cpp/src/io/json/nested_json_gpu.cu +++ b/cpp/src/io/json/nested_json_gpu.cu @@ -21,6 +21,7 @@ #include "nested_json.hpp" #include +#include #include #include #include @@ -34,7 +35,6 @@ #include #include -#include #include #include @@ -1446,11 +1446,7 @@ void get_stack_context(device_span json_in, constexpr StackSymbolT read_symbol = 'x'; // Number of stack operations in the input (i.e., number of '{', '}', '[', ']' outside of quotes) - rmm::device_scalar d_num_stack_ops(stream); - - // Sequence of stack symbols and their position in the original input (sparse representation) - rmm::device_uvector stack_ops{json_in.size(), stream}; - rmm::device_uvector stack_op_indices{json_in.size(), stream}; + cudf::detail::device_scalar d_num_stack_ops(stream); // Prepare finite-state transducer that only selects '{', '}', '[', ']' outside of quotes constexpr auto max_translation_table_size = @@ -1468,11 +1464,26 @@ void 
get_stack_context(device_span json_in, // "Search" for relevant occurrence of brackets and braces that indicate the beginning/end // of structs/lists + // Run FST to estimate the sizes of translated buffers + json_to_stack_ops_fst.Transduce(json_in.begin(), + static_cast(json_in.size()), + thrust::make_discard_iterator(), + thrust::make_discard_iterator(), + d_num_stack_ops.data(), + to_stack_op::start_state, + stream); + + auto stack_ops_bufsize = d_num_stack_ops.value(stream); + // Sequence of stack symbols and their position in the original input (sparse representation) + rmm::device_uvector stack_ops{stack_ops_bufsize, stream}; + rmm::device_uvector stack_op_indices{stack_ops_bufsize, stream}; + + // Run bracket-brace FST to retrieve starting positions of structs and lists json_to_stack_ops_fst.Transduce(json_in.begin(), static_cast(json_in.size()), stack_ops.data(), stack_op_indices.data(), - d_num_stack_ops.data(), + thrust::make_discard_iterator(), to_stack_op::start_state, stream); @@ -1508,6 +1519,7 @@ std::pair, rmm::device_uvector> pr device_span token_indices, rmm::cuda_stream_view stream) { + CUDF_FUNC_RANGE(); // Instantiate FST for post-processing the token stream to remove all tokens that belong to an // invalid JSON line token_filter::UnwrapTokenFromSymbolOp sgid_op{}; @@ -1519,7 +1531,7 @@ std::pair, rmm::device_uvector> pr stream); auto const mr = cudf::get_current_device_resource_ref(); - rmm::device_scalar d_num_selected_tokens(stream, mr); + cudf::detail::device_scalar d_num_selected_tokens(stream, mr); rmm::device_uvector filtered_tokens_out{tokens.size(), stream, mr}; rmm::device_uvector filtered_token_indices_out{tokens.size(), stream, mr}; @@ -1638,26 +1650,33 @@ std::pair, rmm::device_uvector> ge std::size_t constexpr max_tokens_per_struct = 6; auto const max_token_out_count = cudf::util::div_rounding_up_safe(json_in.size(), min_chars_per_struct) * max_tokens_per_struct; - rmm::device_scalar num_written_tokens{stream}; + 
cudf::detail::device_scalar num_written_tokens{stream}; // In case we're recovering on invalid JSON lines, post-processing the token stream requires to // see a JSON-line delimiter as the very first item SymbolOffsetT const delimiter_offset = (format == tokenizer_pda::json_format_cfg_t::JSON_LINES_RECOVER ? 1 : 0); - rmm::device_uvector tokens{max_token_out_count + delimiter_offset, stream, mr}; - rmm::device_uvector tokens_indices{ - max_token_out_count + delimiter_offset, stream, mr}; + // Run FST to estimate the size of output buffers json_to_tokens_fst.Transduce(zip_in, static_cast(json_in.size()), - tokens.data() + delimiter_offset, - tokens_indices.data() + delimiter_offset, + thrust::make_discard_iterator(), + thrust::make_discard_iterator(), num_written_tokens.data(), tokenizer_pda::start_state, stream); auto const num_total_tokens = num_written_tokens.value(stream) + delimiter_offset; - tokens.resize(num_total_tokens, stream); - tokens_indices.resize(num_total_tokens, stream); + rmm::device_uvector tokens{num_total_tokens, stream, mr}; + rmm::device_uvector tokens_indices{num_total_tokens, stream, mr}; + + // Run FST to translate the input JSON string into tokens and indices at which they occur + json_to_tokens_fst.Transduce(zip_in, + static_cast(json_in.size()), + tokens.data() + delimiter_offset, + tokens_indices.data() + delimiter_offset, + thrust::make_discard_iterator(), + tokenizer_pda::start_state, + stream); if (delimiter_offset == 1) { tokens.set_element(0, token_t::LineEnd, stream); diff --git a/cpp/src/io/json/read_json.cu b/cpp/src/io/json/read_json.cu index c424d2b3b62..2bc15ea19cb 100644 --- a/cpp/src/io/json/read_json.cu +++ b/cpp/src/io/json/read_json.cu @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -127,7 +128,8 @@ datasource::owning_buffer get_record_range_raw_input( std::size_t const total_source_size = sources_size(sources, 0, 0); auto constexpr num_delimiter_chars = 1; - auto const num_extra_delimiters = 
num_delimiter_chars * (sources.size() - 1); + auto const delimiter = reader_opts.get_delimiter(); + auto const num_extra_delimiters = num_delimiter_chars * sources.size(); compression_type const reader_compression = reader_opts.get_compression(); std::size_t const chunk_offset = reader_opts.get_byte_range_offset(); std::size_t chunk_size = reader_opts.get_byte_range_size(); @@ -135,10 +137,10 @@ datasource::owning_buffer get_record_range_raw_input( CUDF_EXPECTS(total_source_size ? chunk_offset < total_source_size : !chunk_offset, "Invalid offsetting", std::invalid_argument); - auto should_load_all_sources = !chunk_size || chunk_size >= total_source_size - chunk_offset; - chunk_size = should_load_all_sources ? total_source_size - chunk_offset : chunk_size; + auto should_load_till_last_source = !chunk_size || chunk_size >= total_source_size - chunk_offset; + chunk_size = should_load_till_last_source ? total_source_size - chunk_offset : chunk_size; - int num_subchunks_prealloced = should_load_all_sources ? 0 : max_subchunks_prealloced; + int num_subchunks_prealloced = should_load_till_last_source ? 0 : max_subchunks_prealloced; std::size_t const size_per_subchunk = estimate_size_per_subchunk(chunk_size); // The allocation for single source compressed input is estimated by assuming a ~4:1 @@ -155,17 +157,17 @@ datasource::owning_buffer get_record_range_raw_input( // Offset within buffer indicating first read position std::int64_t buffer_offset = 0; - auto readbufspan = - ingest_raw_input(bufspan, sources, reader_compression, chunk_offset, chunk_size, stream); + auto readbufspan = ingest_raw_input( + bufspan, sources, reader_compression, chunk_offset, chunk_size, delimiter, stream); auto const shift_for_nonzero_offset = std::min(chunk_offset, 1); auto const first_delim_pos = - chunk_offset == 0 ? 0 : find_first_delimiter(readbufspan, '\n', stream); + chunk_offset == 0 ? 
0 : find_first_delimiter(readbufspan, delimiter, stream); if (first_delim_pos == -1) { // return empty owning datasource buffer auto empty_buf = rmm::device_buffer(0, stream); return datasource::owning_buffer(std::move(empty_buf)); - } else if (!should_load_all_sources) { + } else if (!should_load_till_last_source) { // Find next delimiter std::int64_t next_delim_pos = -1; std::size_t next_subchunk_start = chunk_offset + chunk_size; @@ -180,14 +182,15 @@ datasource::owning_buffer get_record_range_raw_input( reader_compression, next_subchunk_start, size_per_subchunk, + delimiter, stream); - next_delim_pos = find_first_delimiter(readbufspan, '\n', stream) + buffer_offset; + next_delim_pos = find_first_delimiter(readbufspan, delimiter, stream) + buffer_offset; next_subchunk_start += size_per_subchunk; } if (next_delim_pos < buffer_offset) { if (next_subchunk_start >= total_source_size) { // If we have reached the end of source list but the source does not terminate with a - // newline character + // delimiter character next_delim_pos = buffer_offset + readbufspan.size(); } else { // Our buffer_size estimate is insufficient to read until the end of the line! We need to @@ -209,10 +212,26 @@ datasource::owning_buffer get_record_range_raw_input( reinterpret_cast(buffer.data()) + first_delim_pos + shift_for_nonzero_offset, next_delim_pos - first_delim_pos - shift_for_nonzero_offset); } + + // Add delimiter to end of buffer - possibly adding an empty line to the input buffer - iff we are + // reading till the end of the last source i.e. should_load_till_last_source is true Note that the + // table generated from the JSONL input remains unchanged since empty lines are ignored by the + // parser. 
+ size_t num_chars = readbufspan.size() - first_delim_pos - shift_for_nonzero_offset; + if (num_chars) { + auto last_char = delimiter; + cudf::detail::cuda_memcpy_async( + device_span(reinterpret_cast(buffer.data()), buffer.size()) + .subspan(readbufspan.size(), 1), + host_span(&last_char, 1, false), + stream); + num_chars++; + } + return datasource::owning_buffer( std::move(buffer), reinterpret_cast(buffer.data()) + first_delim_pos + shift_for_nonzero_offset, - readbufspan.size() - first_delim_pos - shift_for_nonzero_offset); + num_chars); } // Helper function to read the current batch using byte range offsets and size @@ -245,6 +264,7 @@ device_span ingest_raw_input(device_span buffer, compression_type compression, std::size_t range_offset, std::size_t range_size, + char delimiter, rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); @@ -296,7 +316,7 @@ device_span ingest_raw_input(device_span buffer, if (sources.size() > 1) { static_assert(num_delimiter_chars == 1, "Currently only single-character delimiters are supported"); - auto const delimiter_source = thrust::make_constant_iterator('\n'); + auto const delimiter_source = thrust::make_constant_iterator(delimiter); auto const d_delimiter_map = cudf::detail::make_device_uvector_async( delimiter_map, stream, cudf::get_current_device_resource_ref()); thrust::scatter(rmm::exec_policy_nosync(stream), @@ -315,13 +335,12 @@ device_span ingest_raw_input(device_span buffer, // Reading to host because decompression of a single block is much faster on the CPU sources[0]->host_read(range_offset, remaining_bytes_to_read, hbuffer.data()); auto uncomp_data = decompress(compression, hbuffer); - CUDF_CUDA_TRY(cudaMemcpyAsync(buffer.data(), - reinterpret_cast(uncomp_data.data()), - uncomp_data.size() * sizeof(char), - cudaMemcpyHostToDevice, - stream.value())); - stream.synchronize(); - return buffer.first(uncomp_data.size()); + auto ret_buffer = buffer.first(uncomp_data.size()); + cudf::detail::cuda_memcpy( + ret_buffer, + 
host_span{reinterpret_cast(uncomp_data.data()), uncomp_data.size()}, + stream); + return ret_buffer; } table_with_metadata read_json(host_span> sources, diff --git a/cpp/src/io/json/read_json.hpp b/cpp/src/io/json/read_json.hpp index 982190eecb5..4def69cc629 100644 --- a/cpp/src/io/json/read_json.hpp +++ b/cpp/src/io/json/read_json.hpp @@ -56,6 +56,7 @@ device_span ingest_raw_input(device_span buffer, compression_type compression, size_t range_offset, size_t range_size, + char delimiter, rmm::cuda_stream_view stream); /** diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 9cc77e8e738..fcaee9c548e 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -516,10 +516,10 @@ void reader_impl::load_next_stripe_data(read_mode mode) _stream.synchronize(); stream_synchronized = true; } - device_read_tasks.push_back( - std::pair(source_ptr->device_read_async( - read_info.offset, read_info.length, dst_base + read_info.dst_pos, _stream), - read_info.length)); + device_read_tasks.emplace_back( + source_ptr->device_read_async( + read_info.offset, read_info.length, dst_base + read_info.dst_pos, _stream), + read_info.length); } else { auto buffer = source_ptr->host_read(read_info.offset, read_info.length); diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index a1e4aa65dcf..0081ed30d17 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -22,6 +22,8 @@ #include "io/utilities/hostdevice_span.hpp" #include +#include +#include #include #include #include @@ -32,7 +34,6 @@ #include #include -#include #include #include @@ -451,7 +452,7 @@ void decode_stream_data(int64_t num_dicts, update_null_mask(chunks, out_buffers, stream, mr); } - rmm::device_scalar error_count(0, stream); + cudf::detail::device_scalar error_count(0, stream); gpu::DecodeOrcColumnData(chunks.base_device_ptr(), global_dict.data(), row_groups, diff 
--git a/cpp/src/io/orc/reader_impl_helpers.cpp b/cpp/src/io/orc/reader_impl_helpers.cpp index 4c1079cffe8..7e5db4b7617 100644 --- a/cpp/src/io/orc/reader_impl_helpers.cpp +++ b/cpp/src/io/orc/reader_impl_helpers.cpp @@ -16,8 +16,6 @@ #include "reader_impl_helpers.hpp" -#include - namespace cudf::io::orc::detail { std::unique_ptr create_empty_column(size_type orc_col_id, diff --git a/cpp/src/io/orc/reader_impl_helpers.hpp b/cpp/src/io/orc/reader_impl_helpers.hpp index 5528b2ee763..4cded30d89b 100644 --- a/cpp/src/io/orc/reader_impl_helpers.hpp +++ b/cpp/src/io/orc/reader_impl_helpers.hpp @@ -20,9 +20,6 @@ #include "io/orc/orc.hpp" #include "io/utilities/column_buffer.hpp" -#include -#include - #include #include diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 03020eb649f..d432deb8e79 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -19,6 +19,7 @@ * @brief cuDF-IO ORC writer class implementation */ +#include "cudf/detail/utilities/cuda_memcpy.hpp" #include "io/comp/nvcomp_adapter.hpp" #include "io/orc/orc_gpu.hpp" #include "io/statistics/column_statistics.cuh" @@ -1408,7 +1409,8 @@ encoded_footer_statistics finish_statistic_blobs(Footer const& footer, num_entries_seen += stripes_per_col; } - std::vector file_stats_merge(num_file_blobs); + auto file_stats_merge = + cudf::detail::make_host_vector(num_file_blobs, stream); for (auto i = 0u; i < num_file_blobs; ++i) { auto col_stats = &file_stats_merge[i]; col_stats->col_dtype = per_chunk_stats.col_types[i]; @@ -1418,11 +1420,10 @@ encoded_footer_statistics finish_statistic_blobs(Footer const& footer, } auto d_file_stats_merge = stats_merge.device_ptr(num_stripe_blobs); - CUDF_CUDA_TRY(cudaMemcpyAsync(d_file_stats_merge, - file_stats_merge.data(), - num_file_blobs * sizeof(statistics_merge_group), - cudaMemcpyDefault, - stream.value())); + cudf::detail::cuda_memcpy_async( + device_span{stats_merge.device_ptr(num_stripe_blobs), num_file_blobs}, + 
file_stats_merge, + stream); auto file_stat_chunks = stat_chunks.data() + num_stripe_blobs; detail::merge_group_statistics( @@ -1573,7 +1574,7 @@ void write_index_stream(int32_t stripe_id, * @param[in] strm_desc Stream's descriptor * @param[in] enc_stream Chunk's streams * @param[in] compressed_data Compressed stream data - * @param[in,out] stream_out Temporary host output buffer + * @param[in,out] bounce_buffer Pinned memory bounce buffer for D2H data transfer * @param[in,out] stripe Stream's parent stripe * @param[in,out] streams List of all streams * @param[in] compression_kind The compression kind @@ -1584,7 +1585,7 @@ void write_index_stream(int32_t stripe_id, std::future write_data_stream(gpu::StripeStream const& strm_desc, gpu::encoder_chunk_streams const& enc_stream, uint8_t const* compressed_data, - uint8_t* stream_out, + host_span bounce_buffer, StripeInformation* stripe, orc_streams* streams, CompressionKind compression_kind, @@ -1604,11 +1605,10 @@ std::future write_data_stream(gpu::StripeStream const& strm_desc, if (out_sink->is_device_write_preferred(length)) { return out_sink->device_write_async(stream_in, length, stream); } else { - CUDF_CUDA_TRY( - cudaMemcpyAsync(stream_out, stream_in, length, cudaMemcpyDefault, stream.value())); - stream.synchronize(); + cudf::detail::cuda_memcpy( + bounce_buffer.subspan(0, length), device_span{stream_in, length}, stream); - out_sink->host_write(stream_out, length); + out_sink->host_write(bounce_buffer.data(), length); return std::async(std::launch::deferred, [] {}); } }(); @@ -2616,7 +2616,7 @@ void writer::impl::write_orc_data_to_sink(encoded_data const& enc_data, strm_desc, enc_data.streams[strm_desc.column_id][segmentation.stripes[stripe_id].first], compressed_data.data(), - bounce_buffer.data(), + bounce_buffer, &stripe, &streams, _compression_kind, diff --git a/cpp/src/io/parquet/arrow_schema_writer.cpp b/cpp/src/io/parquet/arrow_schema_writer.cpp index ddf65e9020f..d15435b2553 100644 --- 
a/cpp/src/io/parquet/arrow_schema_writer.cpp +++ b/cpp/src/io/parquet/arrow_schema_writer.cpp @@ -27,7 +27,6 @@ #include "ipc/Schema_generated.h" #include "writer_impl_helpers.hpp" -#include #include #include diff --git a/cpp/src/io/parquet/arrow_schema_writer.hpp b/cpp/src/io/parquet/arrow_schema_writer.hpp index 9bc435bf6c8..66810ee163a 100644 --- a/cpp/src/io/parquet/arrow_schema_writer.hpp +++ b/cpp/src/io/parquet/arrow_schema_writer.hpp @@ -22,10 +22,9 @@ #pragma once #include -#include -#include +#include +#include #include -#include namespace cudf::io::parquet::detail { diff --git a/cpp/src/io/parquet/compact_protocol_reader.hpp b/cpp/src/io/parquet/compact_protocol_reader.hpp index 12c24e2b848..b87f2e9c692 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.hpp +++ b/cpp/src/io/parquet/compact_protocol_reader.hpp @@ -22,10 +22,7 @@ #include #include -#include -#include #include -#include namespace CUDF_EXPORT cudf { namespace io::parquet::detail { diff --git a/cpp/src/io/parquet/compact_protocol_writer.hpp b/cpp/src/io/parquet/compact_protocol_writer.hpp index d4778b1ea15..05859d60c03 100644 --- a/cpp/src/io/parquet/compact_protocol_writer.hpp +++ b/cpp/src/io/parquet/compact_protocol_writer.hpp @@ -17,7 +17,6 @@ #pragma once #include "parquet.hpp" -#include "parquet_common.hpp" #include #include diff --git a/cpp/src/io/parquet/decode_fixed.cu b/cpp/src/io/parquet/decode_fixed.cu index 4522ea7fe56..45380e6ea20 100644 --- a/cpp/src/io/parquet/decode_fixed.cu +++ b/cpp/src/io/parquet/decode_fixed.cu @@ -37,7 +37,14 @@ struct block_scan_results { }; template -static __device__ void scan_block_exclusive_sum(int thread_bit, block_scan_results& results) +using block_scan_temp_storage = int[decode_block_size / cudf::detail::warp_size]; + +// Similar to CUB, must __syncthreads() after calling if reusing temp_storage +template +__device__ inline static void scan_block_exclusive_sum( + int thread_bit, + block_scan_results& results, + block_scan_temp_storage& 
temp_storage) { int const t = threadIdx.x; int const warp_index = t / cudf::detail::warp_size; @@ -45,15 +52,19 @@ static __device__ void scan_block_exclusive_sum(int thread_bit, block_scan_resul uint32_t const lane_mask = (uint32_t(1) << warp_lane) - 1; uint32_t warp_bits = ballot(thread_bit); - scan_block_exclusive_sum(warp_bits, warp_lane, warp_index, lane_mask, results); + scan_block_exclusive_sum( + warp_bits, warp_lane, warp_index, lane_mask, results, temp_storage); } +// Similar to CUB, must __syncthreads() after calling if reusing temp_storage template -__device__ static void scan_block_exclusive_sum(uint32_t warp_bits, - int warp_lane, - int warp_index, - uint32_t lane_mask, - block_scan_results& results) +__device__ static void scan_block_exclusive_sum( + uint32_t warp_bits, + int warp_lane, + int warp_index, + uint32_t lane_mask, + block_scan_results& results, + block_scan_temp_storage& temp_storage) { // Compute # warps constexpr int num_warps = decode_block_size / cudf::detail::warp_size; @@ -64,49 +75,64 @@ __device__ static void scan_block_exclusive_sum(uint32_t warp_bits, results.thread_count_within_warp = __popc(results.warp_bits & lane_mask); // Share the warp counts amongst the block threads - __shared__ int warp_counts[num_warps]; - if (warp_lane == 0) { warp_counts[warp_index] = results.warp_count; } - __syncthreads(); + if (warp_lane == 0) { temp_storage[warp_index] = results.warp_count; } + __syncthreads(); // Sync to share counts between threads/warps // Compute block-wide results results.block_count = 0; results.thread_count_within_block = results.thread_count_within_warp; for (int warp_idx = 0; warp_idx < num_warps; ++warp_idx) { - results.block_count += warp_counts[warp_idx]; - if (warp_idx < warp_index) { results.thread_count_within_block += warp_counts[warp_idx]; } + results.block_count += temp_storage[warp_idx]; + if (warp_idx < warp_index) { results.thread_count_within_block += temp_storage[warp_idx]; } } } -template -__device__ 
inline void gpuDecodeFixedWidthValues( +template +__device__ void gpuDecodeFixedWidthValues( page_state_s* s, state_buf* const sb, int start, int end, int t) { constexpr int num_warps = block_size / cudf::detail::warp_size; constexpr int max_batch_size = num_warps * cudf::detail::warp_size; - PageNestingDecodeInfo* nesting_info_base = s->nesting_info; - int const dtype = s->col.physical_type; + // nesting level that is storing actual leaf values + int const leaf_level_index = s->col.max_nesting_depth - 1; + auto const data_out = s->nesting_info[leaf_level_index].data_out; + + int const dtype = s->col.physical_type; + uint32_t const dtype_len = s->dtype_len; + + int const skipped_leaf_values = s->page.skipped_leaf_values; // decode values int pos = start; while (pos < end) { int const batch_size = min(max_batch_size, end - pos); - int const target_pos = pos + batch_size; - int const src_pos = pos + t; + int const thread_pos = pos + t; - // the position in the output column/buffer - int dst_pos = sb->nz_idx[rolling_index(src_pos)] - s->first_row; + // Index from value buffer (doesn't include nulls) to final array (has gaps for nulls) + int const dst_pos = [&]() { + int dst_pos = sb->nz_idx[rolling_index(thread_pos)]; + if constexpr (!has_lists_t) { dst_pos -= s->first_row; } + return dst_pos; + }(); // target_pos will always be properly bounded by num_rows, but dst_pos may be negative (values // before first_row) in the flat hierarchy case. - if (src_pos < target_pos && dst_pos >= 0) { + if (thread_pos < target_pos && dst_pos >= 0) { // nesting level that is storing actual leaf values - int const leaf_level_index = s->col.max_nesting_depth - 1; - uint32_t dtype_len = s->dtype_len; - void* dst = - nesting_info_base[leaf_level_index].data_out + static_cast(dst_pos) * dtype_len; + // src_pos represents the logical row position we want to read from. But in the case of + // nested hierarchies (lists), there is no 1:1 mapping of rows to values. 
So src_pos + // has to take into account the # of values we have to skip in the page to get to the + // desired logical row. For flat hierarchies, skipped_leaf_values will always be 0. + int const src_pos = [&]() { + if constexpr (has_lists_t) { return thread_pos + skipped_leaf_values; } + return thread_pos; + }(); + + void* const dst = data_out + (static_cast(dst_pos) * dtype_len); + if (s->col.logical_type.has_value() && s->col.logical_type->type == LogicalType::DECIMAL) { switch (dtype) { case INT32: gpuOutputFast(s, sb, src_pos, static_cast(dst)); break; @@ -145,15 +171,15 @@ __device__ inline void gpuDecodeFixedWidthValues( } } -template +template struct decode_fixed_width_values_func { __device__ inline void operator()(page_state_s* s, state_buf* const sb, int start, int end, int t) { - gpuDecodeFixedWidthValues(s, sb, start, end, t); + gpuDecodeFixedWidthValues(s, sb, start, end, t); } }; -template +template __device__ inline void gpuDecodeFixedWidthSplitValues( page_state_s* s, state_buf* const sb, int start, int end, int t) { @@ -161,10 +187,15 @@ __device__ inline void gpuDecodeFixedWidthSplitValues( constexpr int num_warps = block_size / warp_size; constexpr int max_batch_size = num_warps * warp_size; - PageNestingDecodeInfo* nesting_info_base = s->nesting_info; - int const dtype = s->col.physical_type; - auto const data_len = thrust::distance(s->data_start, s->data_end); - auto const num_values = data_len / s->dtype_len_in; + // nesting level that is storing actual leaf values + int const leaf_level_index = s->col.max_nesting_depth - 1; + auto const data_out = s->nesting_info[leaf_level_index].data_out; + + int const dtype = s->col.physical_type; + auto const data_len = thrust::distance(s->data_start, s->data_end); + auto const num_values = data_len / s->dtype_len_in; + + int const skipped_leaf_values = s->page.skipped_leaf_values; // decode values int pos = start; @@ -172,21 +203,34 @@ __device__ inline void gpuDecodeFixedWidthSplitValues( int const 
batch_size = min(max_batch_size, end - pos); int const target_pos = pos + batch_size; - int const src_pos = pos + t; + int const thread_pos = pos + t; // the position in the output column/buffer - int dst_pos = sb->nz_idx[rolling_index(src_pos)] - s->first_row; + // Index from value buffer (doesn't include nulls) to final array (has gaps for nulls) + int const dst_pos = [&]() { + int dst_pos = sb->nz_idx[rolling_index(thread_pos)]; + if constexpr (!has_lists_t) { dst_pos -= s->first_row; } + return dst_pos; + }(); // target_pos will always be properly bounded by num_rows, but dst_pos may be negative (values // before first_row) in the flat hierarchy case. - if (src_pos < target_pos && dst_pos >= 0) { - // nesting level that is storing actual leaf values - int const leaf_level_index = s->col.max_nesting_depth - 1; + if (thread_pos < target_pos && dst_pos >= 0) { + // src_pos represents the logical row position we want to read from. But in the case of + // nested hierarchies (lists), there is no 1:1 mapping of rows to values. So src_pos + // has to take into account the # of values we have to skip in the page to get to the + // desired logical row. For flat hierarchies, skipped_leaf_values will always be 0. 
+ int const src_pos = [&]() { + if constexpr (has_lists_t) { + return thread_pos + skipped_leaf_values; + } else { + return thread_pos; + } + }(); - uint32_t dtype_len = s->dtype_len; - uint8_t const* src = s->data_start + src_pos; - uint8_t* dst = - nesting_info_base[leaf_level_index].data_out + static_cast(dst_pos) * dtype_len; + uint32_t const dtype_len = s->dtype_len; + uint8_t const* const src = s->data_start + src_pos; + uint8_t* const dst = data_out + static_cast(dst_pos) * dtype_len; auto const is_decimal = s->col.logical_type.has_value() and s->col.logical_type->type == LogicalType::DECIMAL; @@ -239,11 +283,11 @@ __device__ inline void gpuDecodeFixedWidthSplitValues( } } -template +template struct decode_fixed_width_split_values_func { __device__ inline void operator()(page_state_s* s, state_buf* const sb, int start, int end, int t) { - gpuDecodeFixedWidthSplitValues(s, sb, start, end, t); + gpuDecodeFixedWidthSplitValues(s, sb, start, end, t); } }; @@ -274,12 +318,14 @@ static __device__ int gpuUpdateValidityAndRowIndicesNested( int const batch_size = min(max_batch_size, capped_target_value_count - value_count); // definition level - int d = 1; - if (t >= batch_size) { - d = -1; - } else if (def) { - d = static_cast(def[rolling_index(value_count + t)]); - } + int const d = [&]() { + if (t >= batch_size) { + return -1; + } else if (def) { + return static_cast(def[rolling_index(value_count + t)]); + } + return 1; + }(); int const thread_value_count = t; int const block_value_count = batch_size; @@ -340,6 +386,7 @@ static __device__ int gpuUpdateValidityAndRowIndicesNested( if (is_valid) { int const dst_pos = value_count + thread_value_count; int const src_pos = max_depth_valid_count + thread_valid_count; + sb->nz_idx[rolling_index(src_pos)] = dst_pos; } // update stuff @@ -396,16 +443,16 @@ static __device__ int gpuUpdateValidityAndRowIndicesFlat( int const in_row_bounds = (row_index >= row_index_lower_bound) && (row_index < last_row); // use definition 
level & row bounds to determine if is valid - int is_valid; - if (t >= batch_size) { - is_valid = 0; - } else if (def) { - int const def_level = - static_cast(def[rolling_index(value_count + t)]); - is_valid = ((def_level > 0) && in_row_bounds) ? 1 : 0; - } else { - is_valid = in_row_bounds; - } + int const is_valid = [&]() { + if (t >= batch_size) { + return 0; + } else if (def) { + int const def_level = + static_cast(def[rolling_index(value_count + t)]); + return ((def_level > 0) && in_row_bounds) ? 1 : 0; + } + return in_row_bounds; + }(); // thread and block validity count using block_scan = cub::BlockScan; @@ -447,8 +494,9 @@ static __device__ int gpuUpdateValidityAndRowIndicesFlat( // output offset if (is_valid) { - int const dst_pos = value_count + thread_value_count; - int const src_pos = valid_count + thread_valid_count; + int const dst_pos = value_count + thread_value_count; + int const src_pos = valid_count + thread_valid_count; + sb->nz_idx[rolling_index(src_pos)] = dst_pos; } @@ -460,7 +508,7 @@ static __device__ int gpuUpdateValidityAndRowIndicesFlat( if (t == 0) { // update valid value count for decoding and total # of values we've processed ni.valid_count = valid_count; - ni.value_count = value_count; // TODO: remove? 
this is unused in the non-list path + ni.value_count = value_count; s->nz_count = valid_count; s->input_value_count = value_count; s->input_row_count = value_count; @@ -533,6 +581,239 @@ static __device__ int gpuUpdateValidityAndRowIndicesNonNullable(int32_t target_v return valid_count; } +template +static __device__ int gpuUpdateValidityAndRowIndicesLists(int32_t target_value_count, + page_state_s* s, + state_buf* sb, + level_t const* const def, + level_t const* const rep, + int t) +{ + constexpr int num_warps = decode_block_size / cudf::detail::warp_size; + constexpr int max_batch_size = num_warps * cudf::detail::warp_size; + + // how many (input) values we've processed in the page so far, prior to this loop iteration + int value_count = s->input_value_count; + + // how many rows we've processed in the page so far + int input_row_count = s->input_row_count; + + // cap by last row so that we don't process any rows past what we want to output. + int const first_row = s->first_row; + int const last_row = first_row + s->num_rows; + + int const row_index_lower_bound = s->row_index_lower_bound; + int const max_depth = s->col.max_nesting_depth - 1; + int max_depth_valid_count = s->nesting_info[max_depth].valid_count; + + int const warp_index = t / cudf::detail::warp_size; + int const warp_lane = t % cudf::detail::warp_size; + bool const is_first_lane = (warp_lane == 0); + + __syncthreads(); + __shared__ block_scan_temp_storage temp_storage; + + while (value_count < target_value_count) { + bool const within_batch = value_count + t < target_value_count; + + // get definition level, use repetition level to get start/end depth + // different for each thread, as each thread has a different r/d + auto const [def_level, start_depth, end_depth] = [&]() { + if (!within_batch) { return cuda::std::make_tuple(-1, -1, -1); } + + int const level_index = rolling_index(value_count + t); + int const rep_level = static_cast(rep[level_index]); + int const start_depth = 
s->nesting_info[rep_level].start_depth; + + if constexpr (!nullable) { + return cuda::std::make_tuple(-1, start_depth, max_depth); + } else { + if (def != nullptr) { + int const def_level = static_cast(def[level_index]); + return cuda::std::make_tuple( + def_level, start_depth, s->nesting_info[def_level].end_depth); + } else { + return cuda::std::make_tuple(1, start_depth, max_depth); + } + } + }(); + + // Determine value count & row index + // track (page-relative) row index for the thread so we can compare against input bounds + // keep track of overall # of rows we've read. + int const is_new_row = start_depth == 0 ? 1 : 0; + int num_prior_new_rows, total_num_new_rows; + { + block_scan_results new_row_scan_results; + scan_block_exclusive_sum(is_new_row, new_row_scan_results, temp_storage); + __syncthreads(); + num_prior_new_rows = new_row_scan_results.thread_count_within_block; + total_num_new_rows = new_row_scan_results.block_count; + } + + int const row_index = input_row_count + ((num_prior_new_rows + is_new_row) - 1); + input_row_count += total_num_new_rows; + int const in_row_bounds = (row_index >= row_index_lower_bound) && (row_index < last_row); + + // VALUE COUNT: + // in_nesting_bounds: if at a nesting level where we need to add value indices + // the bounds: from current rep to the rep AT the def depth + int in_nesting_bounds = ((0 >= start_depth && 0 <= end_depth) && in_row_bounds) ? 
1 : 0; + int thread_value_count_within_warp, warp_value_count, thread_value_count, block_value_count; + { + block_scan_results value_count_scan_results; + scan_block_exclusive_sum( + in_nesting_bounds, value_count_scan_results, temp_storage); + __syncthreads(); + + thread_value_count_within_warp = value_count_scan_results.thread_count_within_warp; + warp_value_count = value_count_scan_results.warp_count; + thread_value_count = value_count_scan_results.thread_count_within_block; + block_value_count = value_count_scan_results.block_count; + } + + // iterate by depth + for (int d_idx = 0; d_idx <= max_depth; d_idx++) { + auto& ni = s->nesting_info[d_idx]; + + // everything up to the max_def_level is a non-null value + int const is_valid = [&](int input_def_level) { + if constexpr (nullable) { + return ((input_def_level >= ni.max_def_level) && in_nesting_bounds) ? 1 : 0; + } else { + return in_nesting_bounds; + } + }(def_level); + + // VALID COUNT: + // Not all values visited by this block will represent a value at this nesting level. + // the validity bit for thread t might actually represent output value t-6. + // the correct position for thread t's bit is thread_value_count. + uint32_t const warp_valid_mask = + WarpReduceOr32((uint32_t)is_valid << thread_value_count_within_warp); + int thread_valid_count, block_valid_count; + { + auto thread_mask = (uint32_t(1) << thread_value_count_within_warp) - 1; + + block_scan_results valid_count_scan_results; + scan_block_exclusive_sum(warp_valid_mask, + warp_lane, + warp_index, + thread_mask, + valid_count_scan_results, + temp_storage); + __syncthreads(); + thread_valid_count = valid_count_scan_results.thread_count_within_block; + block_valid_count = valid_count_scan_results.block_count; + } + + // compute warp and thread value counts for the -next- nesting level. we need to + // do this for lists so that we can emit an offset for the -current- nesting level. 
+ // the offset for the current nesting level == current length of the next nesting level + int next_thread_value_count_within_warp = 0, next_warp_value_count = 0; + int next_thread_value_count = 0, next_block_value_count = 0; + int next_in_nesting_bounds = 0; + if (d_idx < max_depth) { + // NEXT DEPTH VALUE COUNT: + next_in_nesting_bounds = + ((d_idx + 1 >= start_depth) && (d_idx + 1 <= end_depth) && in_row_bounds) ? 1 : 0; + { + block_scan_results next_value_count_scan_results; + scan_block_exclusive_sum( + next_in_nesting_bounds, next_value_count_scan_results, temp_storage); + __syncthreads(); + + next_thread_value_count_within_warp = + next_value_count_scan_results.thread_count_within_warp; + next_warp_value_count = next_value_count_scan_results.warp_count; + next_thread_value_count = next_value_count_scan_results.thread_count_within_block; + next_block_value_count = next_value_count_scan_results.block_count; + } + + // STORE OFFSET TO THE LIST LOCATION + // if we're -not- at a leaf column and we're within nesting/row bounds + // and we have a valid data_out pointer, it implies this is a list column, so + // emit an offset. + if (in_nesting_bounds && ni.data_out != nullptr) { + const auto& next_ni = s->nesting_info[d_idx + 1]; + int const idx = ni.value_count + thread_value_count; + cudf::size_type const ofs = + next_ni.value_count + next_thread_value_count + next_ni.page_start_value; + + (reinterpret_cast(ni.data_out))[idx] = ofs; + } + } + + // validity is processed per-warp (on lane 0's) + // thi is because when atomic writes are needed, they are 32-bit operations + // + // lists always read and write to the same bounds + // (that is, read and write positions are already pre-bounded by first_row/num_rows). + // since we are about to write the validity vector + // here we need to adjust our computed mask to take into account the write row bounds. 
+ if constexpr (nullable) { + if (is_first_lane && (ni.valid_map != nullptr) && (warp_value_count > 0)) { + // absolute bit offset into the output validity map + // is cumulative sum of warp_value_count at the given nesting depth + // DON'T subtract by first_row: since it's lists it's not 1-row-per-value + int const bit_offset = ni.valid_map_offset + thread_value_count; + + store_validity(bit_offset, ni.valid_map, warp_valid_mask, warp_value_count); + } + + if (t == 0) { ni.null_count += block_value_count - block_valid_count; } + } + + // if this is valid and we're at the leaf, output dst_pos + // Read value_count before the sync, so that when thread 0 modifies it we've already read its + // value + int const current_value_count = ni.value_count; + __syncthreads(); // guard against modification of ni.value_count below + if (d_idx == max_depth) { + if (is_valid) { + int const dst_pos = current_value_count + thread_value_count; + int const src_pos = max_depth_valid_count + thread_valid_count; + int const output_index = rolling_index(src_pos); + + // Index from rolling buffer of values (which doesn't include nulls) to final array (which + // includes gaps for nulls) + sb->nz_idx[output_index] = dst_pos; + } + max_depth_valid_count += block_valid_count; + } + + // update stuff + if (t == 0) { + ni.value_count += block_value_count; + ni.valid_map_offset += block_value_count; + } + __syncthreads(); // sync modification of ni.value_count + + // propagate value counts for the next depth level + block_value_count = next_block_value_count; + thread_value_count = next_thread_value_count; + in_nesting_bounds = next_in_nesting_bounds; + warp_value_count = next_warp_value_count; + thread_value_count_within_warp = next_thread_value_count_within_warp; + } // END OF DEPTH LOOP + + int const batch_size = min(max_batch_size, target_value_count - value_count); + value_count += batch_size; + } + + if (t == 0) { + // update valid value count for decoding and total # of values we've 
processed + s->nesting_info[max_depth].valid_count = max_depth_valid_count; + s->nz_count = max_depth_valid_count; + s->input_value_count = value_count; + + // If we have lists # rows != # values + s->input_row_count = input_row_count; + } + + return max_depth_valid_count; +} + // is the page marked nullable or not __device__ inline bool is_nullable(page_state_s* s) { @@ -560,6 +841,23 @@ __device__ inline bool maybe_has_nulls(page_state_s* s) return run_val != s->col.max_level[lvl]; } +template +__device__ int skip_decode(stream_type& parquet_stream, int num_to_skip, int t) +{ + // it could be that (e.g.) we skip 5000 but starting at row 4000 we have a run of length 2000: + // in that case skip_decode() only skips 4000, and we have to process the remaining 1000 up front + // modulo 2 * block_size of course, since that's as many as we process at once + int num_skipped = parquet_stream.skip_decode(t, num_to_skip); + while (num_skipped < num_to_skip) { + // TODO: Instead of decoding, skip within the run to the appropriate location + auto const to_decode = min(rolling_buf_size, num_to_skip - num_skipped); + num_skipped += parquet_stream.decode_next(t, to_decode); + __syncthreads(); + } + + return num_skipped; +} + /** * @brief Kernel for computing fixed width non dictionary column data stored in the pages * @@ -579,9 +877,10 @@ template + bool has_lists_t, + template typename DecodeValuesFunc> -CUDF_KERNEL void __launch_bounds__(decode_block_size_t) +CUDF_KERNEL void __launch_bounds__(decode_block_size_t, 8) gpuDecodePageDataGeneric(PageInfo* pages, device_span chunks, size_t min_row, @@ -621,31 +920,29 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size_t) // if we have no work to do (eg, in a skip_rows/num_rows case) in this page. 
if (s->num_rows == 0) { return; } - DecodeValuesFunc decode_values; + DecodeValuesFunc decode_values; - bool const nullable = is_nullable(s); - bool const should_process_nulls = nullable && maybe_has_nulls(s); + bool const should_process_nulls = is_nullable(s) && maybe_has_nulls(s); // shared buffer. all shared memory is suballocated out of here - // constexpr int shared_rep_size = has_lists_t ? cudf::util::round_up_unsafe(rle_run_buffer_size * - // sizeof(rle_run), size_t{16}) : 0; + constexpr int shared_rep_size = + has_lists_t + ? cudf::util::round_up_unsafe(rle_run_buffer_size * sizeof(rle_run), size_t{16}) + : 0; constexpr int shared_dict_size = has_dict_t ? cudf::util::round_up_unsafe(rle_run_buffer_size * sizeof(rle_run), size_t{16}) : 0; constexpr int shared_def_size = cudf::util::round_up_unsafe(rle_run_buffer_size * sizeof(rle_run), size_t{16}); - constexpr int shared_buf_size = /*shared_rep_size +*/ shared_dict_size + shared_def_size; + constexpr int shared_buf_size = shared_rep_size + shared_dict_size + shared_def_size; __shared__ __align__(16) uint8_t shared_buf[shared_buf_size]; // setup all shared memory buffers - int shared_offset = 0; - /* - rle_run *rep_runs = reinterpret_cast*>(shared_buf + shared_offset); - if constexpr (has_lists_t){ - shared_offset += shared_rep_size; - } - */ + int shared_offset = 0; + rle_run* rep_runs = reinterpret_cast*>(shared_buf + shared_offset); + if constexpr (has_lists_t) { shared_offset += shared_rep_size; } + rle_run* dict_runs = reinterpret_cast*>(shared_buf + shared_offset); if constexpr (has_dict_t) { shared_offset += shared_dict_size; } rle_run* def_runs = reinterpret_cast*>(shared_buf + shared_offset); @@ -660,38 +957,51 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size_t) def, s->page.num_input_values); } - /* + rle_stream rep_decoder{rep_runs}; level_t* const rep = reinterpret_cast(pp->lvl_decode_buf[level_type::REPETITION]); - if constexpr(has_lists_t){ + if constexpr (has_lists_t) { 
rep_decoder.init(s->col.level_bits[level_type::REPETITION], s->abs_lvl_start[level_type::REPETITION], s->abs_lvl_end[level_type::REPETITION], rep, s->page.num_input_values); } - */ rle_stream dict_stream{dict_runs}; if constexpr (has_dict_t) { dict_stream.init( s->dict_bits, s->data_start, s->data_end, sb->dict_idx, s->page.num_input_values); } - __syncthreads(); // We use two counters in the loop below: processed_count and valid_count. - // - processed_count: number of rows out of num_input_values that we have decoded so far. + // - processed_count: number of values out of num_input_values that we have decoded so far. // the definition stream returns the number of total rows it has processed in each call // to decode_next and we accumulate in process_count. - // - valid_count: number of non-null rows we have decoded so far. In each iteration of the + // - valid_count: number of non-null values we have decoded so far. In each iteration of the // loop below, we look at the number of valid items (which could be all for non-nullable), // and valid_count is that running count. int processed_count = 0; int valid_count = 0; + + // Skip ahead in the decoding so that we don't repeat work (skipped_leaf_values = 0 for non-lists) + if constexpr (has_lists_t) { + auto const skipped_leaf_values = s->page.skipped_leaf_values; + if (skipped_leaf_values > 0) { + if (should_process_nulls) { + skip_decode(def_decoder, skipped_leaf_values, t); + } + processed_count = skip_decode(rep_decoder, skipped_leaf_values, t); + if constexpr (has_dict_t) { + skip_decode(dict_stream, skipped_leaf_values, t); + } + } + } + // the core loop. 
decode batches of level stream data using rle_stream objects // and pass the results to gpuDecodeValues // For chunked reads we may not process all of the rows on the page; if not stop early - int last_row = s->first_row + s->num_rows; + int const last_row = s->first_row + s->num_rows; while ((s->error == 0) && (processed_count < s->page.num_input_values) && (s->input_row_count <= last_row)) { int next_valid_count; @@ -701,7 +1011,12 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size_t) processed_count += def_decoder.decode_next(t); __syncthreads(); - if constexpr (has_nesting_t) { + if constexpr (has_lists_t) { + rep_decoder.decode_next(t); + __syncthreads(); + next_valid_count = gpuUpdateValidityAndRowIndicesLists( + processed_count, s, sb, def, rep, t); + } else if constexpr (has_nesting_t) { next_valid_count = gpuUpdateValidityAndRowIndicesNested( processed_count, s, sb, def, t); } else { @@ -713,9 +1028,16 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size_t) // this function call entirely since all it will ever generate is a mapping of (i -> i) for // nz_idx. gpuDecodeFixedWidthValues would be the only work that happens. 
else { - processed_count += min(rolling_buf_size, s->page.num_input_values - processed_count); - next_valid_count = - gpuUpdateValidityAndRowIndicesNonNullable(processed_count, s, sb, t); + if constexpr (has_lists_t) { + processed_count += rep_decoder.decode_next(t); + __syncthreads(); + next_valid_count = gpuUpdateValidityAndRowIndicesLists( + processed_count, s, sb, nullptr, rep, t); + } else { + processed_count += min(rolling_buf_size, s->page.num_input_values - processed_count); + next_valid_count = + gpuUpdateValidityAndRowIndicesNonNullable(processed_count, s, sb, t); + } } __syncthreads(); @@ -745,6 +1067,7 @@ void __host__ DecodePageDataFixed(cudf::detail::hostdevice_span pages, size_t min_row, int level_type_size, bool has_nesting, + bool is_list, kernel_error::pointer error_code, rmm::cuda_stream_view stream) { @@ -754,12 +1077,23 @@ void __host__ DecodePageDataFixed(cudf::detail::hostdevice_span pages, dim3 dim_grid(pages.size(), 1); // 1 threadblock per page if (level_type_size == 1) { - if (has_nesting) { + if (is_list) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else if (has_nesting) { gpuDecodePageDataGeneric <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); @@ -769,17 +1103,29 @@ void __host__ DecodePageDataFixed(cudf::detail::hostdevice_span pages, decode_kernel_mask::FIXED_WIDTH_NO_DICT, false, false, + false, decode_fixed_width_values_func> <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); } } else { - if (has_nesting) { + if (is_list) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else if (has_nesting) { gpuDecodePageDataGeneric <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); @@ -789,6 +1135,7 @@ void __host__ DecodePageDataFixed(cudf::detail::hostdevice_span pages, decode_kernel_mask::FIXED_WIDTH_NO_DICT, false, false, + false, decode_fixed_width_values_func> <<>>( 
pages.device_ptr(), chunks, min_row, num_rows, error_code); @@ -802,6 +1149,7 @@ void __host__ DecodePageDataFixedDict(cudf::detail::hostdevice_span pa size_t min_row, int level_type_size, bool has_nesting, + bool is_list, kernel_error::pointer error_code, rmm::cuda_stream_view stream) { @@ -811,12 +1159,23 @@ void __host__ DecodePageDataFixedDict(cudf::detail::hostdevice_span pa dim3 dim_grid(pages.size(), 1); // 1 thread block per page => # blocks if (level_type_size == 1) { - if (has_nesting) { + if (is_list) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else if (has_nesting) { gpuDecodePageDataGeneric <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); @@ -826,17 +1185,29 @@ void __host__ DecodePageDataFixedDict(cudf::detail::hostdevice_span pa decode_kernel_mask::FIXED_WIDTH_DICT, true, false, + false, decode_fixed_width_values_func> <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); } } else { - if (has_nesting) { + if (is_list) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else if (has_nesting) { gpuDecodePageDataGeneric <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); @@ -846,6 +1217,7 @@ void __host__ DecodePageDataFixedDict(cudf::detail::hostdevice_span pa decode_kernel_mask::FIXED_WIDTH_DICT, true, false, + true, decode_fixed_width_values_func> <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); @@ -860,6 +1232,7 @@ DecodeSplitPageFixedWidthData(cudf::detail::hostdevice_span pages, size_t min_row, int level_type_size, bool has_nesting, + bool is_list, kernel_error::pointer error_code, rmm::cuda_stream_view stream) { @@ -869,12 +1242,23 @@ DecodeSplitPageFixedWidthData(cudf::detail::hostdevice_span pages, dim3 dim_grid(pages.size(), 1); // 1 thread block per page => # blocks if (level_type_size == 1) { - if (has_nesting) { + if (is_list) { + gpuDecodePageDataGeneric + <<>>( + 
pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else if (has_nesting) { gpuDecodePageDataGeneric <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); @@ -884,17 +1268,29 @@ DecodeSplitPageFixedWidthData(cudf::detail::hostdevice_span pages, decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_FLAT, false, false, + false, decode_fixed_width_split_values_func> <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); } } else { - if (has_nesting) { + if (is_list) { + gpuDecodePageDataGeneric + <<>>( + pages.device_ptr(), chunks, min_row, num_rows, error_code); + } else if (has_nesting) { gpuDecodePageDataGeneric <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); @@ -904,6 +1300,7 @@ DecodeSplitPageFixedWidthData(cudf::detail::hostdevice_span pages, decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_FLAT, false, false, + false, decode_fixed_width_split_values_func> <<>>( pages.device_ptr(), chunks, min_row, num_rows, error_code); diff --git a/cpp/src/io/parquet/error.hpp b/cpp/src/io/parquet/error.hpp index f0fc9fab3ab..8b3d1d7a6c3 100644 --- a/cpp/src/io/parquet/error.hpp +++ b/cpp/src/io/parquet/error.hpp @@ -26,7 +26,7 @@ namespace cudf::io::parquet { /** - * @brief Wrapper around a `rmm::device_scalar` for use in reporting errors that occur in + * @brief Specialized device scalar for use in reporting errors that occur in * kernel calls. 
* * The `kernel_error` object is created with a `rmm::cuda_stream_view` which is used throughout diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index b3276c81c1f..0d24fa4236f 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -21,6 +21,7 @@ #include +#include #include namespace cudf::io::parquet::detail { @@ -476,9 +477,9 @@ void WriteFinalOffsets(host_span offsets, auto d_src_data = cudf::detail::make_device_uvector_async( offsets, stream, cudf::get_current_device_resource_ref()); // Iterator for the source (scalar) data - auto src_iter = cudf::detail::make_counting_transform_iterator( - static_cast(0), - cuda::proclaim_return_type( + auto src_iter = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + cuda::proclaim_return_type( [src = d_src_data.begin()] __device__(std::size_t i) { return src + i; })); // Copy buffer addresses to device and create an iterator diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu index d604642be54..52d53cb8225 100644 --- a/cpp/src/io/parquet/page_hdr.cu +++ b/cpp/src/io/parquet/page_hdr.cu @@ -183,17 +183,20 @@ __device__ decode_kernel_mask kernel_mask_for_page(PageInfo const& page, return decode_kernel_mask::STRING; } - if (!is_list(chunk) && !is_byte_array(chunk) && !is_boolean(chunk)) { + if (!is_byte_array(chunk) && !is_boolean(chunk)) { if (page.encoding == Encoding::PLAIN) { - return is_nested(chunk) ? decode_kernel_mask::FIXED_WIDTH_NO_DICT_NESTED - : decode_kernel_mask::FIXED_WIDTH_NO_DICT; + return is_list(chunk) ? decode_kernel_mask::FIXED_WIDTH_NO_DICT_LIST + : is_nested(chunk) ? decode_kernel_mask::FIXED_WIDTH_NO_DICT_NESTED + : decode_kernel_mask::FIXED_WIDTH_NO_DICT; } else if (page.encoding == Encoding::PLAIN_DICTIONARY || page.encoding == Encoding::RLE_DICTIONARY) { - return is_nested(chunk) ? decode_kernel_mask::FIXED_WIDTH_DICT_NESTED - : decode_kernel_mask::FIXED_WIDTH_DICT; + return is_list(chunk) ? 
decode_kernel_mask::FIXED_WIDTH_DICT_LIST + : is_nested(chunk) ? decode_kernel_mask::FIXED_WIDTH_DICT_NESTED + : decode_kernel_mask::FIXED_WIDTH_DICT; } else if (page.encoding == Encoding::BYTE_STREAM_SPLIT) { - return is_nested(chunk) ? decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_NESTED - : decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_FLAT; + return is_list(chunk) ? decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_LIST + : is_nested(chunk) ? decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_NESTED + : decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_FLAT; } } diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 4f6d41a97da..dba24b553e6 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -22,14 +22,13 @@ #include "io/parquet/parquet_common.hpp" #include "io/statistics/statistics.cuh" #include "io/utilities/column_buffer.hpp" -#include "io/utilities/hostdevice_vector.hpp" +#include #include #include #include #include -#include #include #include @@ -221,6 +220,10 @@ enum class decode_kernel_mask { (1 << 9), // Same as above but for nested, fixed-width data FIXED_WIDTH_NO_DICT_NESTED = (1 << 10), // Run decode kernel for fixed width non-dictionary pages FIXED_WIDTH_DICT_NESTED = (1 << 11), // Run decode kernel for fixed width dictionary pages + FIXED_WIDTH_DICT_LIST = (1 << 12), // Run decode kernel for fixed width dictionary pages + FIXED_WIDTH_NO_DICT_LIST = (1 << 13), // Run decode kernel for fixed width non-dictionary pages + BYTE_STREAM_SPLIT_FIXED_WIDTH_LIST = + (1 << 14), // Run decode kernel for BYTE_STREAM_SPLIT encoded data for fixed width lists }; // mask representing all the ways in which a string can be encoded @@ -909,6 +912,7 @@ void DecodeDeltaLengthByteArray(cudf::detail::hostdevice_span pages, * @param[in] min_row Minimum number of rows to read * @param[in] level_type_size Size in bytes of the type for level decoding * @param[in] has_nesting Whether or not the 
data contains nested (but not list) data. + * @param[in] is_list Whether or not the data contains list data. * @param[out] error_code Error code for kernel failures * @param[in] stream CUDA stream to use */ @@ -918,6 +922,7 @@ void DecodePageDataFixed(cudf::detail::hostdevice_span pages, size_t min_row, int level_type_size, bool has_nesting, + bool is_list, kernel_error::pointer error_code, rmm::cuda_stream_view stream); @@ -933,6 +938,7 @@ void DecodePageDataFixed(cudf::detail::hostdevice_span pages, * @param[in] min_row Minimum number of rows to read * @param[in] level_type_size Size in bytes of the type for level decoding * @param[in] has_nesting Whether or not the data contains nested (but not list) data. + * @param[in] is_list Whether or not the data contains list data. * @param[out] error_code Error code for kernel failures * @param[in] stream CUDA stream to use */ @@ -942,6 +948,7 @@ void DecodePageDataFixedDict(cudf::detail::hostdevice_span pages, size_t min_row, int level_type_size, bool has_nesting, + bool is_list, kernel_error::pointer error_code, rmm::cuda_stream_view stream); @@ -957,6 +964,7 @@ void DecodePageDataFixedDict(cudf::detail::hostdevice_span pages, * @param[in] min_row Minimum number of rows to read * @param[in] level_type_size Size in bytes of the type for level decoding * @param[in] has_nesting Whether or not the data contains nested (but not list) data. + * @param[in] is_list Whether or not the data contains list data. 
* @param[out] error_code Error code for kernel failures * @param[in] stream CUDA stream to use */ @@ -966,6 +974,7 @@ void DecodeSplitPageFixedWidthData(cudf::detail::hostdevice_span pages size_t min_row, int level_type_size, bool has_nesting, + bool is_list, kernel_error::pointer error_code, rmm::cuda_stream_view stream); diff --git a/cpp/src/io/parquet/predicate_pushdown.cpp b/cpp/src/io/parquet/predicate_pushdown.cpp index f0a0bc0b51b..a965f3325d5 100644 --- a/cpp/src/io/parquet/predicate_pushdown.cpp +++ b/cpp/src/io/parquet/predicate_pushdown.cpp @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -454,15 +453,18 @@ std::optional>> aggregate_reader_metadata::fi CUDF_EXPECTS(predicate.type().id() == cudf::type_id::BOOL8, "Filter expression must return a boolean column"); - auto num_bitmasks = num_bitmask_words(predicate.size()); - std::vector host_bitmask(num_bitmasks, ~bitmask_type{0}); - if (predicate.nullable()) { - CUDF_CUDA_TRY(cudaMemcpyAsync(host_bitmask.data(), - predicate.null_mask(), - num_bitmasks * sizeof(bitmask_type), - cudaMemcpyDefault, - stream.value())); - } + auto const host_bitmask = [&] { + auto const num_bitmasks = num_bitmask_words(predicate.size()); + if (predicate.nullable()) { + return cudf::detail::make_host_vector_sync( + device_span(predicate.null_mask(), num_bitmasks), stream); + } else { + auto bitmask = cudf::detail::make_host_vector(num_bitmasks, stream); + std::fill(bitmask.begin(), bitmask.end(), ~bitmask_type{0}); + return bitmask; + } + }(); + auto validity_it = cudf::detail::make_counting_transform_iterator( 0, [bitmask = host_bitmask.data()](auto bit_index) { return bit_is_set(bitmask, bit_index); }); diff --git a/cpp/src/io/parquet/reader.cpp b/cpp/src/io/parquet/reader.cpp index dd354b905f3..170c6e8857f 100644 --- a/cpp/src/io/parquet/reader.cpp +++ b/cpp/src/io/parquet/reader.cpp @@ -16,8 +16,6 @@ #include "reader_impl.hpp" -#include - namespace cudf::io::parquet::detail { 
reader::reader() = default; diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index f0865c715bc..689386b8957 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -21,11 +21,9 @@ #include #include #include -#include #include #include -#include #include #include @@ -78,7 +76,7 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num // TODO: This step is somewhat redundant if size info has already been calculated (nested schema, // chunked reader). auto const has_strings = (kernel_mask & STRINGS_MASK) != 0; - std::vector col_string_sizes(_input_columns.size(), 0L); + auto col_string_sizes = cudf::detail::make_host_vector(_input_columns.size(), _stream); if (has_strings) { // need to compute pages bounds/sizes if we lack page indexes or are using custom bounds // TODO: we could probably dummy up size stats for FLBA data since we know the width @@ -274,6 +272,7 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num skip_rows, level_type_size, false, + false, error_code.data(), streams[s_idx++]); } @@ -286,6 +285,20 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num skip_rows, level_type_size, true, + false, + error_code.data(), + streams[s_idx++]); + } + + // launch byte stream split decoder, for list columns + if (BitAnd(kernel_mask, decode_kernel_mask::BYTE_STREAM_SPLIT_FIXED_WIDTH_LIST) != 0) { + DecodeSplitPageFixedWidthData(subpass.pages, + pass.chunks, + num_rows, + skip_rows, + level_type_size, + true, + true, error_code.data(), streams[s_idx++]); } @@ -309,6 +322,20 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num skip_rows, level_type_size, false, + false, + error_code.data(), + streams[s_idx++]); + } + + // launch fixed width type decoder for lists + if (BitAnd(kernel_mask, decode_kernel_mask::FIXED_WIDTH_NO_DICT_LIST) != 0) { + DecodePageDataFixed(subpass.pages, + 
pass.chunks, + num_rows, + skip_rows, + level_type_size, + true, + true, error_code.data(), streams[s_idx++]); } @@ -321,6 +348,7 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num skip_rows, level_type_size, true, + false, error_code.data(), streams[s_idx++]); } @@ -333,6 +361,20 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num skip_rows, level_type_size, false, + false, + error_code.data(), + streams[s_idx++]); + } + + // launch fixed width type decoder with dictionaries for lists + if (BitAnd(kernel_mask, decode_kernel_mask::FIXED_WIDTH_DICT_LIST) != 0) { + DecodePageDataFixedDict(subpass.pages, + pass.chunks, + num_rows, + skip_rows, + level_type_size, + true, + true, error_code.data(), streams[s_idx++]); } @@ -345,6 +387,7 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num skip_rows, level_type_size, true, + false, error_code.data(), streams[s_idx++]); } diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 62ffc4d3077..3aa9b94ed6b 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -284,7 +284,7 @@ class reader::impl { * * @return Vector of total string data sizes for each column */ - std::vector calculate_page_string_offsets(); + cudf::detail::host_vector calculate_page_string_offsets(); /** * @brief Converts the page data and outputs to columns. diff --git a/cpp/src/io/parquet/reader_impl_chunking.hpp b/cpp/src/io/parquet/reader_impl_chunking.hpp index 3a3cdd34a58..a0c2dbd3e44 100644 --- a/cpp/src/io/parquet/reader_impl_chunking.hpp +++ b/cpp/src/io/parquet/reader_impl_chunking.hpp @@ -107,7 +107,7 @@ struct subpass_intermediate_data { * rowgroups may represent less than all of the rowgroups to be read for the file. */ struct pass_intermediate_data { - std::vector> raw_page_data; + std::vector raw_page_data; // rowgroup, chunk and page information for the current pass. 
bool has_compressed_data{false}; diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 5138a92ac14..f03f1214b9a 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -218,7 +218,7 @@ void generate_depth_remappings( */ [[nodiscard]] std::future read_column_chunks_async( std::vector> const& sources, - std::vector>& page_data, + cudf::host_span page_data, cudf::detail::hostdevice_vector& chunks, size_t begin_chunk, size_t end_chunk, @@ -251,23 +251,24 @@ void generate_depth_remappings( if (source->is_device_read_preferred(io_size)) { // Buffer needs to be padded. // Required by `gpuDecodePageData`. - auto buffer = + page_data[chunk] = rmm::device_buffer(cudf::util::round_up_safe(io_size, BUFFER_PADDING_MULTIPLE), stream); auto fut_read_size = source->device_read_async( - io_offset, io_size, static_cast(buffer.data()), stream); + io_offset, io_size, static_cast(page_data[chunk].data()), stream); read_tasks.emplace_back(std::move(fut_read_size)); - page_data[chunk] = datasource::buffer::create(std::move(buffer)); } else { auto const read_buffer = source->host_read(io_offset, io_size); // Buffer needs to be padded. // Required by `gpuDecodePageData`. 
- auto tmp_buffer = rmm::device_buffer( + page_data[chunk] = rmm::device_buffer( cudf::util::round_up_safe(read_buffer->size(), BUFFER_PADDING_MULTIPLE), stream); - CUDF_CUDA_TRY(cudaMemcpyAsync( - tmp_buffer.data(), read_buffer->data(), read_buffer->size(), cudaMemcpyDefault, stream)); - page_data[chunk] = datasource::buffer::create(std::move(tmp_buffer)); + CUDF_CUDA_TRY(cudaMemcpyAsync(page_data[chunk].data(), + read_buffer->data(), + read_buffer->size(), + cudaMemcpyDefault, + stream)); } - auto d_compdata = page_data[chunk]->data(); + auto d_compdata = static_cast(page_data[chunk].data()); do { chunks[chunk].compressed_data = d_compdata; d_compdata += chunks[chunk].compressed_size; @@ -980,7 +981,7 @@ std::pair> reader::impl::read_column_chunks() std::vector chunk_source_map(num_chunks); // Tracker for eventually deallocating compressed and uncompressed data - raw_page_data = std::vector>(num_chunks); + raw_page_data = std::vector(num_chunks); // Keep track of column chunk file offsets std::vector column_chunk_offsets(num_chunks); @@ -1629,10 +1630,10 @@ void reader::impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num get_page_nesting_size{ d_cols_info.data(), max_depth, subpass.pages.size(), subpass.pages.device_begin()}); - // Manually create a int64_t `key_start` compatible counting_transform_iterator to avoid - // implicit casting to size_type. - auto const reduction_keys = thrust::make_transform_iterator( - thrust::make_counting_iterator(key_start), get_reduction_key{subpass.pages.size()}); + // Manually create a size_t `key_start` compatible counting_transform_iterator. 
+ auto const reduction_keys = + thrust::make_transform_iterator(thrust::make_counting_iterator(key_start), + get_reduction_key{subpass.pages.size()}); // Find the size of each column thrust::reduce_by_key(rmm::exec_policy_nosync(_stream), @@ -1695,15 +1696,14 @@ void reader::impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num nullmask_bufs, std::numeric_limits::max(), _stream); } -std::vector reader::impl::calculate_page_string_offsets() +cudf::detail::host_vector reader::impl::calculate_page_string_offsets() { auto& pass = *_pass_itm_data; auto& subpass = *pass.subpass; auto page_keys = make_page_key_iterator(subpass.pages); - std::vector col_sizes(_input_columns.size(), 0L); - rmm::device_uvector d_col_sizes(col_sizes.size(), _stream); + rmm::device_uvector d_col_sizes(_input_columns.size(), _stream); // use page_index to fetch page string sizes in the proper order auto val_iter = thrust::make_transform_iterator(subpass.pages.device_begin(), @@ -1717,7 +1717,7 @@ std::vector reader::impl::calculate_page_string_offsets() page_offset_output_iter{subpass.pages.device_ptr()}); // now sum up page sizes - rmm::device_uvector reduce_keys(col_sizes.size(), _stream); + rmm::device_uvector reduce_keys(d_col_sizes.size(), _stream); thrust::reduce_by_key(rmm::exec_policy_nosync(_stream), page_keys, page_keys + subpass.pages.size(), @@ -1725,14 +1725,7 @@ std::vector reader::impl::calculate_page_string_offsets() reduce_keys.begin(), d_col_sizes.begin()); - cudaMemcpyAsync(col_sizes.data(), - d_col_sizes.data(), - sizeof(size_t) * col_sizes.size(), - cudaMemcpyDeviceToHost, - _stream); - _stream.synchronize(); - - return col_sizes; + return cudf::detail::make_host_vector_sync(d_col_sizes, _stream); } } // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/rle_stream.cuh b/cpp/src/io/parquet/rle_stream.cuh index 4a0791d5c54..69e783a89d0 100644 --- a/cpp/src/io/parquet/rle_stream.cuh +++ b/cpp/src/io/parquet/rle_stream.cuh @@ -19,6 +19,7 @@ 
#include "parquet_gpu.hpp" #include +#include namespace cudf::io::parquet::detail { @@ -216,6 +217,26 @@ struct rle_stream { decode_index = -1; // signals the first iteration. Nothing to decode. } + __device__ inline int get_rle_run_info(rle_run& run) + { + run.start = cur; + run.level_run = get_vlq32(run.start, end); + + // run_bytes includes the header size + int run_bytes = run.start - cur; + if (is_literal_run(run.level_run)) { + // from the parquet spec: literal runs always come in multiples of 8 values. + run.size = (run.level_run >> 1) * 8; + run_bytes += util::div_rounding_up_unsafe(run.size * level_bits, 8); + } else { + // repeated value run + run.size = (run.level_run >> 1); + run_bytes += util::div_rounding_up_unsafe(level_bits, 8); + } + + return run_bytes; + } + __device__ inline void fill_run_batch() { // decode_index == -1 means we are on the very first decode iteration for this stream. @@ -226,31 +247,14 @@ struct rle_stream { while (((decode_index == -1 && fill_index < num_rle_stream_decode_warps) || fill_index < decode_index + run_buffer_size) && cur < end) { - auto& run = runs[rolling_index(fill_index)]; - // Encoding::RLE + // Pass by reference to fill the runs shared memory with the run data + auto& run = runs[rolling_index(fill_index)]; + int const run_bytes = get_rle_run_info(run); - // bytes for the varint header - uint8_t const* _cur = cur; - int const level_run = get_vlq32(_cur, end); - // run_bytes includes the header size - int run_bytes = _cur - cur; - - // literal run - if (is_literal_run(level_run)) { - // from the parquet spec: literal runs always come in multiples of 8 values. 
- run.size = (level_run >> 1) * 8; - run_bytes += ((run.size * level_bits) + 7) >> 3; - } - // repeated value run - else { - run.size = (level_run >> 1); - run_bytes += ((level_bits) + 7) >> 3; - } - run.output_pos = output_pos; - run.start = _cur; - run.level_run = level_run; run.remaining = run.size; + run.output_pos = output_pos; + cur += run_bytes; output_pos += run.size; fill_index++; @@ -372,6 +376,39 @@ struct rle_stream { return values_processed_shared; } + __device__ inline int skip_runs(int target_count) + { + // we want to process all runs UP TO BUT NOT INCLUDING the run that overlaps with the skip + // amount so threads spin like crazy on fill_run_batch(), skipping writing unnecessary run info. + // then when it hits the one that matters, we don't process it at all and bail as if we never + // started basically we're setting up the rle_stream vars necessary to start fill_run_batch for + // the first time + while (cur < end) { + rle_run run; + int run_bytes = get_rle_run_info(run); + + if ((output_pos + run.size) > target_count) { + return output_pos; // bail! we've reached the starting run + } + + // skip this run + output_pos += run.size; + cur += run_bytes; + } + + return output_pos; // we skipped everything + } + + __device__ inline int skip_decode(int t, int count) + { + int const output_count = min(count, total_values - cur_values); + + // if level_bits == 0, there's nothing to do + // a very common case: columns with no nulls, especially if they are non-nested + cur_values = (level_bits == 0) ? 
output_count : skip_runs(output_count); + return cur_values; + } + __device__ inline int decode_next(int t) { return decode_next(t, max_output_values); } }; diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 190f13eb688..f865c9a7643 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -183,7 +183,7 @@ struct aggregate_writer_metadata { std::vector row_groups; std::vector key_value_metadata; std::vector offset_indexes; - std::vector> column_indexes; + std::vector> column_indexes; }; std::vector files; std::optional> column_orders = std::nullopt; @@ -1543,12 +1543,7 @@ void encode_pages(hostdevice_2dvector& chunks, d_chunks.flat_view(), {column_stats, pages.size()}, column_index_truncate_length, stream); } - auto h_chunks = chunks.host_view(); - CUDF_CUDA_TRY(cudaMemcpyAsync(h_chunks.data(), - d_chunks.data(), - d_chunks.flat_view().size_bytes(), - cudaMemcpyDefault, - stream.value())); + chunks.device_to_host_async(stream); if (comp_stats.has_value()) { comp_stats.value() += collect_compression_statistics(comp_in, comp_res, stream); @@ -2559,12 +2554,11 @@ void writer::impl::write_parquet_data_to_sink( } else { CUDF_EXPECTS(bounce_buffer.size() >= ck.compressed_size, "Bounce buffer was not properly initialized."); - CUDF_CUDA_TRY(cudaMemcpyAsync(bounce_buffer.data(), - dev_bfr + ck.ck_stat_size, - ck.compressed_size, - cudaMemcpyDefault, - _stream.value())); - _stream.synchronize(); + cudf::detail::cuda_memcpy( + host_span{bounce_buffer}.subspan(0, ck.compressed_size), + device_span{dev_bfr + ck.ck_stat_size, ck.compressed_size}, + _stream); + _out_sink[p]->host_write(bounce_buffer.data(), ck.compressed_size); } @@ -2600,13 +2594,8 @@ void writer::impl::write_parquet_data_to_sink( auto const& column_chunk_meta = row_group.columns[i].meta_data; // start transfer of the column index - std::vector column_idx; - column_idx.resize(ck.column_index_size); - 
CUDF_CUDA_TRY(cudaMemcpyAsync(column_idx.data(), - ck.column_index_blob, - ck.column_index_size, - cudaMemcpyDefault, - _stream.value())); + auto column_idx = cudf::detail::make_host_vector_async( + device_span{ck.column_index_blob, ck.column_index_size}, _stream); // calculate offsets while the column index is transferring int64_t curr_pg_offset = column_chunk_meta.data_page_offset; diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index badcd3f58f9..06069630685 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -74,8 +74,8 @@ class bgzip_data_chunk_reader : public data_chunk_reader { // Buffer needs to be padded. // Required by `inflate_kernel`. device.resize(cudf::util::round_up_safe(host.size(), BUFFER_PADDING_MULTIPLE), stream); - CUDF_CUDA_TRY(cudaMemcpyAsync( - device.data(), host.data(), host.size() * sizeof(T), cudaMemcpyDefault, stream.value())); + cudf::detail::cuda_memcpy_async( + device_span{device}.subspan(0, host.size()), host, stream); } struct decompression_blocks { diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp index 58faa0ebfe4..f4a2f29026a 100644 --- a/cpp/src/io/text/data_chunk_source_factories.cpp +++ b/cpp/src/io/text/data_chunk_source_factories.cpp @@ -22,10 +22,6 @@ #include #include -#include - -#include - #include namespace cudf::io::text { @@ -87,8 +83,10 @@ class datasource_chunk_reader : public data_chunk_reader { _source->host_read(_offset, read_size, reinterpret_cast(h_ticket.buffer.data())); // copy the host-pinned data on to device - CUDF_CUDA_TRY(cudaMemcpyAsync( - chunk.data(), h_ticket.buffer.data(), read_size, cudaMemcpyDefault, stream.value())); + cudf::detail::cuda_memcpy_async( + device_span{chunk}.subspan(0, read_size), + host_span{h_ticket.buffer}.subspan(0, read_size), + stream); // record the host-to-device copy. 
CUDF_CUDA_TRY(cudaEventRecord(h_ticket.event, stream.value())); @@ -153,8 +151,10 @@ class istream_data_chunk_reader : public data_chunk_reader { auto chunk = rmm::device_uvector(read_size, stream); // copy the host-pinned data on to device - CUDF_CUDA_TRY(cudaMemcpyAsync( - chunk.data(), h_ticket.buffer.data(), read_size, cudaMemcpyDefault, stream.value())); + cudf::detail::cuda_memcpy_async( + device_span{chunk}.subspan(0, read_size), + host_span{h_ticket.buffer}.subspan(0, read_size), + stream); // record the host-to-device copy. CUDF_CUDA_TRY(cudaEventRecord(h_ticket.event, stream.value())); @@ -193,12 +193,10 @@ class host_span_data_chunk_reader : public data_chunk_reader { auto chunk = rmm::device_uvector(read_size, stream); // copy the host data to device - CUDF_CUDA_TRY(cudaMemcpyAsync( // - chunk.data(), - _data.data() + _position, - read_size, - cudaMemcpyDefault, - stream.value())); + cudf::detail::cuda_memcpy_async( + cudf::device_span{chunk}.subspan(0, read_size), + cudf::host_span{_data}.subspan(_position, read_size), + stream); _position += read_size; diff --git a/cpp/src/io/utilities/column_buffer.cpp b/cpp/src/io/utilities/column_buffer.cpp index 249dc3b5875..6d954753af8 100644 --- a/cpp/src/io/utilities/column_buffer.cpp +++ b/cpp/src/io/utilities/column_buffer.cpp @@ -21,12 +21,12 @@ #include "column_buffer.hpp" +#include #include #include #include #include -#include #include namespace cudf::io::detail { diff --git a/cpp/src/io/utilities/column_buffer.hpp b/cpp/src/io/utilities/column_buffer.hpp index e73b2bc88de..31c8b781e77 100644 --- a/cpp/src/io/utilities/column_buffer.hpp +++ b/cpp/src/io/utilities/column_buffer.hpp @@ -22,12 +22,9 @@ #pragma once #include -#include #include -#include #include #include -#include #include #include @@ -35,6 +32,8 @@ #include +#include + namespace cudf { namespace io { namespace detail { diff --git a/cpp/src/io/utilities/config_utils.cpp b/cpp/src/io/utilities/config_utils.cpp index a3afbd52896..b66742569d9 
100644 --- a/cpp/src/io/utilities/config_utils.cpp +++ b/cpp/src/io/utilities/config_utils.cpp @@ -16,11 +16,10 @@ #include "getenv_or.hpp" -#include #include -#include -#include +#include + #include namespace cudf::io { @@ -53,6 +52,14 @@ bool is_gds_enabled() { return is_always_enabled() or get_env_policy() == usage_ bool is_kvikio_enabled() { return get_env_policy() == usage_policy::KVIKIO; } +void set_thread_pool_nthreads_from_env() +{ + static std::once_flag flag{}; + std::call_once(flag, [] { + auto nthreads = getenv_or("KVIKIO_NTHREADS", 8U); + kvikio::defaults::thread_pool_nthreads_reset(nthreads); + }); +} } // namespace cufile_integration namespace nvcomp_integration { @@ -81,5 +88,4 @@ bool is_all_enabled() { return get_env_policy() == usage_policy::ALWAYS; } bool is_stable_enabled() { return is_all_enabled() or get_env_policy() == usage_policy::STABLE; } } // namespace nvcomp_integration - } // namespace cudf::io diff --git a/cpp/src/io/utilities/data_casting.cu b/cpp/src/io/utilities/data_casting.cu index f70171eef68..0c49b2e5d78 100644 --- a/cpp/src/io/utilities/data_casting.cu +++ b/cpp/src/io/utilities/data_casting.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -800,7 +801,7 @@ template static std::unique_ptr parse_string(string_view_pair_it str_tuples, size_type col_size, rmm::device_buffer&& null_mask, - rmm::device_scalar& d_null_count, + cudf::detail::device_scalar& d_null_count, cudf::io::parse_options_view const& options, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) @@ -930,7 +931,7 @@ std::unique_ptr parse_data( CUDF_FUNC_RANGE(); if (col_size == 0) { return make_empty_column(col_type); } - auto d_null_count = rmm::device_scalar(null_count, stream); + auto d_null_count = cudf::detail::device_scalar(null_count, stream); auto null_count_data = d_null_count.data(); if (null_mask.is_empty()) { null_mask = cudf::create_null_mask(col_size, mask_state::ALL_VALID, stream, mr); diff --git 
a/cpp/src/io/utilities/data_sink.cpp b/cpp/src/io/utilities/data_sink.cpp index 0b76f3d3e8f..a8a275919d8 100644 --- a/cpp/src/io/utilities/data_sink.cpp +++ b/cpp/src/io/utilities/data_sink.cpp @@ -42,6 +42,7 @@ class file_sink : public data_sink { if (!_output_stream.is_open()) { detail::throw_on_file_open_failure(filepath, true); } if (cufile_integration::is_kvikio_enabled()) { + cufile_integration::set_thread_pool_nthreads_from_env(); _kvikio_file = kvikio::FileHandle(filepath, "w"); CUDF_LOG_INFO("Writing a file using kvikIO, with compatibility mode {}.", _kvikio_file.is_compat_mode_on() ? "on" : "off"); diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index 28f7f08521e..9668b30e9a9 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -15,8 +15,10 @@ */ #include "file_io_utilities.hpp" +#include "getenv_or.hpp" #include +#include #include #include #include @@ -31,7 +33,6 @@ #include #include -#include #include namespace cudf { @@ -47,6 +48,7 @@ class file_source : public datasource { { detail::force_init_cuda_context(); if (cufile_integration::is_kvikio_enabled()) { + cufile_integration::set_thread_pool_nthreads_from_env(); _kvikio_file = kvikio::FileHandle(filepath); CUDF_LOG_INFO("Reading a file using kvikIO, with compatibility mode {}.", _kvikio_file.is_compat_mode_on() ? 
"on" : "off"); @@ -245,17 +247,18 @@ class device_buffer_source final : public datasource { size_t host_read(size_t offset, size_t size, uint8_t* dst) override { auto const count = std::min(size, this->size() - offset); - auto const stream = cudf::get_default_stream(); - CUDF_CUDA_TRY( - cudaMemcpyAsync(dst, _d_buffer.data() + offset, count, cudaMemcpyDefault, stream.value())); - stream.synchronize(); + auto const stream = cudf::detail::global_cuda_stream_pool().get_stream(); + cudf::detail::cuda_memcpy(host_span{dst, count}, + device_span{ + reinterpret_cast(_d_buffer.data() + offset), count}, + stream); return count; } std::unique_ptr host_read(size_t offset, size_t size) override { auto const count = std::min(size, this->size() - offset); - auto const stream = cudf::get_default_stream(); + auto const stream = cudf::detail::global_cuda_stream_pool().get_stream(); auto h_data = cudf::detail::make_host_vector_async( cudf::device_span{_d_buffer.data() + offset, count}, stream); stream.synchronize(); @@ -392,14 +395,21 @@ std::unique_ptr datasource::create(std::string const& filepath, size_t offset, size_t max_size_estimate) { -#ifdef CUFILE_FOUND - if (cufile_integration::is_always_enabled()) { - // avoid mmap as GDS is expected to be used for most reads + auto const use_memory_mapping = [] { + auto const policy = getenv_or("LIBCUDF_MMAP_ENABLED", std::string{"ON"}); + + if (policy == "ON") { return true; } + if (policy == "OFF") { return false; } + + CUDF_FAIL("Invalid LIBCUDF_MMAP_ENABLED value: " + policy); + }(); + + if (use_memory_mapping) { + return std::make_unique(filepath.c_str(), offset, max_size_estimate); + } else { + // `file_source` reads the file directly, without memory mapping return std::make_unique(filepath.c_str()); } -#endif - // Use our own memory mapping implementation for direct file reads - return std::make_unique(filepath.c_str(), offset, max_size_estimate); } std::unique_ptr datasource::create(host_buffer const& buffer) diff --git 
a/cpp/src/io/utilities/file_io_utilities.cpp b/cpp/src/io/utilities/file_io_utilities.cpp index d7b54399f8d..cf19bc591cc 100644 --- a/cpp/src/io/utilities/file_io_utilities.cpp +++ b/cpp/src/io/utilities/file_io_utilities.cpp @@ -22,8 +22,6 @@ #include #include -#include - #include #include @@ -110,7 +108,11 @@ class cufile_shim { ~cufile_shim() { - if (driver_close != nullptr) driver_close(); + // Explicit cuFile driver close should not be performed here to avoid segfault. However, in the + // absence of driver_close(), cuFile will implicitly do that, which in most cases causes + // segfault anyway. TODO: Revisit this conundrum once cuFile is fixed. + // https://github.com/rapidsai/cudf/issues/17121 + if (cf_lib != nullptr) dlclose(cf_lib); } @@ -239,7 +241,7 @@ std::vector> make_sliced_tasks( std::vector> slice_tasks; std::transform(slices.cbegin(), slices.cend(), std::back_inserter(slice_tasks), [&](auto& slice) { return pool.submit_task( - [&] { return function(ptr + slice.offset, slice.size, offset + slice.offset); }); + [=] { return function(ptr + slice.offset, slice.size, offset + slice.offset); }); }); return slice_tasks; } diff --git a/cpp/src/io/utilities/file_io_utilities.hpp b/cpp/src/io/utilities/file_io_utilities.hpp index 7e47b5b3d10..584b6213fa3 100644 --- a/cpp/src/io/utilities/file_io_utilities.hpp +++ b/cpp/src/io/utilities/file_io_utilities.hpp @@ -104,7 +104,7 @@ class cufile_shim; /** * @brief Class that provides RAII for cuFile file registration. 
*/ -struct cufile_registered_file { +class cufile_registered_file { void register_handle(); public: diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp index af1ba16a424..f969b45727b 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -176,13 +176,19 @@ class hostdevice_2dvector { operator device_2dspan() const { return {device_span{_data}, _size.second}; } device_2dspan device_view() { return static_cast>(*this); } - device_2dspan device_view() const { return static_cast>(*this); } + [[nodiscard]] device_2dspan device_view() const + { + return static_cast>(*this); + } operator host_2dspan() { return {host_span{_data}, _size.second}; } operator host_2dspan() const { return {host_span{_data}, _size.second}; } host_2dspan host_view() { return static_cast>(*this); } - host_2dspan host_view() const { return static_cast>(*this); } + [[nodiscard]] host_2dspan host_view() const + { + return static_cast>(*this); + } host_span operator[](size_t row) { @@ -194,16 +200,19 @@ class hostdevice_2dvector { return host_span{_data}.subspan(row * _size.second, _size.second); } - auto size() const noexcept { return _size; } - auto count() const noexcept { return _size.first * _size.second; } - auto is_empty() const noexcept { return count() == 0; } + [[nodiscard]] auto size() const noexcept { return _size; } + [[nodiscard]] auto count() const noexcept { return _size.first * _size.second; } + [[nodiscard]] auto is_empty() const noexcept { return count() == 0; } T* base_host_ptr(size_t offset = 0) { return _data.host_ptr(offset); } T* base_device_ptr(size_t offset = 0) { return _data.device_ptr(offset); } - T const* base_host_ptr(size_t offset = 0) const { return _data.host_ptr(offset); } + [[nodiscard]] T const* base_host_ptr(size_t offset = 0) const { return _data.host_ptr(offset); } - T const* base_device_ptr(size_t offset = 0) const { return _data.device_ptr(offset); } + 
[[nodiscard]] T const* base_device_ptr(size_t offset = 0) const + { + return _data.device_ptr(offset); + } [[nodiscard]] size_t size_bytes() const noexcept { return _data.size_bytes(); } diff --git a/cpp/src/io/utilities/row_selection.cpp b/cpp/src/io/utilities/row_selection.cpp index c0bbca39167..cf252fe63af 100644 --- a/cpp/src/io/utilities/row_selection.cpp +++ b/cpp/src/io/utilities/row_selection.cpp @@ -16,10 +16,7 @@ #include "io/utilities/row_selection.hpp" -#include - #include -#include namespace cudf::io::detail { diff --git a/cpp/src/io/utilities/row_selection.hpp b/cpp/src/io/utilities/row_selection.hpp index 7c607099cdc..e826feff201 100644 --- a/cpp/src/io/utilities/row_selection.hpp +++ b/cpp/src/io/utilities/row_selection.hpp @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include diff --git a/cpp/src/io/utilities/type_inference.cu b/cpp/src/io/utilities/type_inference.cu index 43dc38c4ac6..af32b207d20 100644 --- a/cpp/src/io/utilities/type_inference.cu +++ b/cpp/src/io/utilities/type_inference.cu @@ -18,11 +18,10 @@ #include "io/utilities/string_parsing.hpp" #include "io/utilities/trie.cuh" +#include #include #include -#include - #include #include @@ -242,7 +241,7 @@ cudf::io::column_type_histogram infer_column_type(OptionsView const& options, constexpr int block_size = 128; auto const grid_size = (size + block_size - 1) / block_size; - auto d_column_info = rmm::device_scalar(stream); + auto d_column_info = cudf::detail::device_scalar(stream); CUDF_CUDA_TRY(cudaMemsetAsync( d_column_info.data(), 0, sizeof(cudf::io::column_type_histogram), stream.value())); diff --git a/cpp/src/jit/cache.cpp b/cpp/src/jit/cache.cpp index 89c47d246d0..34a0bdce124 100644 --- a/cpp/src/jit/cache.cpp +++ b/cpp/src/jit/cache.cpp @@ -16,11 +16,8 @@ #include -#include - #include -#include #include namespace cudf { diff --git a/cpp/src/jit/util.cpp b/cpp/src/jit/util.cpp index 0585e02a031..d9a29203133 100644 --- a/cpp/src/jit/util.cpp +++ 
b/cpp/src/jit/util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,8 +19,6 @@ #include #include -#include - namespace cudf { namespace jit { struct get_data_ptr_functor { diff --git a/cpp/src/join/conditional_join.cu b/cpp/src/join/conditional_join.cu index 2ec23e0dc6d..40d1c925889 100644 --- a/cpp/src/join/conditional_join.cu +++ b/cpp/src/join/conditional_join.cu @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -81,7 +82,7 @@ std::unique_ptr> conditional_join_anti_semi( join_size = *output_size; } else { // Allocate storage for the counter used to get the size of the join output - rmm::device_scalar size(0, stream, mr); + cudf::detail::device_scalar size(0, stream, mr); if (has_nulls) { compute_conditional_join_output_size <<>>( @@ -94,7 +95,7 @@ std::unique_ptr> conditional_join_anti_semi( join_size = size.value(stream); } - rmm::device_scalar write_index(0, stream); + cudf::detail::device_scalar write_index(0, stream); auto left_indices = std::make_unique>(join_size, stream, mr); @@ -197,7 +198,7 @@ conditional_join(table_view const& left, join_size = *output_size; } else { // Allocate storage for the counter used to get the size of the join output - rmm::device_scalar size(0, stream, mr); + cudf::detail::device_scalar size(0, stream, mr); if (has_nulls) { compute_conditional_join_output_size <<>>( @@ -231,7 +232,7 @@ conditional_join(table_view const& left, std::make_unique>(0, stream, mr)); } - rmm::device_scalar write_index(0, stream); + cudf::detail::device_scalar write_index(0, stream); auto left_indices = std::make_unique>(join_size, stream, mr); auto right_indices = std::make_unique>(join_size, stream, mr); @@ -342,7 +343,7 @@ std::size_t compute_conditional_join_output_size(table_view const& left, auto const 
shmem_size_per_block = parser.shmem_per_thread * config.num_threads_per_block; // Allocate storage for the counter used to get the size of the join output - rmm::device_scalar size(0, stream, mr); + cudf::detail::device_scalar size(0, stream, mr); // Determine number of output rows without actually building the output to simply // find what the size of the output will be. diff --git a/cpp/src/join/distinct_hash_join.cu b/cpp/src/join/distinct_hash_join.cu index c7294152982..515d28201e8 100644 --- a/cpp/src/join/distinct_hash_join.cu +++ b/cpp/src/join/distinct_hash_join.cu @@ -27,7 +27,6 @@ #include #include -#include #include #include diff --git a/cpp/src/join/mixed_join_size_kernel.cuh b/cpp/src/join/mixed_join_size_kernel.cuh index 84e9be45030..4049ccf35e1 100644 --- a/cpp/src/join/mixed_join_size_kernel.cuh +++ b/cpp/src/join/mixed_join_size_kernel.cuh @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -122,7 +123,7 @@ std::size_t launch_compute_mixed_join_output_size( rmm::device_async_resource_ref mr) { // Allocate storage for the counter used to get the size of the join output - rmm::device_scalar size(0, stream, mr); + cudf::detail::device_scalar size(0, stream, mr); compute_mixed_join_output_size <<>>( diff --git a/cpp/src/json/json_path.cu b/cpp/src/json/json_path.cu index 59fdbedf089..fb5cf66dd60 100644 --- a/cpp/src/json/json_path.cu +++ b/cpp/src/json/json_path.cu @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -1031,7 +1032,7 @@ std::unique_ptr get_json_object(cudf::strings_column_view const& c cudf::detail::create_null_mask(col.size(), mask_state::UNINITIALIZED, stream, mr); // compute results - rmm::device_scalar d_valid_count{0, stream}; + cudf::detail::device_scalar d_valid_count{0, stream}; get_json_object_kernel <<>>( diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index 17008e80e79..ebab3beb08f 100644 --- a/cpp/src/partitioning/partitioning.cu 
+++ b/cpp/src/partitioning/partitioning.cu @@ -834,10 +834,11 @@ std::pair, std::vector> partition( table_view const& t, column_view const& partition_map, size_type num_partitions, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::partition(t, partition_map, num_partitions, cudf::get_default_stream(), mr); + return detail::partition(t, partition_map, num_partitions, stream, mr); } } // namespace cudf diff --git a/cpp/src/partitioning/round_robin.cu b/cpp/src/partitioning/round_robin.cu index 5a4c90a67a5..ab6ab393878 100644 --- a/cpp/src/partitioning/round_robin.cu +++ b/cpp/src/partitioning/round_robin.cu @@ -273,11 +273,11 @@ std::pair, std::vector> round_robi table_view const& input, cudf::size_type num_partitions, cudf::size_type start_partition, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::round_robin_partition( - input, num_partitions, start_partition, cudf::get_default_stream(), mr); + return detail::round_robin_partition(input, num_partitions, start_partition, stream, mr); } } // namespace cudf diff --git a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu index b0a84a6d50c..d27420658d6 100644 --- a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu +++ b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu @@ -1126,12 +1126,8 @@ std::pair, rmm::device_uvector> generate_mer * `max` of 0. * * @param tdv input tdigests. The tdigests within this column are grouped by key. - * @param h_group_offsets a host iterator of the offsets to the start of each group. A group is - * counted as one even when the cluster is empty in it. The offsets should have the same values as - * the ones in `group_offsets`. * @param group_offsets a device iterator of the offsets to the start of each group. A group is - * counted as one even when the cluster is empty in it. 
The offsets should have the same values as - * the ones in `h_group_offsets`. + * counted as one even when the cluster is empty in it. * @param group_labels a device iterator of the the group label for each tdigest cluster including * empty clusters. * @param num_group_labels the number of unique group labels. @@ -1142,9 +1138,8 @@ std::pair, rmm::device_uvector> generate_mer * * @return A column containing the merged tdigests. */ -template +template std::unique_ptr merge_tdigests(tdigest_column_view const& tdv, - HGroupOffsetIter h_group_offsets, GroupOffsetIter group_offsets, GroupLabelIter group_labels, size_t num_group_labels, @@ -1313,21 +1308,13 @@ std::unique_ptr reduce_merge_tdigest(column_view const& input, if (input.size() == 0) { return cudf::tdigest::detail::make_empty_tdigest_scalar(stream, mr); } - auto group_offsets_ = group_offsets_fn{input.size()}; - auto h_group_offsets = cudf::detail::make_counting_transform_iterator(0, group_offsets_); - auto group_offsets = cudf::detail::make_counting_transform_iterator(0, group_offsets_); - auto group_labels = thrust::make_constant_iterator(0); - return to_tdigest_scalar(merge_tdigests(tdv, - h_group_offsets, - group_offsets, - group_labels, - input.size(), - 1, - max_centroids, - stream, - mr), - stream, - mr); + auto group_offsets_ = group_offsets_fn{input.size()}; + auto group_offsets = cudf::detail::make_counting_transform_iterator(0, group_offsets_); + auto group_labels = thrust::make_constant_iterator(0); + return to_tdigest_scalar( + merge_tdigests(tdv, group_offsets, group_labels, input.size(), 1, max_centroids, stream, mr), + stream, + mr); } std::unique_ptr group_tdigest(column_view const& col, @@ -1376,16 +1363,7 @@ std::unique_ptr group_merge_tdigest(column_view const& input, return cudf::tdigest::detail::make_empty_tdigests_column(num_groups, stream, mr); } - // bring group offsets back to the host - std::vector h_group_offsets(group_offsets.size()); - cudaMemcpyAsync(h_group_offsets.data(), - 
group_offsets.begin(), - sizeof(size_type) * group_offsets.size(), - cudaMemcpyDefault, - stream); - return merge_tdigests(tdv, - h_group_offsets.begin(), group_offsets.data(), group_labels.data(), group_labels.size(), diff --git a/cpp/src/quantiles/tdigest/tdigest_column_view.cpp b/cpp/src/quantiles/tdigest/tdigest_column_view.cpp index a9f86ac1b5f..17844b6bb0a 100644 --- a/cpp/src/quantiles/tdigest/tdigest_column_view.cpp +++ b/cpp/src/quantiles/tdigest/tdigest_column_view.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include diff --git a/cpp/src/reductions/all.cu b/cpp/src/reductions/all.cu index 67ea29a2cb1..890625830a5 100644 --- a/cpp/src/reductions/all.cu +++ b/cpp/src/reductions/all.cu @@ -16,6 +16,7 @@ #include "simple.cuh" +#include #include #include #include @@ -65,7 +66,8 @@ struct all_fn { cudf::dictionary::detail::make_dictionary_pair_iterator(*d_dict, input.has_nulls()); return thrust::make_transform_iterator(pair_iter, null_iter); }(); - auto d_result = rmm::device_scalar(1, stream, cudf::get_current_device_resource_ref()); + auto d_result = + cudf::detail::device_scalar(1, stream, cudf::get_current_device_resource_ref()); thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), input.size(), diff --git a/cpp/src/reductions/any.cu b/cpp/src/reductions/any.cu index 057f038c622..d70da369d72 100644 --- a/cpp/src/reductions/any.cu +++ b/cpp/src/reductions/any.cu @@ -16,6 +16,7 @@ #include "simple.cuh" +#include #include #include #include @@ -65,7 +66,8 @@ struct any_fn { cudf::dictionary::detail::make_dictionary_pair_iterator(*d_dict, input.has_nulls()); return thrust::make_transform_iterator(pair_iter, null_iter); }(); - auto d_result = 
rmm::device_scalar(0, stream, cudf::get_current_device_resource_ref()); + auto d_result = + cudf::detail::device_scalar(0, stream, cudf::get_current_device_resource_ref()); thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), input.size(), diff --git a/cpp/src/reductions/minmax.cu b/cpp/src/reductions/minmax.cu index 139de068050..4f6eb23ce5b 100644 --- a/cpp/src/reductions/minmax.cu +++ b/cpp/src/reductions/minmax.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -69,18 +70,18 @@ struct minmax_pair { * @param num_items number of items to reduce * @param binary_op binary operator used to reduce * @param stream CUDA stream to run kernels on. - * @return rmm::device_scalar + * @return cudf::detail::device_scalar */ template ::type> -rmm::device_scalar reduce_device(InputIterator d_in, - size_type num_items, - Op binary_op, - rmm::cuda_stream_view stream) +auto reduce_device(InputIterator d_in, + size_type num_items, + Op binary_op, + rmm::cuda_stream_view stream) { OutputType identity{}; - rmm::device_scalar result{identity, stream}; + cudf::detail::device_scalar result{identity, stream}; // Allocate temporary storage size_t storage_bytes = 0; diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index d187375b69f..75ebc078930 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -26,8 +26,6 @@ #include #include #include -#include -#include #include #include #include diff --git a/cpp/src/reductions/scan/scan.cpp b/cpp/src/reductions/scan/scan.cpp index d3c0b54f286..b91ae19b51a 100644 --- a/cpp/src/reductions/scan/scan.cpp +++ b/cpp/src/reductions/scan/scan.cpp @@ -14,13 +14,10 @@ * limitations under the License. 
*/ -#include #include #include #include #include -#include -#include namespace cudf { diff --git a/cpp/src/reductions/segmented/reductions.cpp b/cpp/src/reductions/segmented/reductions.cpp index 40d1d8a0a53..c4f6c135dde 100644 --- a/cpp/src/reductions/segmented/reductions.cpp +++ b/cpp/src/reductions/segmented/reductions.cpp @@ -13,16 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include -#include #include #include #include #include #include -#include #include -#include #include #include diff --git a/cpp/src/replace/nulls.cu b/cpp/src/replace/nulls.cu index 1df1549432f..d0e3358cc34 100644 --- a/cpp/src/replace/nulls.cu +++ b/cpp/src/replace/nulls.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -137,7 +138,7 @@ struct replace_nulls_column_kernel_forwarder { auto device_out = cudf::mutable_column_device_view::create(output_view, stream); auto device_replacement = cudf::column_device_view::create(replacement, stream); - rmm::device_scalar valid_counter(0, stream); + cudf::detail::device_scalar valid_counter(0, stream); cudf::size_type* valid_count = valid_counter.data(); replace<<>>( diff --git a/cpp/src/replace/replace.cu b/cpp/src/replace/replace.cu index 86ec8cfc91e..0cc97ca05e0 100644 --- a/cpp/src/replace/replace.cu +++ b/cpp/src/replace/replace.cu @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -53,7 +54,6 @@ #include #include -#include #include #include @@ -182,7 +182,7 @@ struct replace_kernel_forwarder { rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - rmm::device_scalar valid_counter(0, stream); + cudf::detail::device_scalar valid_counter(0, stream); cudf::size_type* valid_count = valid_counter.data(); auto replace = [&] { diff --git a/cpp/src/rolling/detail/optimized_unbounded_window.cpp b/cpp/src/rolling/detail/optimized_unbounded_window.cpp index 72c23395a93..7cad31c0658 100644 --- 
a/cpp/src/rolling/detail/optimized_unbounded_window.cpp +++ b/cpp/src/rolling/detail/optimized_unbounded_window.cpp @@ -18,13 +18,10 @@ #include #include #include -#include #include #include #include #include -#include -#include #include namespace cudf::detail { diff --git a/cpp/src/rolling/detail/range_window_bounds.hpp b/cpp/src/rolling/detail/range_window_bounds.hpp index 8a53e937f98..77cb2a8c7f5 100644 --- a/cpp/src/rolling/detail/range_window_bounds.hpp +++ b/cpp/src/rolling/detail/range_window_bounds.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,10 +16,7 @@ #pragma once #include -#include #include -#include -#include namespace cudf { namespace detail { diff --git a/cpp/src/rolling/detail/rolling.cuh b/cpp/src/rolling/detail/rolling.cuh index 528700137bf..bc0ee2eb519 100644 --- a/cpp/src/rolling/detail/rolling.cuh +++ b/cpp/src/rolling/detail/rolling.cuh @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -49,7 +50,6 @@ #include #include -#include #include #include @@ -1105,7 +1105,7 @@ struct rolling_window_launcher { auto const d_inp_ptr = column_device_view::create(input, stream); auto const d_default_out_ptr = column_device_view::create(default_outputs, stream); auto const d_out_ptr = mutable_column_device_view::create(output->mutable_view(), stream); - auto d_valid_count = rmm::device_scalar{0, stream}; + auto d_valid_count = cudf::detail::device_scalar{0, stream}; auto constexpr block_size = 256; auto const grid = cudf::detail::grid_1d(input.size(), block_size); @@ -1271,7 +1271,7 @@ std::unique_ptr rolling_window_udf(column_view const& input, udf_agg._output_type, input.size(), cudf::mask_state::UNINITIALIZED, stream, mr); auto output_view = output->mutable_view(); - rmm::device_scalar device_valid_count{0, 
stream}; + cudf::detail::device_scalar device_valid_count{0, stream}; std::string kernel_name = jitify2::reflection::Template("cudf::rolling::jit::gpu_rolling_new") // diff --git a/cpp/src/rolling/range_window_bounds.cpp b/cpp/src/rolling/range_window_bounds.cpp index 69792136c64..7f698dfcd6b 100644 --- a/cpp/src/rolling/range_window_bounds.cpp +++ b/cpp/src/rolling/range_window_bounds.cpp @@ -19,7 +19,6 @@ #include #include #include -#include namespace cudf { namespace { diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu index 8988d73fb02..332c440aea9 100644 --- a/cpp/src/round/round.cu +++ b/cpp/src/round/round.cu @@ -358,10 +358,11 @@ std::unique_ptr round(column_view const& input, std::unique_ptr round(column_view const& input, int32_t decimal_places, rounding_method method, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::round(input, decimal_places, method, cudf::get_default_stream(), mr); + return detail::round(input, decimal_places, method, stream, mr); } } // namespace cudf diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp index 31535198c58..4ec2174a96f 100644 --- a/cpp/src/scalar/scalar.cpp +++ b/cpp/src/scalar/scalar.cpp @@ -26,8 +26,6 @@ #include #include -#include - #include namespace cudf { diff --git a/cpp/src/scalar/scalar_factories.cpp b/cpp/src/scalar/scalar_factories.cpp index 656fe61fbbe..9f242bdffe0 100644 --- a/cpp/src/scalar/scalar_factories.cpp +++ b/cpp/src/scalar/scalar_factories.cpp @@ -16,10 +16,8 @@ #include #include -#include #include #include -#include #include #include diff --git a/cpp/src/strings/case.cu b/cpp/src/strings/case.cu index 4c015f3cbed..6a7c8ea45e9 100644 --- a/cpp/src/strings/case.cu +++ b/cpp/src/strings/case.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -348,7 +349,7 @@ std::unique_ptr convert_case(strings_column_view const& input, // This check incurs ~20% performance hit for smaller strings 
and so we only use it // after the threshold check above. The check makes very little impact for long strings // but results in a large performance gain when the input contains only single-byte characters. - rmm::device_scalar mb_count(0, stream); + cudf::detail::device_scalar mb_count(0, stream); // cudf::detail::grid_1d is limited to size_type elements auto const num_blocks = util::div_rounding_up_safe(chars_size / bytes_per_thread, block_size); // we only need to check every other byte since either will contain high bit diff --git a/cpp/src/strings/convert/convert_durations.cu b/cpp/src/strings/convert/convert_durations.cu index 0db1adf1223..f5d052c6657 100644 --- a/cpp/src/strings/convert/convert_durations.cu +++ b/cpp/src/strings/convert/convert_durations.cu @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -152,12 +153,8 @@ struct format_compiler { } // create program in device memory - d_items.resize(items.size(), stream); - CUDF_CUDA_TRY(cudaMemcpyAsync(d_items.data(), - items.data(), - items.size() * sizeof(items[0]), - cudaMemcpyDefault, - stream.value())); + d_items = cudf::detail::make_device_uvector_sync( + items, stream, cudf::get_current_device_resource_ref()); } format_item const* compiled_format_items() { return d_items.data(); } diff --git a/cpp/src/strings/copying/concatenate.cu b/cpp/src/strings/copying/concatenate.cu index 1d9d12686eb..9e4ef47ff79 100644 --- a/cpp/src/strings/copying/concatenate.cu +++ b/cpp/src/strings/copying/concatenate.cu @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -27,7 +28,6 @@ #include #include -#include #include #include @@ -242,7 +242,7 @@ std::unique_ptr concatenate(host_span columns, } { // Copy offsets columns with single kernel launch - rmm::device_scalar d_valid_count(0, stream); + cudf::detail::device_scalar d_valid_count(0, stream); constexpr size_type block_size{256}; cudf::detail::grid_1d config(offsets_count, block_size); diff --git 
a/cpp/src/strings/regex/regexec.cpp b/cpp/src/strings/regex/regexec.cpp index d1990733e81..60ad714dfec 100644 --- a/cpp/src/strings/regex/regexec.cpp +++ b/cpp/src/strings/regex/regexec.cpp @@ -24,7 +24,6 @@ #include #include -#include #include #include diff --git a/cpp/src/strings/replace/find_replace.cu b/cpp/src/strings/replace/find_replace.cu index 8a8001dd81a..957075017ba 100644 --- a/cpp/src/strings/replace/find_replace.cu +++ b/cpp/src/strings/replace/find_replace.cu @@ -14,6 +14,7 @@ * limitations under the License. */ #include +#include #include #include #include @@ -21,7 +22,6 @@ #include #include -#include #include #include diff --git a/cpp/src/strings/replace/multi.cu b/cpp/src/strings/replace/multi.cu index 352d883bdc5..88f343926c9 100644 --- a/cpp/src/strings/replace/multi.cu +++ b/cpp/src/strings/replace/multi.cu @@ -334,7 +334,7 @@ std::unique_ptr replace_character_parallel(strings_column_view const& in // Count the number of targets in the entire column. // Note this may over-count in the case where a target spans adjacent strings. - rmm::device_scalar d_count(0, stream); + cudf::detail::device_scalar d_count(0, stream); auto const num_blocks = util::div_rounding_up_safe( util::div_rounding_up_safe(chars_bytes, static_cast(bytes_per_thread)), block_size); count_targets<<>>(fn, chars_bytes, d_count.data()); diff --git a/cpp/src/strings/replace/replace.cu b/cpp/src/strings/replace/replace.cu index 16df0dbabdf..52ddef76c1a 100644 --- a/cpp/src/strings/replace/replace.cu +++ b/cpp/src/strings/replace/replace.cu @@ -285,7 +285,7 @@ std::unique_ptr replace_character_parallel(strings_column_view const& in // Count the number of targets in the entire column. // Note this may over-count in the case where a target spans adjacent strings. 
- rmm::device_scalar d_target_count(0, stream); + cudf::detail::device_scalar d_target_count(0, stream); constexpr int64_t block_size = 512; constexpr size_type bytes_per_thread = 4; auto const num_blocks = util::div_rounding_up_safe( diff --git a/cpp/src/strings/split/split.cuh b/cpp/src/strings/split/split.cuh index 81aca001d53..4b777be9d5b 100644 --- a/cpp/src/strings/split/split.cuh +++ b/cpp/src/strings/split/split.cuh @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -361,7 +362,7 @@ std::pair, rmm::device_uvector> split cudf::detail::offsetalator_factory::make_input_iterator(input.offsets(), input.offset()); // count the number of delimiters in the entire column - rmm::device_scalar d_count(0, stream); + cudf::detail::device_scalar d_count(0, stream); if (chars_bytes > 0) { constexpr int64_t block_size = 512; constexpr size_type bytes_per_thread = 4; diff --git a/cpp/src/strings/strings_scalar_factories.cpp b/cpp/src/strings/strings_scalar_factories.cpp index 219d1174d42..1cc405234b2 100644 --- a/cpp/src/strings/strings_scalar_factories.cpp +++ b/cpp/src/strings/strings_scalar_factories.cpp @@ -16,7 +16,6 @@ #include #include -#include #include diff --git a/cpp/src/structs/structs_column_view.cpp b/cpp/src/structs/structs_column_view.cpp index b0284e9cb96..e14142a9ad1 100644 --- a/cpp/src/structs/structs_column_view.cpp +++ b/cpp/src/structs/structs_column_view.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,6 @@ * limitations under the License. 
*/ -#include #include #include #include diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index 5df9943303d..4012ee3d21c 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -21,13 +21,10 @@ #include #include #include -#include #include #include #include -#include #include -#include #include #include diff --git a/cpp/src/table/table.cpp b/cpp/src/table/table.cpp index cb707c94288..41c64c6decb 100644 --- a/cpp/src/table/table.cpp +++ b/cpp/src/table/table.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include diff --git a/cpp/src/table/table_view.cpp b/cpp/src/table/table_view.cpp index 8a5340dc20d..659beb749af 100644 --- a/cpp/src/table/table_view.cpp +++ b/cpp/src/table/table_view.cpp @@ -20,10 +20,7 @@ #include #include -#include - #include -#include #include namespace cudf { diff --git a/cpp/src/text/tokenize.cu b/cpp/src/text/tokenize.cu index df25950e6d5..89ca8a089d6 100644 --- a/cpp/src/text/tokenize.cu +++ b/cpp/src/text/tokenize.cu @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -221,7 +222,7 @@ std::unique_ptr character_tokenize(cudf::strings_column_view const // To minimize memory, count the number of characters so we can // build the output offsets without an intermediate buffer. // In the worst case each byte is a character so the output is 4x the input. 
- rmm::device_scalar d_count(0, stream); + cudf::detail::device_scalar d_count(0, stream); auto const num_blocks = cudf::util::div_rounding_up_safe( cudf::util::div_rounding_up_safe(chars_bytes, static_cast(bytes_per_thread)), block_size); diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index 52b96bc9039..b919ac16956 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -23,8 +23,6 @@ #include #include #include -#include -#include #include #include diff --git a/cpp/src/utilities/cuda.cpp b/cpp/src/utilities/cuda.cpp index 53ca0608170..d979bda41d0 100644 --- a/cpp/src/utilities/cuda.cpp +++ b/cpp/src/utilities/cuda.cpp @@ -18,8 +18,6 @@ #include #include -#include - namespace cudf::detail { cudf::size_type num_multiprocessors() diff --git a/cpp/src/utilities/host_memory.cpp b/cpp/src/utilities/host_memory.cpp index 9d8e3cf2fa6..e30806a5011 100644 --- a/cpp/src/utilities/host_memory.cpp +++ b/cpp/src/utilities/host_memory.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include #include diff --git a/cpp/src/utilities/prefetch.cpp b/cpp/src/utilities/prefetch.cpp index 58971552758..000526723c4 100644 --- a/cpp/src/utilities/prefetch.cpp +++ b/cpp/src/utilities/prefetch.cpp @@ -14,7 +14,6 @@ * limitations under the License. 
*/ -#include #include #include diff --git a/cpp/src/utilities/stream_pool.cpp b/cpp/src/utilities/stream_pool.cpp index 8c29182bfb5..7069b59be26 100644 --- a/cpp/src/utilities/stream_pool.cpp +++ b/cpp/src/utilities/stream_pool.cpp @@ -23,7 +23,6 @@ #include #include -#include #include #include diff --git a/cpp/src/utilities/traits.cpp b/cpp/src/utilities/traits.cpp index a68dc84e340..c1e71f5f8f9 100644 --- a/cpp/src/utilities/traits.cpp +++ b/cpp/src/utilities/traits.cpp @@ -19,8 +19,6 @@ #include #include -#include - namespace cudf { namespace { diff --git a/cpp/src/utilities/type_checks.cpp b/cpp/src/utilities/type_checks.cpp index 3095b342748..84c8529641d 100644 --- a/cpp/src/utilities/type_checks.cpp +++ b/cpp/src/utilities/type_checks.cpp @@ -21,8 +21,6 @@ #include #include -#include - #include namespace cudf { diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index a4213dcbe94..6d3d1454462 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -611,7 +611,6 @@ ConfigureTest( text/bpe_tests.cpp text/edit_distance_tests.cpp text/jaccard_tests.cpp - text/minhash_tests.cpp text/ngrams_tests.cpp text/ngrams_tokenize_tests.cpp text/normalize_tests.cpp @@ -711,11 +710,13 @@ ConfigureTest(STREAM_MULTIBYTE_SPLIT_TEST streams/io/multibyte_split_test.cpp ST ConfigureTest(STREAM_NULL_MASK_TEST streams/null_mask_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_ORCIO_TEST streams/io/orc_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_PARQUETIO_TEST streams/io/parquet_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_PARTITIONING_TEST streams/partitioning_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_POOL_TEST streams/pool_test.cu STREAM_MODE testing) ConfigureTest(STREAM_REDUCTION_TEST streams/reduction_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_RESHAPE_TEST streams/reshape_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_ROLLING_TEST 
streams/rolling_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_ROUND_TEST streams/round_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_STREAM_COMPACTION_TEST streams/stream_compaction_test.cpp STREAM_MODE testing) diff --git a/cpp/tests/ast/transform_tests.cpp b/cpp/tests/ast/transform_tests.cpp index a4bde50a21e..7af88d8aa34 100644 --- a/cpp/tests/ast/transform_tests.cpp +++ b/cpp/tests/ast/transform_tests.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include #include @@ -26,14 +25,8 @@ #include #include #include -#include -#include -#include #include #include -#include - -#include #include @@ -41,7 +34,6 @@ #include #include #include -#include #include template diff --git a/cpp/tests/binaryop/binop-compiled-test.cpp b/cpp/tests/binaryop/binop-compiled-test.cpp index aa5b49567e6..3bd67001c16 100644 --- a/cpp/tests/binaryop/binop-compiled-test.cpp +++ b/cpp/tests/binaryop/binop-compiled-test.cpp @@ -26,9 +26,7 @@ #include #include #include -#include #include -#include #include diff --git a/cpp/tests/binaryop/binop-generic-ptx-test.cpp b/cpp/tests/binaryop/binop-generic-ptx-test.cpp index 03cc87a1968..e9a2761db4a 100644 --- a/cpp/tests/binaryop/binop-generic-ptx-test.cpp +++ b/cpp/tests/binaryop/binop-generic-ptx-test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Copyright 2018-2019 BlazingDB, Inc. 
* Copyright 2018 Christian Noboa Mardini @@ -19,7 +19,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/bitmask/bitmask_tests.cpp b/cpp/tests/bitmask/bitmask_tests.cpp index fe221fb1c48..799bf646e52 100644 --- a/cpp/tests/bitmask/bitmask_tests.cpp +++ b/cpp/tests/bitmask/bitmask_tests.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/column/bit_cast_test.cpp b/cpp/tests/column/bit_cast_test.cpp index ab230ab036e..5570a7d498c 100644 --- a/cpp/tests/column/bit_cast_test.cpp +++ b/cpp/tests/column/bit_cast_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -26,8 +25,6 @@ #include -#include - template struct rep_type_impl { using type = void; diff --git a/cpp/tests/column/column_test.cpp b/cpp/tests/column/column_test.cpp index 631f5150829..d700adaebd5 100644 --- a/cpp/tests/column/column_test.cpp +++ b/cpp/tests/column/column_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/cpp/tests/column/column_view_device_span_test.cpp b/cpp/tests/column/column_view_device_span_test.cpp index 6de9121158b..470437f4112 100644 --- a/cpp/tests/column/column_view_device_span_test.cpp +++ b/cpp/tests/column/column_view_device_span_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/column/column_view_shallow_test.cpp b/cpp/tests/column/column_view_shallow_test.cpp index 37ab4b8f387..ad344476332 100644 --- a/cpp/tests/column/column_view_shallow_test.cpp +++ b/cpp/tests/column/column_view_shallow_test.cpp @@ -15,9 +15,7 @@ */ #include -#include #include -#include #include #include diff --git a/cpp/tests/column/factories_test.cpp b/cpp/tests/column/factories_test.cpp index 603187f0330..aa9d508b6aa 100644 --- a/cpp/tests/column/factories_test.cpp +++ b/cpp/tests/column/factories_test.cpp @@ -26,11 +26,8 @@ #include #include #include -#include #include -#include - #include class 
ColumnFactoryTest : public cudf::test::BaseFixture { diff --git a/cpp/tests/copying/concatenate_tests.cpp b/cpp/tests/copying/concatenate_tests.cpp index 18140c34abd..aedc498964a 100644 --- a/cpp/tests/copying/concatenate_tests.cpp +++ b/cpp/tests/copying/concatenate_tests.cpp @@ -34,8 +34,6 @@ #include #include -#include - #include #include #include diff --git a/cpp/tests/copying/copy_if_else_nested_tests.cpp b/cpp/tests/copying/copy_if_else_nested_tests.cpp index cfbd181f944..e1cdfe9beed 100644 --- a/cpp/tests/copying/copy_if_else_nested_tests.cpp +++ b/cpp/tests/copying/copy_if_else_nested_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/copying/copy_range_tests.cpp b/cpp/tests/copying/copy_range_tests.cpp index 25d93da277b..e2133a546e4 100644 --- a/cpp/tests/copying/copy_range_tests.cpp +++ b/cpp/tests/copying/copy_range_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/copying/copy_tests.cpp b/cpp/tests/copying/copy_tests.cpp index 4124f749012..9c00725d5d2 100644 --- a/cpp/tests/copying/copy_tests.cpp +++ b/cpp/tests/copying/copy_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/copying/gather_list_tests.cpp b/cpp/tests/copying/gather_list_tests.cpp index 247090aac90..93f71345c5c 100644 --- a/cpp/tests/copying/gather_list_tests.cpp +++ b/cpp/tests/copying/gather_list_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,8 +17,6 @@ #include #include #include -#include -#include #include #include diff --git a/cpp/tests/copying/gather_str_tests.cpp b/cpp/tests/copying/gather_str_tests.cpp index 28098878086..795e3f30aa1 100644 --- a/cpp/tests/copying/gather_str_tests.cpp +++ b/cpp/tests/copying/gather_str_tests.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/copying/gather_struct_tests.cpp b/cpp/tests/copying/gather_struct_tests.cpp index 1598ab2646a..b2c0f7acc3a 100644 --- a/cpp/tests/copying/gather_struct_tests.cpp +++ b/cpp/tests/copying/gather_struct_tests.cpp @@ -17,20 +17,15 @@ #include #include #include -#include #include #include #include #include #include -#include -#include -#include #include #include #include -#include #include diff --git a/cpp/tests/copying/gather_tests.cpp b/cpp/tests/copying/gather_tests.cpp index 07ce672b14d..908dcd67673 100644 --- a/cpp/tests/copying/gather_tests.cpp +++ b/cpp/tests/copying/gather_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/cpp/tests/copying/get_value_tests.cpp b/cpp/tests/copying/get_value_tests.cpp index 90ff97e7355..b2d64dac7c8 100644 --- a/cpp/tests/copying/get_value_tests.cpp +++ b/cpp/tests/copying/get_value_tests.cpp @@ -16,10 +16,8 @@ #include #include -#include #include #include -#include #include #include diff --git a/cpp/tests/copying/purge_nonempty_nulls_tests.cpp b/cpp/tests/copying/purge_nonempty_nulls_tests.cpp index 4f28ff12941..1f76efdc4c3 100644 --- a/cpp/tests/copying/purge_nonempty_nulls_tests.cpp +++ b/cpp/tests/copying/purge_nonempty_nulls_tests.cpp @@ -16,13 +16,10 @@ #include #include #include -#include #include #include #include -#include -#include #include #include #include diff --git a/cpp/tests/copying/reverse_tests.cpp b/cpp/tests/copying/reverse_tests.cpp index e4b2d319ddf..46516436901 100644 --- a/cpp/tests/copying/reverse_tests.cpp +++ b/cpp/tests/copying/reverse_tests.cpp @@ -1,5 +1,5 
@@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,17 +17,13 @@ #include #include #include -#include #include #include #include -#include -#include #include #include -#include #include #include #include diff --git a/cpp/tests/copying/sample_tests.cpp b/cpp/tests/copying/sample_tests.cpp index 2f76e3f1fcd..8be5d8c1fbb 100644 --- a/cpp/tests/copying/sample_tests.cpp +++ b/cpp/tests/copying/sample_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,12 +15,9 @@ */ #include -#include #include -#include #include -#include #include #include #include diff --git a/cpp/tests/copying/scatter_list_scalar_tests.cpp b/cpp/tests/copying/scatter_list_scalar_tests.cpp index 42d2e004d6b..23faa6e5b86 100644 --- a/cpp/tests/copying/scatter_list_scalar_tests.cpp +++ b/cpp/tests/copying/scatter_list_scalar_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,7 +21,6 @@ #include #include -#include using mask_vector = std::vector; using size_column = cudf::test::fixed_width_column_wrapper; diff --git a/cpp/tests/copying/scatter_list_tests.cpp b/cpp/tests/copying/scatter_list_tests.cpp index a82860a3eec..1f87fcfcc99 100644 --- a/cpp/tests/copying/scatter_list_tests.cpp +++ b/cpp/tests/copying/scatter_list_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/copying/scatter_struct_scalar_tests.cpp b/cpp/tests/copying/scatter_struct_scalar_tests.cpp index 78572b0bb37..1d1da8a1b1e 100644 --- a/cpp/tests/copying/scatter_struct_scalar_tests.cpp +++ b/cpp/tests/copying/scatter_struct_scalar_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,7 +19,6 @@ #include #include -#include #include #include #include diff --git a/cpp/tests/copying/scatter_struct_tests.cpp b/cpp/tests/copying/scatter_struct_tests.cpp index c92244d047b..7d88e9af85f 100644 --- a/cpp/tests/copying/scatter_struct_tests.cpp +++ b/cpp/tests/copying/scatter_struct_tests.cpp @@ -21,7 +21,6 @@ #include #include -#include #include using namespace cudf::test::iterators; diff --git a/cpp/tests/copying/scatter_tests.cpp b/cpp/tests/copying/scatter_tests.cpp index 41a753cd0ac..74c04446bdd 100644 --- a/cpp/tests/copying/scatter_tests.cpp +++ b/cpp/tests/copying/scatter_tests.cpp @@ -15,7 +15,6 @@ */ #include -#include #include #include #include @@ -23,7 +22,6 @@ #include #include #include -#include #include diff --git a/cpp/tests/copying/segmented_gather_list_tests.cpp b/cpp/tests/copying/segmented_gather_list_tests.cpp index 8881fb344a2..a133ae43872 100644 --- a/cpp/tests/copying/segmented_gather_list_tests.cpp +++ b/cpp/tests/copying/segmented_gather_list_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright 
(c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,7 +22,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/copying/shift_tests.cpp b/cpp/tests/copying/shift_tests.cpp index ff6808d9a79..72a8e7357bc 100644 --- a/cpp/tests/copying/shift_tests.cpp +++ b/cpp/tests/copying/shift_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -30,7 +29,6 @@ #include #include -#include using TestTypes = cudf::test::Types; diff --git a/cpp/tests/copying/slice_tests.cpp b/cpp/tests/copying/slice_tests.cpp index aef0d4ad78a..3868a147fa8 100644 --- a/cpp/tests/copying/slice_tests.cpp +++ b/cpp/tests/copying/slice_tests.cpp @@ -22,12 +22,8 @@ #include #include -#include #include #include -#include -#include -#include #include #include diff --git a/cpp/tests/copying/utility_tests.cpp b/cpp/tests/copying/utility_tests.cpp index 0905f9babdc..90457f8d74c 100644 --- a/cpp/tests/copying/utility_tests.cpp +++ b/cpp/tests/copying/utility_tests.cpp @@ -23,7 +23,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index 603edb27c7c..44f99adc0e9 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -23,14 +23,11 @@ #include #include -#include #include #include #include #include -#include - #define XXX false // stub for null values constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS}; diff --git a/cpp/tests/dictionary/add_keys_test.cpp b/cpp/tests/dictionary/add_keys_test.cpp index 46bf5468922..ebc8c11e86c 100644 --- a/cpp/tests/dictionary/add_keys_test.cpp +++ b/cpp/tests/dictionary/add_keys_test.cpp @@ -24,8 +24,6 @@ #include #include -#include - struct DictionaryAddKeysTest : 
public cudf::test::BaseFixture {}; TEST_F(DictionaryAddKeysTest, StringsColumn) diff --git a/cpp/tests/dictionary/encode_test.cpp b/cpp/tests/dictionary/encode_test.cpp index 5db0e9fa1e4..dfa3ede5d46 100644 --- a/cpp/tests/dictionary/encode_test.cpp +++ b/cpp/tests/dictionary/encode_test.cpp @@ -21,8 +21,6 @@ #include #include -#include - struct DictionaryEncodeTest : public cudf::test::BaseFixture {}; TEST_F(DictionaryEncodeTest, EncodeStringColumn) diff --git a/cpp/tests/dictionary/fill_test.cpp b/cpp/tests/dictionary/fill_test.cpp index 18696b66e48..bc7d19201aa 100644 --- a/cpp/tests/dictionary/fill_test.cpp +++ b/cpp/tests/dictionary/fill_test.cpp @@ -18,13 +18,10 @@ #include #include -#include #include #include #include -#include - struct DictionaryFillTest : public cudf::test::BaseFixture {}; TEST_F(DictionaryFillTest, StringsColumn) diff --git a/cpp/tests/dictionary/search_test.cpp b/cpp/tests/dictionary/search_test.cpp index 25501b4fde7..2774173b80a 100644 --- a/cpp/tests/dictionary/search_test.cpp +++ b/cpp/tests/dictionary/search_test.cpp @@ -15,7 +15,6 @@ */ #include -#include #include #include diff --git a/cpp/tests/dictionary/slice_test.cpp b/cpp/tests/dictionary/slice_test.cpp index d80f8dee079..8c15d6dbecd 100644 --- a/cpp/tests/dictionary/slice_test.cpp +++ b/cpp/tests/dictionary/slice_test.cpp @@ -19,7 +19,6 @@ #include #include -#include #include #include #include diff --git a/cpp/tests/filling/fill_tests.cpp b/cpp/tests/filling/fill_tests.cpp index 26badefe698..a5e2db6a005 100644 --- a/cpp/tests/filling/fill_tests.cpp +++ b/cpp/tests/filling/fill_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/filling/repeat_tests.cpp b/cpp/tests/filling/repeat_tests.cpp index 6326765c68b..c856984a4a3 100644 --- a/cpp/tests/filling/repeat_tests.cpp +++ b/cpp/tests/filling/repeat_tests.cpp @@ -17,14 +17,11 @@ #include #include #include -#include #include #include #include #include -#include -#include 
#include #include @@ -33,7 +30,6 @@ #include #include -#include constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS}; diff --git a/cpp/tests/filling/sequence_tests.cpp b/cpp/tests/filling/sequence_tests.cpp index 0783b4e5bbb..53782c90c26 100644 --- a/cpp/tests/filling/sequence_tests.cpp +++ b/cpp/tests/filling/sequence_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/fixed_point/fixed_point_tests.cpp b/cpp/tests/fixed_point/fixed_point_tests.cpp index a222289216d..b96c6909e55 100644 --- a/cpp/tests/fixed_point/fixed_point_tests.cpp +++ b/cpp/tests/fixed_point/fixed_point_tests.cpp @@ -18,17 +18,14 @@ #include #include #include -#include #include #include -#include #include #include #include #include -#include #include using namespace numeric; diff --git a/cpp/tests/groupby/collect_list_tests.cpp b/cpp/tests/groupby/collect_list_tests.cpp index a79b6a32916..ba456084a7c 100644 --- a/cpp/tests/groupby/collect_list_tests.cpp +++ b/cpp/tests/groupby/collect_list_tests.cpp @@ -20,8 +20,6 @@ #include #include -#include - template struct groupby_collect_list_test : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/groupby/collect_set_tests.cpp b/cpp/tests/groupby/collect_set_tests.cpp index 61d2838590b..dfd7eb82c4a 100644 --- a/cpp/tests/groupby/collect_set_tests.cpp +++ b/cpp/tests/groupby/collect_set_tests.cpp @@ -19,7 +19,6 @@ #include #include -#include #include #include #include diff --git a/cpp/tests/groupby/correlation_tests.cpp b/cpp/tests/groupby/correlation_tests.cpp index 26f714632dd..f8cc813e877 100644 --- a/cpp/tests/groupby/correlation_tests.cpp +++ b/cpp/tests/groupby/correlation_tests.cpp @@ -25,7 +25,6 @@ #include #include -#include using namespace cudf::test::iterators; diff --git a/cpp/tests/groupby/covariance_tests.cpp b/cpp/tests/groupby/covariance_tests.cpp index e3eb2da201f..81378bb91e8 100644 --- a/cpp/tests/groupby/covariance_tests.cpp +++ 
b/cpp/tests/groupby/covariance_tests.cpp @@ -23,10 +23,8 @@ #include #include -#include #include -#include using namespace cudf::test::iterators; diff --git a/cpp/tests/groupby/groupby_test_util.cpp b/cpp/tests/groupby/groupby_test_util.cpp index 5d99d15ae77..df0375d6a09 100644 --- a/cpp/tests/groupby/groupby_test_util.cpp +++ b/cpp/tests/groupby/groupby_test_util.cpp @@ -17,8 +17,8 @@ #include "groupby_test_util.hpp" #include -#include #include +#include #include #include @@ -27,9 +27,6 @@ #include #include #include -#include - -#include void test_single_agg(cudf::column_view const& keys, cudf::column_view const& values, diff --git a/cpp/tests/groupby/groupby_test_util.hpp b/cpp/tests/groupby/groupby_test_util.hpp index 755b0c20f17..9d2e613be3e 100644 --- a/cpp/tests/groupby/groupby_test_util.hpp +++ b/cpp/tests/groupby/groupby_test_util.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,11 +16,8 @@ #pragma once -#include #include -#include #include -#include enum class force_use_sort_impl : bool { NO, YES }; diff --git a/cpp/tests/groupby/histogram_tests.cpp b/cpp/tests/groupby/histogram_tests.cpp index 2d447025919..783cfb17e49 100644 --- a/cpp/tests/groupby/histogram_tests.cpp +++ b/cpp/tests/groupby/histogram_tests.cpp @@ -20,7 +20,6 @@ #include #include -#include #include #include #include diff --git a/cpp/tests/groupby/max_scan_tests.cpp b/cpp/tests/groupby/max_scan_tests.cpp index d86de798844..6195e0179ec 100644 --- a/cpp/tests/groupby/max_scan_tests.cpp +++ b/cpp/tests/groupby/max_scan_tests.cpp @@ -22,7 +22,6 @@ #include #include -#include using namespace cudf::test::iterators; diff --git a/cpp/tests/groupby/merge_lists_tests.cpp b/cpp/tests/groupby/merge_lists_tests.cpp index 279d71560b4..4481e2dc022 100644 --- a/cpp/tests/groupby/merge_lists_tests.cpp +++ b/cpp/tests/groupby/merge_lists_tests.cpp @@ -21,7 +21,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/groupby/merge_sets_tests.cpp b/cpp/tests/groupby/merge_sets_tests.cpp index 9736bb84dd6..1bfba265478 100644 --- a/cpp/tests/groupby/merge_sets_tests.cpp +++ b/cpp/tests/groupby/merge_sets_tests.cpp @@ -21,7 +21,6 @@ #include #include -#include #include #include #include diff --git a/cpp/tests/groupby/rank_scan_tests.cpp b/cpp/tests/groupby/rank_scan_tests.cpp index 7f31bc9089f..f2a50248b4a 100644 --- a/cpp/tests/groupby/rank_scan_tests.cpp +++ b/cpp/tests/groupby/rank_scan_tests.cpp @@ -22,8 +22,6 @@ #include #include -#include - using namespace cudf::test::iterators; template diff --git a/cpp/tests/groupby/shift_tests.cpp b/cpp/tests/groupby/shift_tests.cpp index 14c9ceb4508..49f9d7cb10a 100644 --- a/cpp/tests/groupby/shift_tests.cpp +++ b/cpp/tests/groupby/shift_tests.cpp @@ -21,7 +21,6 @@ #include #include -#include #include template diff --git a/cpp/tests/hashing/md5_test.cpp b/cpp/tests/hashing/md5_test.cpp index 69e518cbf8d..b54adb52496 100644 --- 
a/cpp/tests/hashing/md5_test.cpp +++ b/cpp/tests/hashing/md5_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/hashing/murmurhash3_x86_32_test.cpp b/cpp/tests/hashing/murmurhash3_x86_32_test.cpp index c1a6e6ff6e1..b4622f5eb81 100644 --- a/cpp/tests/hashing/murmurhash3_x86_32_test.cpp +++ b/cpp/tests/hashing/murmurhash3_x86_32_test.cpp @@ -17,11 +17,9 @@ #include #include #include -#include #include #include -#include #include constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS}; diff --git a/cpp/tests/hashing/sha1_test.cpp b/cpp/tests/hashing/sha1_test.cpp index e28e71442a6..3aa0bda6ae8 100644 --- a/cpp/tests/hashing/sha1_test.cpp +++ b/cpp/tests/hashing/sha1_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -137,7 +136,7 @@ TEST_F(SHA1HashTest, ListsUnsupported) auto const input = cudf::table_view({strings_list_col}); - EXPECT_THROW(cudf::hashing::sha1(input), cudf::logic_error); + EXPECT_THROW(cudf::hashing::sha1(input), cudf::data_type_error); } TEST_F(SHA1HashTest, StructsUnsupported) @@ -146,7 +145,7 @@ TEST_F(SHA1HashTest, StructsUnsupported) auto struct_col = cudf::test::structs_column_wrapper{{child_col}}; auto const input = cudf::table_view({struct_col}); - EXPECT_THROW(cudf::hashing::sha1(input), cudf::logic_error); + EXPECT_THROW(cudf::hashing::sha1(input), cudf::data_type_error); } template diff --git a/cpp/tests/hashing/sha224_test.cpp b/cpp/tests/hashing/sha224_test.cpp index 61b584f94df..3f6aeb9d5e6 100644 --- a/cpp/tests/hashing/sha224_test.cpp +++ b/cpp/tests/hashing/sha224_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -137,7 +136,7 @@ TEST_F(SHA224HashTest, ListsUnsupported) auto const input = cudf::table_view({strings_list_col}); - EXPECT_THROW(cudf::hashing::sha224(input), cudf::logic_error); + EXPECT_THROW(cudf::hashing::sha224(input), cudf::data_type_error); } 
TEST_F(SHA224HashTest, StructsUnsupported) @@ -146,7 +145,7 @@ TEST_F(SHA224HashTest, StructsUnsupported) auto struct_col = cudf::test::structs_column_wrapper{{child_col}}; auto const input = cudf::table_view({struct_col}); - EXPECT_THROW(cudf::hashing::sha224(input), cudf::logic_error); + EXPECT_THROW(cudf::hashing::sha224(input), cudf::data_type_error); } template diff --git a/cpp/tests/hashing/sha256_test.cpp b/cpp/tests/hashing/sha256_test.cpp index 8bc47c92c6b..9519e96fbae 100644 --- a/cpp/tests/hashing/sha256_test.cpp +++ b/cpp/tests/hashing/sha256_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -136,7 +135,7 @@ TEST_F(SHA256HashTest, ListsUnsupported) auto const input = cudf::table_view({strings_list_col}); - EXPECT_THROW(cudf::hashing::sha256(input), cudf::logic_error); + EXPECT_THROW(cudf::hashing::sha256(input), cudf::data_type_error); } TEST_F(SHA256HashTest, StructsUnsupported) @@ -145,7 +144,7 @@ TEST_F(SHA256HashTest, StructsUnsupported) auto struct_col = cudf::test::structs_column_wrapper{{child_col}}; auto const input = cudf::table_view({struct_col}); - EXPECT_THROW(cudf::hashing::sha256(input), cudf::logic_error); + EXPECT_THROW(cudf::hashing::sha256(input), cudf::data_type_error); } template diff --git a/cpp/tests/hashing/sha384_test.cpp b/cpp/tests/hashing/sha384_test.cpp index 4c79934f98d..9de566b9d9b 100644 --- a/cpp/tests/hashing/sha384_test.cpp +++ b/cpp/tests/hashing/sha384_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -155,7 +154,7 @@ TEST_F(SHA384HashTest, ListsUnsupported) auto const input = cudf::table_view({strings_list_col}); - EXPECT_THROW(cudf::hashing::sha384(input), cudf::logic_error); + EXPECT_THROW(cudf::hashing::sha384(input), cudf::data_type_error); } TEST_F(SHA384HashTest, StructsUnsupported) @@ -164,7 +163,7 @@ TEST_F(SHA384HashTest, StructsUnsupported) auto struct_col = cudf::test::structs_column_wrapper{{child_col}}; auto const input = 
cudf::table_view({struct_col}); - EXPECT_THROW(cudf::hashing::sha384(input), cudf::logic_error); + EXPECT_THROW(cudf::hashing::sha384(input), cudf::data_type_error); } template diff --git a/cpp/tests/hashing/sha512_test.cpp b/cpp/tests/hashing/sha512_test.cpp index 0eb1c60b8fc..95e5245f38e 100644 --- a/cpp/tests/hashing/sha512_test.cpp +++ b/cpp/tests/hashing/sha512_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -155,7 +154,7 @@ TEST_F(SHA512HashTest, ListsUnsupported) auto const input = cudf::table_view({strings_list_col}); - EXPECT_THROW(cudf::hashing::sha512(input), cudf::logic_error); + EXPECT_THROW(cudf::hashing::sha512(input), cudf::data_type_error); } TEST_F(SHA512HashTest, StructsUnsupported) @@ -164,7 +163,7 @@ TEST_F(SHA512HashTest, StructsUnsupported) auto struct_col = cudf::test::structs_column_wrapper{{child_col}}; auto const input = cudf::table_view({struct_col}); - EXPECT_THROW(cudf::hashing::sha512(input), cudf::logic_error); + EXPECT_THROW(cudf::hashing::sha512(input), cudf::data_type_error); } template diff --git a/cpp/tests/hashing/xxhash_64_test.cpp b/cpp/tests/hashing/xxhash_64_test.cpp index ab4ed829681..d8694a72d94 100644 --- a/cpp/tests/hashing/xxhash_64_test.cpp +++ b/cpp/tests/hashing/xxhash_64_test.cpp @@ -17,11 +17,8 @@ #include #include #include -#include #include -#include -#include #include using NumericTypesNoBools = diff --git a/cpp/tests/interop/from_arrow_device_test.cpp b/cpp/tests/interop/from_arrow_device_test.cpp index 2151ec6e22f..1ddc33e749a 100644 --- a/cpp/tests/interop/from_arrow_device_test.cpp +++ b/cpp/tests/interop/from_arrow_device_test.cpp @@ -17,17 +17,13 @@ #include "nanoarrow_utils.hpp" #include -#include #include #include -#include #include #include #include #include -#include -#include #include #include #include diff --git a/cpp/tests/interop/from_arrow_host_test.cpp b/cpp/tests/interop/from_arrow_host_test.cpp index ef9936b214c..d93ef28aab8 100644 --- 
a/cpp/tests/interop/from_arrow_host_test.cpp +++ b/cpp/tests/interop/from_arrow_host_test.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -28,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/cpp/tests/interop/from_arrow_stream_test.cpp b/cpp/tests/interop/from_arrow_stream_test.cpp index 80a2e4b2ffd..3916025bf22 100644 --- a/cpp/tests/interop/from_arrow_stream_test.cpp +++ b/cpp/tests/interop/from_arrow_stream_test.cpp @@ -17,27 +17,14 @@ #include "nanoarrow_utils.hpp" #include -#include -#include #include -#include -#include -#include -#include #include -#include -#include -#include -#include #include #include #include -#include #include -#include - struct VectorOfArrays { std::vector arrays; nanoarrow::UniqueSchema schema; diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp index 6e742b9e4cf..18efae75cb1 100644 --- a/cpp/tests/interop/from_arrow_test.cpp +++ b/cpp/tests/interop/from_arrow_test.cpp @@ -25,9 +25,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -37,8 +35,6 @@ #include #include -#include -#include std::unique_ptr get_cudf_table() { diff --git a/cpp/tests/interop/to_arrow_device_test.cpp b/cpp/tests/interop/to_arrow_device_test.cpp index 7ba586461dc..29aa928c277 100644 --- a/cpp/tests/interop/to_arrow_device_test.cpp +++ b/cpp/tests/interop/to_arrow_device_test.cpp @@ -17,21 +17,15 @@ #include "nanoarrow_utils.hpp" #include -#include #include -#include -#include #include #include -#include -#include #include #include #include #include #include -#include #include #include diff --git a/cpp/tests/interop/to_arrow_host_test.cpp b/cpp/tests/interop/to_arrow_host_test.cpp index fcb4433b42e..fa3aa82fee2 100644 --- a/cpp/tests/interop/to_arrow_host_test.cpp +++ b/cpp/tests/interop/to_arrow_host_test.cpp @@ -17,20 +17,14 @@ #include "nanoarrow_utils.hpp" #include -#include #include -#include -#include #include 
#include #include #include -#include #include #include -#include -#include #include #include #include diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp index a6aa4b22eca..86295d8efb1 100644 --- a/cpp/tests/interop/to_arrow_test.cpp +++ b/cpp/tests/interop/to_arrow_test.cpp @@ -19,14 +19,12 @@ #include #include #include -#include #include #include #include #include #include -#include #include #include #include diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp index b265dcf9273..cc1e367d114 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -17,14 +17,12 @@ #include #include #include -#include #include #include #include #include #include -#include #include #include #include @@ -32,18 +30,12 @@ #include #include #include -#include -#include #include -#include #include -#include - #include #include -#include #include #include #include diff --git a/cpp/tests/io/file_io_test.cpp b/cpp/tests/io/file_io_test.cpp index 3c41f21b0a4..1b85541687a 100644 --- a/cpp/tests/io/file_io_test.cpp +++ b/cpp/tests/io/file_io_test.cpp @@ -15,13 +15,10 @@ */ #include -#include #include #include -#include - // Base test fixture for tests struct CuFileIOTest : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/io/json/json_quote_normalization_test.cpp b/cpp/tests/io/json/json_quote_normalization_test.cpp index d23acf3ae00..c8c2d18903f 100644 --- a/cpp/tests/io/json/json_quote_normalization_test.cpp +++ b/cpp/tests/io/json/json_quote_normalization_test.cpp @@ -20,7 +20,6 @@ #include #include -#include #include #include #include @@ -29,7 +28,6 @@ #include #include -#include #include diff --git a/cpp/tests/io/json/json_test.cpp b/cpp/tests/io/json/json_test.cpp index cb6716f4a18..b58ca56e066 100644 --- a/cpp/tests/io/json/json_test.cpp +++ b/cpp/tests/io/json/json_test.cpp @@ -39,8 +39,6 @@ #include -#include - #include #include #include @@ -2975,4 +2973,22 @@ TEST_F(JsonReaderTest, JsonDtypeSchema) 
cudf::test::debug_output_level::ALL_ERRORS); } +TEST_F(JsonReaderTest, LastRecordInvalid) +{ + std::string data = R"({"key": "1"} + {"key": "})"; + std::map schema{{"key", {dtype()}}}; + auto opts = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) + .dtypes(schema) + .lines(true) + .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL) + .build(); + auto const result = cudf::io::read_json(opts); + + EXPECT_EQ(result.metadata.schema_info[0].name, "key"); + cudf::test::strings_column_wrapper expected{{"1", ""}, cudf::test::iterators::nulls_at({1})}; + CUDF_TEST_EXPECT_TABLES_EQUAL(result.tbl->view(), cudf::table_view{{expected}}); +} + CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/io/json/json_tree.cpp b/cpp/tests/io/json/json_tree.cpp index 15682c6ae6b..887d4fa783f 100644 --- a/cpp/tests/io/json/json_tree.cpp +++ b/cpp/tests/io/json/json_tree.cpp @@ -15,12 +15,8 @@ */ #include "io/json/nested_json.hpp" -#include "io/utilities/hostdevice_vector.hpp" #include -#include -#include -#include #include #include @@ -29,9 +25,9 @@ #include #include -#include #include +#include #include #include #include diff --git a/cpp/tests/io/json/json_utils.cuh b/cpp/tests/io/json/json_utils.cuh index 9383797d91b..c31bb2d24e0 100644 --- a/cpp/tests/io/json/json_utils.cuh +++ b/cpp/tests/io/json/json_utils.cuh @@ -52,6 +52,7 @@ std::vector split_byte_range_reading( reader_opts.get_compression(), reader_opts.get_byte_range_offset(), reader_opts.get_byte_range_size(), + reader_opts.get_delimiter(), stream); // Note: we cannot reuse cudf::io::json::detail::find_first_delimiter since the // return type of that function is size_type. 
However, when the chunk_size is diff --git a/cpp/tests/io/json/nested_json_test.cpp b/cpp/tests/io/json/nested_json_test.cpp index f32aba0e632..e0e955c4f48 100644 --- a/cpp/tests/io/json/nested_json_test.cpp +++ b/cpp/tests/io/json/nested_json_test.cpp @@ -21,24 +21,16 @@ #include #include #include -#include #include -#include #include -#include #include -#include #include -#include #include #include #include #include -#include - -#include #include #include diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 8ad1fea649d..5f1aea71f73 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -1358,10 +1358,11 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow) int64_t constexpr total_rows = num_rows * num_reps; static_assert(total_rows > std::numeric_limits::max()); - auto const it = cudf::detail::make_counting_transform_iterator(0l, [num_rows](int64_t i) { - return (i % num_rows) % static_cast(std::numeric_limits::max() / 2); - }); - auto const col = data_col(it, it + num_rows); + auto const it = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), [num_rows](int64_t i) { + return (i % num_rows) % static_cast(std::numeric_limits::max() / 2); + }); + auto const col = data_col(it, it + num_rows); auto const chunk_table = cudf::table_view{{col}}; std::vector data_buffer; diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index cce0adbf317..fce99187516 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/cpp/tests/io/parquet_common.hpp b/cpp/tests/io/parquet_common.hpp index c90b81ed27a..d66aa3bde9d 100644 --- a/cpp/tests/io/parquet_common.hpp +++ b/cpp/tests/io/parquet_common.hpp @@ -22,13 +22,11 @@ #include #include -#include #include #include #include #include -#include #include #include diff --git 
a/cpp/tests/io/parquet_misc_test.cpp b/cpp/tests/io/parquet_misc_test.cpp index f1286a00d22..d66f685cd9c 100644 --- a/cpp/tests/io/parquet_misc_test.cpp +++ b/cpp/tests/io/parquet_misc_test.cpp @@ -20,8 +20,6 @@ #include #include -#include -#include #include diff --git a/cpp/tests/io/parquet_reader_test.cpp b/cpp/tests/io/parquet_reader_test.cpp index ab4645c2e25..177e6163d4f 100644 --- a/cpp/tests/io/parquet_reader_test.cpp +++ b/cpp/tests/io/parquet_reader_test.cpp @@ -29,6 +29,8 @@ #include #include +#include + #include TEST_F(ParquetReaderTest, UserBounds) @@ -2725,7 +2727,9 @@ TYPED_TEST(ParquetReaderPredicatePushdownTest, FilterTyped) CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result_table); } -TEST_F(ParquetReaderTest, ListsWideTable) +// The test below requires several minutes to complete with memcheck, thus it is disabled by +// default. +TEST_F(ParquetReaderTest, DISABLED_ListsWideTable) { auto constexpr num_rows = 2; auto constexpr num_cols = 26'755; // for slightly over 2B keys diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index be2ecd56424..5c3c8342cd2 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include // NOTE: this file exists to define the parquet test's `main()` function. 
diff --git a/cpp/tests/io/row_selection_test.cpp b/cpp/tests/io/row_selection_test.cpp index ebadd870091..c40d3bbd299 100644 --- a/cpp/tests/io/row_selection_test.cpp +++ b/cpp/tests/io/row_selection_test.cpp @@ -15,7 +15,6 @@ */ #include -#include #include #include diff --git a/cpp/tests/io/text/data_chunk_source_test.cpp b/cpp/tests/io/text/data_chunk_source_test.cpp index 6f46df20633..79ce908f3e0 100644 --- a/cpp/tests/io/text/data_chunk_source_test.cpp +++ b/cpp/tests/io/text/data_chunk_source_test.cpp @@ -15,14 +15,11 @@ */ #include -#include #include #include #include -#include - #include #include diff --git a/cpp/tests/io/text/multibyte_split_test.cpp b/cpp/tests/io/text/multibyte_split_test.cpp index 74d08061df9..60244462e2c 100644 --- a/cpp/tests/io/text/multibyte_split_test.cpp +++ b/cpp/tests/io/text/multibyte_split_test.cpp @@ -19,16 +19,12 @@ #include #include #include -#include -#include #include -#include #include #include #include #include -#include #include using cudf::test::strings_column_wrapper; diff --git a/cpp/tests/iterator/value_iterator.cpp b/cpp/tests/iterator/value_iterator.cpp index 22bc7475dbe..f7f7c0f2721 100644 --- a/cpp/tests/iterator/value_iterator.cpp +++ b/cpp/tests/iterator/value_iterator.cpp @@ -13,7 +13,6 @@ * the License. 
*/ -#include #include CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/jit/parse_ptx_function.cpp b/cpp/tests/jit/parse_ptx_function.cpp index 6f9dfd06730..c9bb691907a 100644 --- a/cpp/tests/jit/parse_ptx_function.cpp +++ b/cpp/tests/jit/parse_ptx_function.cpp @@ -16,7 +16,6 @@ #include "jit/parser.hpp" -#include #include #include diff --git a/cpp/tests/join/cross_join_tests.cpp b/cpp/tests/join/cross_join_tests.cpp index d87f5e54153..971913443e5 100644 --- a/cpp/tests/join/cross_join_tests.cpp +++ b/cpp/tests/join/cross_join_tests.cpp @@ -15,7 +15,6 @@ */ #include -#include #include #include #include diff --git a/cpp/tests/join/distinct_join_tests.cpp b/cpp/tests/join/distinct_join_tests.cpp index 178edc52dd3..9070efa38fe 100644 --- a/cpp/tests/join/distinct_join_tests.cpp +++ b/cpp/tests/join/distinct_join_tests.cpp @@ -15,12 +15,8 @@ */ #include -#include #include -#include #include -#include -#include #include #include @@ -31,7 +27,6 @@ #include #include -#include #include template diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp index 3431e941359..6a8a54c8465 100644 --- a/cpp/tests/join/join_tests.cpp +++ b/cpp/tests/join/join_tests.cpp @@ -20,17 +20,12 @@ #include #include #include -#include #include -#include #include #include -#include -#include #include #include -#include #include #include #include diff --git a/cpp/tests/join/semi_anti_join_tests.cpp b/cpp/tests/join/semi_anti_join_tests.cpp index 554d5754e39..ddc65c3f379 100644 --- a/cpp/tests/join/semi_anti_join_tests.cpp +++ b/cpp/tests/join/semi_anti_join_tests.cpp @@ -22,7 +22,6 @@ #include #include -#include #include #include #include diff --git a/cpp/tests/json/json_tests.cpp b/cpp/tests/json/json_tests.cpp index 42a574ac5c0..53166e04173 100644 --- a/cpp/tests/json/json_tests.cpp +++ b/cpp/tests/json/json_tests.cpp @@ -19,7 +19,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/large_strings/large_strings_fixture.cpp 
b/cpp/tests/large_strings/large_strings_fixture.cpp index 7b61be113f9..f1404990354 100644 --- a/cpp/tests/large_strings/large_strings_fixture.cpp +++ b/cpp/tests/large_strings/large_strings_fixture.cpp @@ -16,12 +16,10 @@ #include "large_strings_fixture.hpp" -#include #include #include #include -#include #include #include diff --git a/cpp/tests/large_strings/parquet_tests.cpp b/cpp/tests/large_strings/parquet_tests.cpp index 007c08ce0fb..f47782a2d02 100644 --- a/cpp/tests/large_strings/parquet_tests.cpp +++ b/cpp/tests/large_strings/parquet_tests.cpp @@ -16,8 +16,6 @@ #include "large_strings_fixture.hpp" -#include -#include #include #include diff --git a/cpp/tests/lists/contains_tests.cpp b/cpp/tests/lists/contains_tests.cpp index 8fb2b403051..7ae7a6a7414 100644 --- a/cpp/tests/lists/contains_tests.cpp +++ b/cpp/tests/lists/contains_tests.cpp @@ -22,7 +22,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/lists/extract_tests.cpp b/cpp/tests/lists/extract_tests.cpp index 92dd5df5ec7..2c24f695c29 100644 --- a/cpp/tests/lists/extract_tests.cpp +++ b/cpp/tests/lists/extract_tests.cpp @@ -21,12 +21,8 @@ #include #include -#include -#include #include -#include - #include #include #include diff --git a/cpp/tests/lists/sequences_tests.cpp b/cpp/tests/lists/sequences_tests.cpp index 74545903eb3..dcb906cd2ef 100644 --- a/cpp/tests/lists/sequences_tests.cpp +++ b/cpp/tests/lists/sequences_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/lists/stream_compaction/apply_boolean_mask_tests.cpp b/cpp/tests/lists/stream_compaction/apply_boolean_mask_tests.cpp index 5625b47e7ea..18aa118bb81 100644 --- a/cpp/tests/lists/stream_compaction/apply_boolean_mask_tests.cpp +++ b/cpp/tests/lists/stream_compaction/apply_boolean_mask_tests.cpp @@ -20,8 +20,6 @@ #include #include -#include -#include #include namespace cudf::test { diff --git a/cpp/tests/merge/merge_dictionary_test.cpp 
b/cpp/tests/merge/merge_dictionary_test.cpp index dd528c19e4e..1d7a31fd797 100644 --- a/cpp/tests/merge/merge_dictionary_test.cpp +++ b/cpp/tests/merge/merge_dictionary_test.cpp @@ -17,9 +17,7 @@ #include #include #include -#include -#include #include #include #include diff --git a/cpp/tests/merge/merge_string_test.cpp b/cpp/tests/merge/merge_string_test.cpp index bea044496b3..d9fdb6099f0 100644 --- a/cpp/tests/merge/merge_string_test.cpp +++ b/cpp/tests/merge/merge_string_test.cpp @@ -17,10 +17,8 @@ #include #include #include -#include #include -#include #include #include #include @@ -30,10 +28,6 @@ #include -#include -#include -#include -#include #include #include diff --git a/cpp/tests/merge/merge_test.cpp b/cpp/tests/merge/merge_test.cpp index 6208d395f0a..fad390105d7 100644 --- a/cpp/tests/merge/merge_test.cpp +++ b/cpp/tests/merge/merge_test.cpp @@ -21,7 +21,6 @@ #include #include #include -#include #include #include @@ -34,7 +33,6 @@ #include #include -#include #include diff --git a/cpp/tests/partitioning/round_robin_test.cpp b/cpp/tests/partitioning/round_robin_test.cpp index 89d23c39dca..3693cfbcc72 100644 --- a/cpp/tests/partitioning/round_robin_test.cpp +++ b/cpp/tests/partitioning/round_robin_test.cpp @@ -17,10 +17,8 @@ #include #include #include -#include #include -#include #include #include #include @@ -30,12 +28,7 @@ #include -#include -#include -#include -#include #include -#include #include using cudf::test::fixed_width_column_wrapper; diff --git a/cpp/tests/quantiles/quantile_test.cpp b/cpp/tests/quantiles/quantile_test.cpp index 6e88365b6e8..23b58618fe1 100644 --- a/cpp/tests/quantiles/quantile_test.cpp +++ b/cpp/tests/quantiles/quantile_test.cpp @@ -22,7 +22,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/quantiles/quantiles_test.cpp b/cpp/tests/quantiles/quantiles_test.cpp index 44d4ec61852..c7e11af8c85 100644 --- a/cpp/tests/quantiles/quantiles_test.cpp +++ b/cpp/tests/quantiles/quantiles_test.cpp @@ -16,7 +16,6 @@ 
#include #include -#include #include #include diff --git a/cpp/tests/reductions/ewm_tests.cpp b/cpp/tests/reductions/ewm_tests.cpp index 09cec688509..1117b0d1acf 100644 --- a/cpp/tests/reductions/ewm_tests.cpp +++ b/cpp/tests/reductions/ewm_tests.cpp @@ -18,9 +18,7 @@ #include #include -#include -#include #include template diff --git a/cpp/tests/reductions/list_rank_test.cpp b/cpp/tests/reductions/list_rank_test.cpp index f5470f7d881..cb412f1e925 100644 --- a/cpp/tests/reductions/list_rank_test.cpp +++ b/cpp/tests/reductions/list_rank_test.cpp @@ -14,14 +14,9 @@ * limitations under the License. */ -#include - #include #include -#include -#include -#include #include struct ListRankScanTest : public cudf::test::BaseFixture { @@ -136,7 +131,7 @@ TEST_F(ListRankScanTest, ListOfStruct) false, false}}; auto col2 = cudf::test::strings_column_wrapper{ - {"x", "x", "a", "a", "b", "b", "a", "b", "a", "b", "a", "c", "a", "c", "a", "c", "b", "b"}, + {"x", "x", "a", "a", "b", "", "a", "b", "a", "b", "a", "c", "a", "c", "", "", "b", "b"}, {true, true, true, diff --git a/cpp/tests/reductions/rank_tests.cpp b/cpp/tests/reductions/rank_tests.cpp index 3ab1fc01eaa..130458548fc 100644 --- a/cpp/tests/reductions/rank_tests.cpp +++ b/cpp/tests/reductions/rank_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,7 +21,6 @@ #include #include -#include #include #include @@ -126,7 +125,7 @@ auto make_input_column() { if constexpr (std::is_same_v) { return cudf::test::strings_column_wrapper{ - {"0", "0", "4", "4", "4", "5", "7", "7", "7", "9", "9", "9"}, + {"0", "0", "4", "4", "4", "", "7", "7", "7", "9", "9", "9"}, cudf::test::iterators::null_at(5)}; } else { using fw_wrapper = cudf::test::fixed_width_column_wrapper; diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index bdb98372836..67083f19b3a 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -22,9 +22,7 @@ #include #include -#include #include -#include #include #include #include @@ -33,11 +31,9 @@ #include #include -#include #include #include -#include #include #include @@ -1259,6 +1255,12 @@ TEST_P(StringReductionTest, MinMax) // data and valid arrays std::vector host_strings(GetParam()); std::vector host_bools({true, false, true, true, true, true, false, false, true}); + std::transform(thrust::counting_iterator(0), + thrust::counting_iterator(host_strings.size()), + host_strings.begin(), + [host_strings, host_bools](auto idx) { + return host_bools[idx] ? host_strings[idx] : std::string{}; + }); bool succeed(true); std::string initial_value = "init"; @@ -1385,7 +1387,7 @@ TEST_F(StringReductionTest, AllNull) std::vector host_strings( {"one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}); std::vector host_bools(host_strings.size(), false); - auto initial_value = cudf::make_string_scalar("init"); + auto initial_value = cudf::make_string_scalar(""); initial_value->set_valid_async(false); // string column with nulls @@ -3086,21 +3088,28 @@ TEST_F(StructReductionTest, StructReductionMinMaxWithNulls) using cudf::test::iterators::null_at; using cudf::test::iterators::nulls_at; - // `null` means null at child column. - // `NULL` means null at parent column. 
auto const input = [] { auto child1 = STRINGS_CW{{"año", "bit", - "₹1" /*null*/, - "aaa" /*NULL*/, + "", // child null + "aaa", // parent null "zit", "bat", "aab", - "$1" /*null*/, - "€1" /*NULL*/, + "", // child null + "€1", // parent null "wut"}, nulls_at({2, 7})}; - auto child2 = INTS_CW{{1, 2, 3 /*null*/, 4 /*NULL*/, 5, 6, 7, 8 /*null*/, 9 /*NULL*/, 10}, + auto child2 = INTS_CW{{1, + 2, + 0, // child null + 4, // parent null + 5, + 6, + 7, + 0, // child null + 9, // parent NULL + 10}, nulls_at({2, 7})}; return STRUCTS_CW{{child1, child2}, nulls_at({3, 8})}; }(); diff --git a/cpp/tests/reductions/scan_tests.cpp b/cpp/tests/reductions/scan_tests.cpp index c4463d68a68..5f911597b02 100644 --- a/cpp/tests/reductions/scan_tests.cpp +++ b/cpp/tests/reductions/scan_tests.cpp @@ -20,13 +20,11 @@ #include #include -#include #include #include #include #include -#include #include #include @@ -414,12 +412,13 @@ TEST_F(ScanStringsTest, MoreStringsMinMax) { int row_count = 512; - auto data_begin = cudf::detail::make_counting_transform_iterator(0, [](auto idx) { + auto validity = cudf::detail::make_counting_transform_iterator( + 0, [](auto idx) -> bool { return (idx % 23) != 22; }); + auto data_begin = cudf::detail::make_counting_transform_iterator(0, [validity](auto idx) { + if (validity[idx] == 0) return std::string{}; char const s = static_cast('a' + (idx % 26)); return std::string{1, s}; }); - auto validity = cudf::detail::make_counting_transform_iterator( - 0, [](auto idx) -> bool { return (idx % 23) != 22; }); cudf::test::strings_column_wrapper col(data_begin, data_begin + row_count, validity); thrust::host_vector v(data_begin, data_begin + row_count); @@ -622,21 +621,28 @@ TEST_F(StructScanTest, StructScanMinMaxWithNulls) using cudf::test::iterators::null_at; using cudf::test::iterators::nulls_at; - // `null` means null at child column. - // `NULL` means null at parent column. 
auto const input = [] { auto child1 = STRINGS_CW{{"año", "bit", - "₹1" /*null*/, - "aaa" /*NULL*/, + "", // child null + "aaa", // parent null "zit", "bat", "aab", - "$1" /*null*/, - "€1" /*NULL*/, + "", // child null + "€1", // parent null "wut"}, nulls_at({2, 7})}; - auto child2 = INTS_CW{{1, 2, 3 /*null*/, 4 /*NULL*/, 5, 6, 7, 8 /*null*/, 9 /*NULL*/, 10}, + auto child2 = INTS_CW{{1, + 2, + 0, // child null + 4, // parent null + 5, + 6, + 7, + 0, // child null + 9, // parent null + 10}, nulls_at({2, 7})}; return STRUCTS_CW{{child1, child2}, nulls_at({3, 8})}; }(); @@ -694,25 +700,25 @@ TEST_F(StructScanTest, StructScanMinMaxWithNulls) auto const expected = [] { auto child1 = STRINGS_CW{{"año", "año", - "" /*null*/, - "" /*NULL*/, - "" /*NULL*/, - "" /*NULL*/, - "" /*NULL*/, - "" /*NULL*/, - "" /*NULL*/, - "" /*NULL*/}, + "", // child null + "", // parent null + "", // parent null + "", // parent null + "", // parent null + "", // parent null + "", // parent null + ""}, // parent null null_at(2)}; auto child2 = INTS_CW{{1, 1, - 0 /*null*/, - 0 /*NULL*/, - 0 /*NULL*/, - 0 /*NULL*/, - 0 /*NULL*/, - 0 /*NULL*/, - 0 /*NULL*/, - 0 /*NULL*/}, + 0, // child null + 0, // parent null + 0, // parent null + 0, // parent null + 0, // parent null + 0, // parent null + 0, // parent null + 0}, // parent null null_at(2)}; return STRUCTS_CW{{child1, child2}, nulls_at({3, 4, 5, 6, 7, 8, 9})}; }(); diff --git a/cpp/tests/reductions/scan_tests.hpp b/cpp/tests/reductions/scan_tests.hpp index 858697d8ef5..c2cce4bbbfa 100644 --- a/cpp/tests/reductions/scan_tests.hpp +++ b/cpp/tests/reductions/scan_tests.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,9 +20,7 @@ #include #include -#include #include -#include #include #include @@ -30,7 +28,6 @@ #include #include -#include template struct TypeParam_to_host_type { diff --git a/cpp/tests/replace/clamp_test.cpp b/cpp/tests/replace/clamp_test.cpp index 239c9ce6ddd..e972ea35ed0 100644 --- a/cpp/tests/replace/clamp_test.cpp +++ b/cpp/tests/replace/clamp_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/replace/normalize_replace_tests.cpp b/cpp/tests/replace/normalize_replace_tests.cpp index 2de17388ee8..c35f385329a 100644 --- a/cpp/tests/replace/normalize_replace_tests.cpp +++ b/cpp/tests/replace/normalize_replace_tests.cpp @@ -19,7 +19,6 @@ #include #include -#include #include // This is the main test fixture diff --git a/cpp/tests/replace/replace_nans_tests.cpp b/cpp/tests/replace/replace_nans_tests.cpp index 35232204db7..1b9fe92066a 100644 --- a/cpp/tests/replace/replace_nans_tests.cpp +++ b/cpp/tests/replace/replace_nans_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/replace/replace_nulls_tests.cpp b/cpp/tests/replace/replace_nulls_tests.cpp index fcee27305f2..0c8ccea52a6 100644 --- a/cpp/tests/replace/replace_nulls_tests.cpp +++ b/cpp/tests/replace/replace_nulls_tests.cpp @@ -20,13 +20,11 @@ #include #include #include -#include #include #include #include #include -#include #include #include #include diff --git a/cpp/tests/replace/replace_tests.cpp b/cpp/tests/replace/replace_tests.cpp index b12bf08520f..ae4041bcfaf 100644 --- a/cpp/tests/replace/replace_tests.cpp +++ b/cpp/tests/replace/replace_tests.cpp @@ -20,20 +20,16 @@ #include #include #include -#include #include #include #include #include -#include -#include #include #include #include #include -#include #include diff --git a/cpp/tests/reshape/byte_cast_tests.cpp b/cpp/tests/reshape/byte_cast_tests.cpp index b3d9b2e2f5f..59585c0e947 100644 --- a/cpp/tests/reshape/byte_cast_tests.cpp +++ 
b/cpp/tests/reshape/byte_cast_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/reshape/tile_tests.cpp b/cpp/tests/reshape/tile_tests.cpp index ed76b9d2ea5..25cfc5c5108 100644 --- a/cpp/tests/reshape/tile_tests.cpp +++ b/cpp/tests/reshape/tile_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,7 +15,6 @@ */ #include -#include #include #include #include diff --git a/cpp/tests/rolling/collect_ops_test.cpp b/cpp/tests/rolling/collect_ops_test.cpp index 165e0347785..e8a36d9ab48 100644 --- a/cpp/tests/rolling/collect_ops_test.cpp +++ b/cpp/tests/rolling/collect_ops_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/rolling/empty_input_test.cpp b/cpp/tests/rolling/empty_input_test.cpp index e7d1e3f0b10..2e1815671a9 100644 --- a/cpp/tests/rolling/empty_input_test.cpp +++ b/cpp/tests/rolling/empty_input_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,9 +15,7 @@ */ #include -#include #include -#include #include #include diff --git a/cpp/tests/rolling/grouped_rolling_range_test.cpp b/cpp/tests/rolling/grouped_rolling_range_test.cpp index fcfbd0eee78..2cb9b60000b 100644 --- a/cpp/tests/rolling/grouped_rolling_range_test.cpp +++ b/cpp/tests/rolling/grouped_rolling_range_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,21 +17,16 @@ #include #include #include -#include #include #include #include #include -#include -#include #include #include #include #include -#include -#include #include #include diff --git a/cpp/tests/rolling/grouped_rolling_test.cpp b/cpp/tests/rolling/grouped_rolling_test.cpp index 78d5daf7e83..78b444bcd93 100644 --- a/cpp/tests/rolling/grouped_rolling_test.cpp +++ b/cpp/tests/rolling/grouped_rolling_test.cpp @@ -19,7 +19,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/rolling/lead_lag_test.cpp b/cpp/tests/rolling/lead_lag_test.cpp index de057e96320..6519b0ed4ee 100644 --- a/cpp/tests/rolling/lead_lag_test.cpp +++ b/cpp/tests/rolling/lead_lag_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,6 @@ #include #include #include -#include #include #include @@ -26,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/rolling/nth_element_test.cpp b/cpp/tests/rolling/nth_element_test.cpp index 2444992e68f..5f2b383ed55 100644 --- a/cpp/tests/rolling/nth_element_test.cpp +++ b/cpp/tests/rolling/nth_element_test.cpp @@ -17,22 +17,15 @@ #include #include #include -#include #include #include #include -#include -#include #include -#include - #include #include -#include - #include #include diff --git a/cpp/tests/rolling/offset_row_window_test.cpp b/cpp/tests/rolling/offset_row_window_test.cpp index 0eaab0c9f7a..dcaa47e722b 100644 --- a/cpp/tests/rolling/offset_row_window_test.cpp +++ b/cpp/tests/rolling/offset_row_window_test.cpp @@ -17,14 +17,10 @@ #include #include #include -#include #include #include -#include -#include #include -#include template using fwcw = cudf::test::fixed_width_column_wrapper; diff --git a/cpp/tests/rolling/range_rolling_window_test.cpp 
b/cpp/tests/rolling/range_rolling_window_test.cpp index 461c41025e9..daf5fcc1d96 100644 --- a/cpp/tests/rolling/range_rolling_window_test.cpp +++ b/cpp/tests/rolling/range_rolling_window_test.cpp @@ -17,22 +17,17 @@ #include #include #include -#include #include #include -#include #include #include -#include -#include #include #include #include #include -#include #include #include diff --git a/cpp/tests/rolling/range_window_bounds_test.cpp b/cpp/tests/rolling/range_window_bounds_test.cpp index b77451bf0bc..a67555280f4 100644 --- a/cpp/tests/rolling/range_window_bounds_test.cpp +++ b/cpp/tests/rolling/range_window_bounds_test.cpp @@ -15,9 +15,6 @@ */ #include -#include -#include -#include #include #include @@ -25,8 +22,6 @@ #include -#include - struct RangeWindowBoundsTest : public cudf::test::BaseFixture {}; template diff --git a/cpp/tests/rolling/rolling_test.cpp b/cpp/tests/rolling/rolling_test.cpp index 6e0dc16dca9..72a511fd5f1 100644 --- a/cpp/tests/rolling/rolling_test.cpp +++ b/cpp/tests/rolling/rolling_test.cpp @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -30,7 +29,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/scalar/factories_test.cpp b/cpp/tests/scalar/factories_test.cpp index 5f132f3ace9..26987ea1b7b 100644 --- a/cpp/tests/scalar/factories_test.cpp +++ b/cpp/tests/scalar/factories_test.cpp @@ -22,11 +22,8 @@ #include #include -#include #include -#include - class ScalarFactoryTest : public cudf::test::BaseFixture {}; template diff --git a/cpp/tests/search/search_dictionary_test.cpp b/cpp/tests/search/search_dictionary_test.cpp index 78f79ccc648..a3bb1dfda10 100644 --- a/cpp/tests/search/search_dictionary_test.cpp +++ b/cpp/tests/search/search_dictionary_test.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include diff --git a/cpp/tests/search/search_list_test.cpp b/cpp/tests/search/search_list_test.cpp index 7584003e800..fb5d0fcc889 100644 --- 
a/cpp/tests/search/search_list_test.cpp +++ b/cpp/tests/search/search_list_test.cpp @@ -20,7 +20,6 @@ #include #include -#include #include #include #include diff --git a/cpp/tests/search/search_struct_test.cpp b/cpp/tests/search/search_struct_test.cpp index c35d359e75c..05b9deb3463 100644 --- a/cpp/tests/search/search_struct_test.cpp +++ b/cpp/tests/search/search_struct_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,6 @@ #include #include -#include #include #include #include diff --git a/cpp/tests/search/search_test.cpp b/cpp/tests/search/search_test.cpp index 7550cc27161..8d750be5677 100644 --- a/cpp/tests/search/search_test.cpp +++ b/cpp/tests/search/search_test.cpp @@ -20,7 +20,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/sort/is_sorted_tests.cpp b/cpp/tests/sort/is_sorted_tests.cpp index 109095192f9..e3c9f8d349e 100644 --- a/cpp/tests/sort/is_sorted_tests.cpp +++ b/cpp/tests/sort/is_sorted_tests.cpp @@ -15,7 +15,6 @@ */ #include -#include #include #include #include diff --git a/cpp/tests/sort/rank_test.cpp b/cpp/tests/sort/rank_test.cpp index e08a2105aea..ded46cb1f31 100644 --- a/cpp/tests/sort/rank_test.cpp +++ b/cpp/tests/sort/rank_test.cpp @@ -18,10 +18,8 @@ #include #include #include -#include #include -#include #include #include #include diff --git a/cpp/tests/sort/sort_nested_types_tests.cpp b/cpp/tests/sort/sort_nested_types_tests.cpp index 8ab23936ceb..ce4148a941e 100644 --- a/cpp/tests/sort/sort_nested_types_tests.cpp +++ b/cpp/tests/sort/sort_nested_types_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/sort/sort_test.cpp b/cpp/tests/sort/sort_test.cpp index 6a35e977b46..e1505c7a474 100644 --- a/cpp/tests/sort/sort_test.cpp +++ b/cpp/tests/sort/sort_test.cpp @@ -28,7 +28,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/sort/stable_sort_tests.cpp b/cpp/tests/sort/stable_sort_tests.cpp index 655166e0d62..88de9d51523 100644 --- a/cpp/tests/sort/stable_sort_tests.cpp +++ b/cpp/tests/sort/stable_sort_tests.cpp @@ -25,9 +25,6 @@ #include #include -#include -#include - #include #include diff --git a/cpp/tests/stream_compaction/apply_boolean_mask_tests.cpp b/cpp/tests/stream_compaction/apply_boolean_mask_tests.cpp index 6c0582fb846..1204b019739 100644 --- a/cpp/tests/stream_compaction/apply_boolean_mask_tests.cpp +++ b/cpp/tests/stream_compaction/apply_boolean_mask_tests.cpp @@ -20,9 +20,7 @@ #include #include #include -#include -#include #include #include #include @@ -31,8 +29,6 @@ #include #include -#include -#include #include struct ApplyBooleanMask : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/stream_compaction/distinct_count_tests.cpp b/cpp/tests/stream_compaction/distinct_count_tests.cpp index a2dab649961..ee1bb3ead92 100644 --- a/cpp/tests/stream_compaction/distinct_count_tests.cpp +++ b/cpp/tests/stream_compaction/distinct_count_tests.cpp @@ -15,16 +15,11 @@ */ #include -#include #include #include -#include #include -#include -#include #include -#include #include #include diff --git a/cpp/tests/stream_compaction/distinct_tests.cpp b/cpp/tests/stream_compaction/distinct_tests.cpp index 14d7d8789ac..c618ff68cbb 100644 --- a/cpp/tests/stream_compaction/distinct_tests.cpp +++ b/cpp/tests/stream_compaction/distinct_tests.cpp @@ -15,7 +15,6 @@ */ #include -#include #include #include #include @@ -27,8 
+26,6 @@ #include #include -#include - auto constexpr null{0}; // null at current level auto constexpr XXX{0}; // null pushed down from parent level auto constexpr NaN = std::numeric_limits::quiet_NaN(); diff --git a/cpp/tests/stream_compaction/drop_nans_tests.cpp b/cpp/tests/stream_compaction/drop_nans_tests.cpp index bf72da5c840..71321361564 100644 --- a/cpp/tests/stream_compaction/drop_nans_tests.cpp +++ b/cpp/tests/stream_compaction/drop_nans_tests.cpp @@ -15,12 +15,9 @@ */ #include -#include #include #include -#include -#include #include #include #include diff --git a/cpp/tests/stream_compaction/drop_nulls_tests.cpp b/cpp/tests/stream_compaction/drop_nulls_tests.cpp index dbac1d58195..d3b45c2323e 100644 --- a/cpp/tests/stream_compaction/drop_nulls_tests.cpp +++ b/cpp/tests/stream_compaction/drop_nulls_tests.cpp @@ -15,12 +15,10 @@ */ #include -#include #include #include #include -#include #include #include #include diff --git a/cpp/tests/stream_compaction/stable_distinct_tests.cpp b/cpp/tests/stream_compaction/stable_distinct_tests.cpp index 6c6c53331d4..cc847da6340 100644 --- a/cpp/tests/stream_compaction/stable_distinct_tests.cpp +++ b/cpp/tests/stream_compaction/stable_distinct_tests.cpp @@ -15,20 +15,16 @@ */ #include -#include #include #include #include #include -#include #include #include #include #include -#include - auto constexpr null{0}; // null at current level auto constexpr XXX{0}; // null pushed down from parent level auto constexpr NaN = std::numeric_limits::quiet_NaN(); diff --git a/cpp/tests/stream_compaction/unique_count_tests.cpp b/cpp/tests/stream_compaction/unique_count_tests.cpp index 640d159fc4f..bad93e92712 100644 --- a/cpp/tests/stream_compaction/unique_count_tests.cpp +++ b/cpp/tests/stream_compaction/unique_count_tests.cpp @@ -15,16 +15,11 @@ */ #include -#include #include #include -#include #include -#include -#include #include -#include #include #include diff --git a/cpp/tests/stream_compaction/unique_tests.cpp 
b/cpp/tests/stream_compaction/unique_tests.cpp index d5b6915b520..e2b32b898b3 100644 --- a/cpp/tests/stream_compaction/unique_tests.cpp +++ b/cpp/tests/stream_compaction/unique_tests.cpp @@ -15,22 +15,16 @@ */ #include -#include #include #include #include -#include #include -#include #include #include #include #include -#include -#include - using cudf::nan_policy; using cudf::null_equality; using cudf::null_policy; diff --git a/cpp/tests/streams/binaryop_test.cpp b/cpp/tests/streams/binaryop_test.cpp index 2a7b52b1b6b..3dcc6f9e632 100644 --- a/cpp/tests/streams/binaryop_test.cpp +++ b/cpp/tests/streams/binaryop_test.cpp @@ -21,7 +21,6 @@ #include #include -#include #include class BinaryopTest : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/streams/io/csv_test.cpp b/cpp/tests/streams/io/csv_test.cpp index 42894a0ebcb..a74ee64f8de 100644 --- a/cpp/tests/streams/io/csv_test.cpp +++ b/cpp/tests/streams/io/csv_test.cpp @@ -17,13 +17,9 @@ #include #include #include -#include #include -#include -#include #include -#include #include #include diff --git a/cpp/tests/streams/io/json_test.cpp b/cpp/tests/streams/io/json_test.cpp index f98e685ed0c..d352c6c3b2a 100644 --- a/cpp/tests/streams/io/json_test.cpp +++ b/cpp/tests/streams/io/json_test.cpp @@ -19,9 +19,7 @@ #include #include -#include #include -#include #include #include diff --git a/cpp/tests/streams/io/multibyte_split_test.cpp b/cpp/tests/streams/io/multibyte_split_test.cpp index b0eff1d3340..5bb17226029 100644 --- a/cpp/tests/streams/io/multibyte_split_test.cpp +++ b/cpp/tests/streams/io/multibyte_split_test.cpp @@ -17,7 +17,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/streams/io/orc_test.cpp b/cpp/tests/streams/io/orc_test.cpp index cc43bf15b5d..10722557e6a 100644 --- a/cpp/tests/streams/io/orc_test.cpp +++ b/cpp/tests/streams/io/orc_test.cpp @@ -17,19 +17,11 @@ #include #include #include -#include -#include #include #include -#include #include -#include -#include -#include 
-#include -#include #include #include diff --git a/cpp/tests/streams/io/parquet_test.cpp b/cpp/tests/streams/io/parquet_test.cpp index 9d2dec2d697..18bb80e64af 100644 --- a/cpp/tests/streams/io/parquet_test.cpp +++ b/cpp/tests/streams/io/parquet_test.cpp @@ -17,13 +17,9 @@ #include #include #include -#include -#include #include #include -#include -#include #include #include diff --git a/cpp/tests/streams/join_test.cpp b/cpp/tests/streams/join_test.cpp index 2811bb676fa..27bd7e080c9 100644 --- a/cpp/tests/streams/join_test.cpp +++ b/cpp/tests/streams/join_test.cpp @@ -19,11 +19,9 @@ #include #include -#include #include #include #include -#include #include #include diff --git a/cpp/tests/streams/null_mask_test.cpp b/cpp/tests/streams/null_mask_test.cpp index e96224003f4..ed37a72545f 100644 --- a/cpp/tests/streams/null_mask_test.cpp +++ b/cpp/tests/streams/null_mask_test.cpp @@ -14,15 +14,12 @@ * limitations under the License. */ -#include - #include #include #include #include #include -#include class NullMaskTest : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/streams/partitioning_test.cpp b/cpp/tests/streams/partitioning_test.cpp new file mode 100644 index 00000000000..636c5c1f1f9 --- /dev/null +++ b/cpp/tests/streams/partitioning_test.cpp @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +using cudf::test::fixed_width_column_wrapper; +using cudf::test::strings_column_wrapper; + +class PartitionTest : public cudf::test::BaseFixture {}; + +TEST_F(PartitionTest, Struct) /* cudf::partition on a struct column with a null row, on the test stream */ +{ + fixed_width_column_wrapper A({1, 2}, {0, 1}); + auto struct_col = cudf::test::structs_column_wrapper({A}, {0, 1}).release(); + auto table_to_partition = cudf::table_view{{*struct_col}}; + fixed_width_column_wrapper map{9, 2}; + + auto num_partitions = 12; + auto result = + cudf::partition(table_to_partition, map, num_partitions, cudf::test::get_default_stream()); +} + +TEST_F(PartitionTest, EmptyInput) /* cudf::round_robin_partition on an empty column, on the test stream */ +{ + auto const empty_column = fixed_width_column_wrapper{}; + auto const num_partitions = 5; + auto const start_partition = 0; + auto const [out_table, out_offsets] = + cudf::round_robin_partition(cudf::table_view{{empty_column}}, + num_partitions, + start_partition, + cudf::test::get_default_stream()); +} + +TEST_F(PartitionTest, ZeroPartitions) /* cudf::hash_partition with num_partitions == 0, on the test stream */ +{ + fixed_width_column_wrapper floats({1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f}); + fixed_width_column_wrapper integers({1, 2, 3, 4, 5, 6, 7, 8}); + strings_column_wrapper strings({"a", "bb", "ccc", "d", "ee", "fff", "gg", "h"}); + auto input = cudf::table_view({floats, integers, strings}); + + auto columns_to_hash = std::vector({2}); /* hash on column index 2 (the strings column) */ + + cudf::size_type const num_partitions = 0; + auto [output, offsets] = cudf::hash_partition(input, + columns_to_hash, + num_partitions, + cudf::hash_id::HASH_MURMUR3, + cudf::DEFAULT_HASH_SEED, + cudf::test::get_default_stream()); +} diff --git a/cpp/tests/streams/reduction_test.cpp b/cpp/tests/streams/reduction_test.cpp index b4f013fc960..9ab972302e4 100644 --- a/cpp/tests/streams/reduction_test.cpp +++ b/cpp/tests/streams/reduction_test.cpp @@ -17,11 +17,8 @@ #include #include #include -#include -#include #include -#include #include #include diff --git a/cpp/tests/streams/rolling_test.cpp 
b/cpp/tests/streams/rolling_test.cpp index b352ad2c0d2..4d9899870b4 100644 --- a/cpp/tests/streams/rolling_test.cpp +++ b/cpp/tests/streams/rolling_test.cpp @@ -17,12 +17,10 @@ #include #include #include -#include #include #include #include -#include class RollingTest : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/streams/round_test.cpp b/cpp/tests/streams/round_test.cpp new file mode 100644 index 00000000000..b8fda022db8 --- /dev/null +++ b/cpp/tests/streams/round_test.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include + +#include + +class RoundTest : public cudf::test::BaseFixture {}; + +TEST_F(RoundTest, RoundHalfToEven) /* cudf::round with HALF_EVEN (matches the test name), on the test stream */ +{ + std::vector vals = {1.729, 17.29, 172.9, 1729}; + cudf::test::fixed_width_column_wrapper input(vals.begin(), vals.end()); + cudf::round(input, 0, cudf::rounding_method::HALF_EVEN, cudf::test::get_default_stream()); +} + +TEST_F(RoundTest, RoundHalfAwayFromEven) /* cudf::round with HALF_UP (the non-half-even method), on the test stream */ +{ + std::vector vals = {1.5, 2.5, 1.35, 1.45, 15, 25}; + cudf::test::fixed_width_column_wrapper input(vals.begin(), vals.end()); + cudf::round(input, -1, cudf::rounding_method::HALF_UP, cudf::test::get_default_stream()); +} diff --git a/cpp/tests/streams/stream_compaction_test.cpp b/cpp/tests/streams/stream_compaction_test.cpp index 07b2d77cc04..e7b282601e1 100644 --- a/cpp/tests/streams/stream_compaction_test.cpp +++ b/cpp/tests/streams/stream_compaction_test.cpp @@ -15,20 +15,16 @@ */ #include -#include #include #include #include -#include #include #include #include #include #include -#include - auto constexpr NaN = std::numeric_limits::quiet_NaN(); auto constexpr KEEP_ANY = cudf::duplicate_keep_option::KEEP_ANY; auto constexpr KEEP_FIRST = cudf::duplicate_keep_option::KEEP_FIRST; diff --git a/cpp/tests/streams/strings/factory_test.cpp b/cpp/tests/streams/strings/factory_test.cpp index 36e595ab9fa..449e0830b0c 100644 --- a/cpp/tests/streams/strings/factory_test.cpp +++ b/cpp/tests/streams/strings/factory_test.cpp @@ -15,7 +15,6 @@ */ #include -#include #include #include diff --git a/cpp/tests/streams/strings/reverse_test.cpp b/cpp/tests/streams/strings/reverse_test.cpp index 4b4d0a7aff5..154e1c1b715 100644 --- a/cpp/tests/streams/strings/reverse_test.cpp +++ b/cpp/tests/streams/strings/reverse_test.cpp @@ -21,7 +21,6 @@ #include #include -#include class StringsReverseTest : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/streams/transform_test.cpp b/cpp/tests/streams/transform_test.cpp index cf81dc6fb42..9f168abcb31 100644 --- 
a/cpp/tests/streams/transform_test.cpp +++ b/cpp/tests/streams/transform_test.cpp @@ -15,17 +15,11 @@ */ #include -#include #include #include -#include -#include #include -#include -#include #include -#include #include #include diff --git a/cpp/tests/strings/array_tests.cpp b/cpp/tests/strings/array_tests.cpp index 9c0ecaa52c0..06b9c2fa3c1 100644 --- a/cpp/tests/strings/array_tests.cpp +++ b/cpp/tests/strings/array_tests.cpp @@ -23,10 +23,8 @@ #include #include #include -#include #include #include -#include #include #include diff --git a/cpp/tests/strings/combine/concatenate_tests.cpp b/cpp/tests/strings/combine/concatenate_tests.cpp index bb57d6f5e8a..e53adcf373a 100644 --- a/cpp/tests/strings/combine/concatenate_tests.cpp +++ b/cpp/tests/strings/combine/concatenate_tests.cpp @@ -22,7 +22,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/strings/combine/join_list_elements_tests.cpp b/cpp/tests/strings/combine/join_list_elements_tests.cpp index 00317146088..c92f1cfc8f8 100644 --- a/cpp/tests/strings/combine/join_list_elements_tests.cpp +++ b/cpp/tests/strings/combine/join_list_elements_tests.cpp @@ -22,7 +22,6 @@ #include #include #include -#include using namespace cudf::test::iterators; diff --git a/cpp/tests/strings/concatenate_tests.cpp b/cpp/tests/strings/concatenate_tests.cpp index 5cf4015b9e9..51dcc60d95e 100644 --- a/cpp/tests/strings/concatenate_tests.cpp +++ b/cpp/tests/strings/concatenate_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,7 +20,6 @@ #include #include -#include #include diff --git a/cpp/tests/strings/datetime_tests.cpp b/cpp/tests/strings/datetime_tests.cpp index b3dc3010c67..da0db0fc056 100644 --- a/cpp/tests/strings/datetime_tests.cpp +++ b/cpp/tests/strings/datetime_tests.cpp @@ -19,7 +19,6 @@ #include #include -#include #include #include #include diff --git a/cpp/tests/strings/extract_tests.cpp b/cpp/tests/strings/extract_tests.cpp index 7e0338f1bf4..37b25d9b287 100644 --- a/cpp/tests/strings/extract_tests.cpp +++ b/cpp/tests/strings/extract_tests.cpp @@ -21,7 +21,6 @@ #include #include -#include #include #include #include diff --git a/cpp/tests/strings/findall_tests.cpp b/cpp/tests/strings/findall_tests.cpp index 4821a7fa999..7eb4b32d078 100644 --- a/cpp/tests/strings/findall_tests.cpp +++ b/cpp/tests/strings/findall_tests.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -28,8 +27,6 @@ #include -#include - struct StringsFindallTests : public cudf::test::BaseFixture {}; TEST_F(StringsFindallTests, FindallTest) diff --git a/cpp/tests/strings/fixed_point_tests.cpp b/cpp/tests/strings/fixed_point_tests.cpp index 79054551498..b788c05c152 100644 --- a/cpp/tests/strings/fixed_point_tests.cpp +++ b/cpp/tests/strings/fixed_point_tests.cpp @@ -23,8 +23,6 @@ #include #include -#include - struct StringsConvertTest : public cudf::test::BaseFixture {}; template diff --git a/cpp/tests/strings/integers_tests.cpp b/cpp/tests/strings/integers_tests.cpp index 26bcfe8028d..c08effdb969 100644 --- a/cpp/tests/strings/integers_tests.cpp +++ b/cpp/tests/strings/integers_tests.cpp @@ -24,9 +24,6 @@ #include #include -#include -#include - #include #include diff --git a/cpp/tests/structs/structs_column_tests.cpp b/cpp/tests/structs/structs_column_tests.cpp index 219bd6d8b01..a34ff25cb69 100644 --- a/cpp/tests/structs/structs_column_tests.cpp +++ b/cpp/tests/structs/structs_column_tests.cpp @@ -17,28 +17,18 @@ #include #include #include -#include #include #include 
#include -#include #include -#include #include -#include -#include -#include -#include #include #include #include -#include #include -#include -#include #include #include diff --git a/cpp/tests/structs/utilities_tests.cpp b/cpp/tests/structs/utilities_tests.cpp index c33eedf9bd9..c0df2f01a63 100644 --- a/cpp/tests/structs/utilities_tests.cpp +++ b/cpp/tests/structs/utilities_tests.cpp @@ -14,21 +14,15 @@ * limitations under the License. */ -#include "cudf_test/default_stream.hpp" - #include #include #include -#include #include #include #include -#include -#include #include #include -#include #include #include diff --git a/cpp/tests/table/row_operators_tests.cpp b/cpp/tests/table/row_operators_tests.cpp index 5fa63c47cf0..216c4d7b6bb 100644 --- a/cpp/tests/table/row_operators_tests.cpp +++ b/cpp/tests/table/row_operators_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/table/table_tests.cpp b/cpp/tests/table/table_tests.cpp index 1637ba7d7d3..363f1a0ba5d 100644 --- a/cpp/tests/table/table_tests.cpp +++ b/cpp/tests/table/table_tests.cpp @@ -17,17 +17,14 @@ #include #include #include -#include #include #include #include -#include #include #include #include -#include template using column_wrapper = cudf::test::fixed_width_column_wrapper; diff --git a/cpp/tests/text/minhash_tests.cpp b/cpp/tests/text/minhash_tests.cpp index e23f3f6e7d8..ef35a4472cf 100644 --- a/cpp/tests/text/minhash_tests.cpp +++ b/cpp/tests/text/minhash_tests.cpp @@ -21,13 +21,9 @@ #include #include -#include #include -#include -#include - #include struct MinHashTest : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/text/ngrams_tests.cpp b/cpp/tests/text/ngrams_tests.cpp index 1acb4fc4265..c72c7cfc80e 100644 --- a/cpp/tests/text/ngrams_tests.cpp +++ b/cpp/tests/text/ngrams_tests.cpp @@ -28,8 +28,6 @@ #include -#include - struct TextGenerateNgramsTest : public cudf::test::BaseFixture {}; TEST_F(TextGenerateNgramsTest, Ngrams) diff --git 
a/cpp/tests/text/normalize_tests.cpp b/cpp/tests/text/normalize_tests.cpp index b0d41004e7e..2515cc917fa 100644 --- a/cpp/tests/text/normalize_tests.cpp +++ b/cpp/tests/text/normalize_tests.cpp @@ -20,7 +20,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/text/stemmer_tests.cpp b/cpp/tests/text/stemmer_tests.cpp index a343913411c..82c4bf53cfc 100644 --- a/cpp/tests/text/stemmer_tests.cpp +++ b/cpp/tests/text/stemmer_tests.cpp @@ -19,7 +19,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/text/subword_tests.cpp b/cpp/tests/text/subword_tests.cpp index a615780c02a..782551ad66e 100644 --- a/cpp/tests/text/subword_tests.cpp +++ b/cpp/tests/text/subword_tests.cpp @@ -19,13 +19,11 @@ #include #include -#include #include #include #include -#include #include // Global environment for temporary files diff --git a/cpp/tests/transform/bools_to_mask_test.cpp b/cpp/tests/transform/bools_to_mask_test.cpp index 2684123c08a..9437440f34d 100644 --- a/cpp/tests/transform/bools_to_mask_test.cpp +++ b/cpp/tests/transform/bools_to_mask_test.cpp @@ -20,10 +20,8 @@ #include #include -#include #include #include -#include #include diff --git a/cpp/tests/transform/nans_to_null_test.cpp b/cpp/tests/transform/nans_to_null_test.cpp index ba16c100e7a..42ca872a936 100644 --- a/cpp/tests/transform/nans_to_null_test.cpp +++ b/cpp/tests/transform/nans_to_null_test.cpp @@ -17,12 +17,10 @@ #include #include #include -#include #include #include #include -#include template struct NaNsToNullTest : public cudf::test::BaseFixture { diff --git a/cpp/tests/transpose/transpose_test.cpp b/cpp/tests/transpose/transpose_test.cpp index 5a88c402b8c..7797b2b2cf8 100644 --- a/cpp/tests/transpose/transpose_test.cpp +++ b/cpp/tests/transpose/transpose_test.cpp @@ -22,7 +22,6 @@ #include #include -#include #include #include diff --git a/cpp/tests/types/traits_test.cpp b/cpp/tests/types/traits_test.cpp index 0d9092c33da..46468af515d 100644 --- 
a/cpp/tests/types/traits_test.cpp +++ b/cpp/tests/types/traits_test.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include diff --git a/cpp/tests/unary/cast_tests.cpp b/cpp/tests/unary/cast_tests.cpp index 45b89b76070..ed4c1340dbb 100644 --- a/cpp/tests/unary/cast_tests.cpp +++ b/cpp/tests/unary/cast_tests.cpp @@ -20,18 +20,15 @@ #include #include -#include #include #include #include #include #include -#include #include #include -#include #include static auto const test_timestamps_D = std::vector{ diff --git a/cpp/tests/unary/math_ops_test.cpp b/cpp/tests/unary/math_ops_test.cpp index 5bfbf70d5f9..663a919f3f4 100644 --- a/cpp/tests/unary/math_ops_test.cpp +++ b/cpp/tests/unary/math_ops_test.cpp @@ -22,10 +22,6 @@ #include #include #include -#include -#include - -#include #include diff --git a/cpp/tests/unary/unary_ops_test.cpp b/cpp/tests/unary/unary_ops_test.cpp index e7477c34642..3c616461c74 100644 --- a/cpp/tests/unary/unary_ops_test.cpp +++ b/cpp/tests/unary/unary_ops_test.cpp @@ -23,7 +23,6 @@ #include #include -#include #include template diff --git a/cpp/tests/utilities/random_seed.cpp b/cpp/tests/utilities/random_seed.cpp index ab5a31ce161..555d89b7dc5 100644 --- a/cpp/tests/utilities/random_seed.cpp +++ b/cpp/tests/utilities/random_seed.cpp @@ -13,8 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include -#include +#include namespace cudf { namespace test { diff --git a/cpp/tests/utilities_tests/column_debug_tests.cpp b/cpp/tests/utilities_tests/column_debug_tests.cpp index 7aa05af4591..2a57d678d07 100644 --- a/cpp/tests/utilities_tests/column_debug_tests.cpp +++ b/cpp/tests/utilities_tests/column_debug_tests.cpp @@ -16,12 +16,9 @@ #include #include -#include #include #include -#include - #include #include diff --git a/cpp/tests/utilities_tests/column_utilities_tests.cpp b/cpp/tests/utilities_tests/column_utilities_tests.cpp index 9d6d5ccb9b5..a13ce825d0b 100644 --- a/cpp/tests/utilities_tests/column_utilities_tests.cpp +++ b/cpp/tests/utilities_tests/column_utilities_tests.cpp @@ -17,20 +17,16 @@ #include #include #include -#include #include #include #include #include #include -#include #include #include -#include - template struct ColumnUtilitiesTest : public cudf::test::BaseFixture { cudf::test::UniformRandomGenerator random; diff --git a/cpp/tests/utilities_tests/column_wrapper_tests.cpp b/cpp/tests/utilities_tests/column_wrapper_tests.cpp index 479c6687e75..339678f3be8 100644 --- a/cpp/tests/utilities_tests/column_wrapper_tests.cpp +++ b/cpp/tests/utilities_tests/column_wrapper_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp b/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp index 5e3fda5e6f7..ff50dc39979 100644 --- a/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp +++ b/cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/cpp/tests/utilities_tests/type_check_tests.cpp b/cpp/tests/utilities_tests/type_check_tests.cpp index fecb896f95a..c1c5776be74 100644 --- a/cpp/tests/utilities_tests/type_check_tests.cpp +++ b/cpp/tests/utilities_tests/type_check_tests.cpp @@ -18,7 +18,6 @@ #include #include -#include #include #include #include diff 
--git a/cpp/tests/utilities_tests/type_list_tests.cpp b/cpp/tests/utilities_tests/type_list_tests.cpp index 849457056e4..6c3a84763a0 100644 --- a/cpp/tests/utilities_tests/type_list_tests.cpp +++ b/cpp/tests/utilities_tests/type_list_tests.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include using namespace cudf::test; // this will make reading code way easier @@ -23,6 +22,7 @@ namespace { // Work around to remove parentheses surrounding a type template struct argument_type; + template struct argument_type { using type = U; diff --git a/dependencies.yaml b/dependencies.yaml index ff97b67f0ce..90255ca674c 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -232,7 +232,7 @@ files: key: cudf-pandas-tests includes: - test_python_cudf_pandas - py_rapids_build_cudf_polars: + py_build_cudf_polars: output: pyproject pyproject_dir: python/cudf_polars extras: @@ -399,21 +399,21 @@ dependencies: - output_types: conda packages: # Align nvcomp version with rapids-cmake - - nvcomp==4.0.1 + - nvcomp==4.1.0.6 specific: - output_types: [requirements, pyproject] matrices: - matrix: cuda: "12.*" packages: - - nvidia-nvcomp-cu12==4.0.1 + - nvidia-nvcomp-cu12==4.1.0.6 - matrix: cuda: "11.*" packages: - - nvidia-nvcomp-cu11==4.0.1 + - nvidia-nvcomp-cu11==4.1.0.6 - matrix: packages: - - nvidia-nvcomp==4.0.1 + - nvidia-nvcomp==4.1.0.6 rapids_build_skbuild: common: - output_types: [conda, requirements, pyproject] @@ -727,7 +727,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - polars>=1.8,<1.9 + - polars>=1.11,<1.13 run_dask_cudf: common: - output_types: [conda, requirements, pyproject] @@ -828,6 +828,7 @@ dependencies: - pytest-benchmark - pytest-cases>=3.8.2 - scipy + - mmh3 - output_types: conda packages: - aiobotocore>=2.2.0 @@ -836,12 +837,14 @@ dependencies: - msgpack-python - moto>=4.0.8 - s3fs>=2022.3.0 - - output_types: pyproject + - python-xxhash + - output_types: [pyproject, requirements] packages: - msgpack - 
&tokenizers tokenizers==0.15.2 - &transformers transformers==4.39.3 - tzdata + - xxhash specific: - output_types: [conda, requirements] matrices: diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index ecf619ddc44..5942cc16850 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -342,10 +342,7 @@ def clean_all_xml_files(path): "cudf.Series": ("cudf.core.series.Series", "cudf.Series"), "cudf.Index": ("cudf.core.index.Index", "cudf.Index"), "cupy.core.core.ndarray": ("cupy.ndarray", "cupy.ndarray"), - # TODO: Replace the first entry in a follow-up with rmm.pylibrmm.device_buffer.DeviceBuffer - # when the RMM objects inventory is generated from branch-24.12. The RMM objects inventory - # can be accessed here : https://docs.rapids.ai/api/rmm/nightly/objects.inv - "DeviceBuffer": ("rmm.DeviceBuffer", "rmm.DeviceBuffer"), + "DeviceBuffer": ("rmm.pylibrmm.device_buffer.DeviceBuffer", "rmm.DeviceBuffer"), } diff --git a/docs/cudf/source/developer_guide/contributing_guide.md b/docs/cudf/source/developer_guide/contributing_guide.md index 6fce268f309..f4d2c7319b3 100644 --- a/docs/cudf/source/developer_guide/contributing_guide.md +++ b/docs/cudf/source/developer_guide/contributing_guide.md @@ -15,8 +15,7 @@ Developers are strongly recommended to set up `pre-commit` prior to any developm The `.pre-commit-config.yaml` file at the root of the repo is the primary source of truth linting. Specifically, cuDF uses the following tools: -- [`ruff`](https://beta.ruff.rs/) checks for general code formatting compliance. -- [`isort`](https://pycqa.github.io/isort/) ensures imports are sorted consistently. +- [`ruff`](https://docs.astral.sh/ruff/) checks for general code formatting compliance. - [`mypy`](http://mypy-lang.org/) performs static type checking. In conjunction with [type hints](https://docs.python.org/3/library/typing.html), `mypy` can help catch various bugs that are otherwise difficult to find. 
diff --git a/docs/cudf/source/user_guide/10min.ipynb b/docs/cudf/source/user_guide/10min.ipynb index 95f5f9734dd..46221b6015b 100644 --- a/docs/cudf/source/user_guide/10min.ipynb +++ b/docs/cudf/source/user_guide/10min.ipynb @@ -38,10 +38,10 @@ "import os\n", "\n", "import cupy as cp\n", + "import dask_cudf\n", "import pandas as pd\n", "\n", "import cudf\n", - "import dask_cudf\n", "\n", "cp.random.seed(12)\n", "\n", diff --git a/docs/cudf/source/user_guide/api_docs/index.rst b/docs/cudf/source/user_guide/api_docs/index.rst index d05501f4a4a..f711327f9ed 100644 --- a/docs/cudf/source/user_guide/api_docs/index.rst +++ b/docs/cudf/source/user_guide/api_docs/index.rst @@ -19,7 +19,7 @@ This page provides a list of all publicly accessible modules, methods and classe general_utilities window io - subword_tokenize + tokenize_vocabulary string_handling list_handling struct_handling diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/hashing.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/hashing.rst new file mode 100644 index 00000000000..6bd1fbd821b --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/hashing.rst @@ -0,0 +1,6 @@ +======= +hashing +======= + +.. automodule:: pylibcudf.hashing + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst index 62e14a67ee5..997ece6d29c 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst @@ -19,6 +19,7 @@ This page provides API documentation for pylibcudf. 
filling gpumemoryview groupby + hashing interop join json diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/byte_pair_encode.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/byte_pair_encode.rst new file mode 100644 index 00000000000..908fcc4fde6 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/byte_pair_encode.rst @@ -0,0 +1,6 @@ +================ +byte_pair_encode +================ + +.. automodule:: pylibcudf.nvtext.byte_pair_encode + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst index 58303356336..9ba47fd8d70 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst @@ -8,4 +8,10 @@ nvtext generate_ngrams jaccard minhash + byte_pair_encode ngrams_tokenize + normalize + replace + stemmer + subword_tokenize + tokenize diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/normalize.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/normalize.rst new file mode 100644 index 00000000000..e496f6a45da --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/normalize.rst @@ -0,0 +1,6 @@ +========= +normalize +========= + +.. automodule:: pylibcudf.nvtext.normalize + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/replace.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/replace.rst new file mode 100644 index 00000000000..04cee972dc1 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/replace.rst @@ -0,0 +1,6 @@ +======= +replace +======= + +.. 
automodule:: pylibcudf.nvtext.replace + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/stemmer.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/stemmer.rst new file mode 100644 index 00000000000..b407ff8451a --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/stemmer.rst @@ -0,0 +1,6 @@ +======= +stemmer +======= + +.. automodule:: pylibcudf.nvtext.stemmer + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/subword_tokenize.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/subword_tokenize.rst new file mode 100644 index 00000000000..818714bec6a --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/subword_tokenize.rst @@ -0,0 +1,6 @@ +================ +subword_tokenize +================ + +.. automodule:: pylibcudf.nvtext.subword_tokenize + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/tokenize.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/tokenize.rst new file mode 100644 index 00000000000..85c5a27b09d --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/tokenize.rst @@ -0,0 +1,6 @@ +======== +tokenize +======== + +.. automodule:: pylibcudf.nvtext.tokenize + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/findall.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/findall.rst index 9850ee10098..699e38ebbe5 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/findall.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/findall.rst @@ -1,6 +1,6 @@ -==== -find -==== +======= +findall +======= .. 
automodule:: pylibcudf.strings.findall :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst index c8c0016126d..ae670b5bd8a 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst @@ -16,6 +16,7 @@ strings regex_flags regex_program repeat + replace_re replace side_type slice diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/replace_re.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/replace_re.rst new file mode 100644 index 00000000000..5bf715ef657 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/replace_re.rst @@ -0,0 +1,6 @@ +========== +replace_re +========== + +.. automodule:: pylibcudf.strings.replace_re + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/table.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/table.rst index e39ca18a12b..4de9bced86f 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/table.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/table.rst @@ -1,5 +1,5 @@ ===== -Table +table ===== .. automodule:: pylibcudf.table diff --git a/docs/cudf/source/user_guide/api_docs/string_handling.rst b/docs/cudf/source/user_guide/api_docs/string_handling.rst index ab0f085e1a6..91d3e33960b 100644 --- a/docs/cudf/source/user_guide/api_docs/string_handling.rst +++ b/docs/cudf/source/user_guide/api_docs/string_handling.rst @@ -60,6 +60,7 @@ strings and apply several methods to it. These can be accessed like isupper istimestamp istitle + jaccard_index join len like @@ -67,6 +68,7 @@ strings and apply several methods to it. These can be accessed like lower lstrip match + minhash ngrams ngrams_tokenize normalize_characters @@ -90,7 +92,6 @@ strings and apply several methods to it. 
These can be accessed like slice_from slice_replace split - rsplit startswith strip swapcase diff --git a/docs/cudf/source/user_guide/api_docs/subword_tokenize.rst b/docs/cudf/source/user_guide/api_docs/subword_tokenize.rst deleted file mode 100644 index cd240fe4db4..00000000000 --- a/docs/cudf/source/user_guide/api_docs/subword_tokenize.rst +++ /dev/null @@ -1,12 +0,0 @@ -================ -SubwordTokenizer -================ -.. currentmodule:: cudf.core.subword_tokenizer - -Constructor -~~~~~~~~~~~ -.. autosummary:: - :toctree: api/ - - SubwordTokenizer - SubwordTokenizer.__call__ diff --git a/docs/cudf/source/user_guide/api_docs/tokenize_vocabulary.rst b/docs/cudf/source/user_guide/api_docs/tokenize_vocabulary.rst new file mode 100644 index 00000000000..1b5c965f3c9 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/tokenize_vocabulary.rst @@ -0,0 +1,12 @@ +================== +TokenizeVocabulary +================== +.. currentmodule:: cudf.core.tokenize_vocabulary + +Constructor +~~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + TokenizeVocabulary + TokenizeVocabulary.tokenize diff --git a/docs/cudf/source/user_guide/guide-to-udfs.ipynb b/docs/cudf/source/user_guide/guide-to-udfs.ipynb index 75eafcc5387..abfe5a1b178 100644 --- a/docs/cudf/source/user_guide/guide-to-udfs.ipynb +++ b/docs/cudf/source/user_guide/guide-to-udfs.ipynb @@ -101,6 +101,8 @@ "outputs": [], "source": [ "# define a scalar function\n", + "\n", + "\n", "def f(x):\n", " return x + 1" ] @@ -247,6 +249,8 @@ "outputs": [], "source": [ "# redefine the same function from above\n", + "\n", + "\n", "def f(x):\n", " return x + 1" ] @@ -1622,6 +1626,8 @@ "outputs": [], "source": [ "# a user defined aggregation function.\n", + "\n", + "\n", "def udaf(df):\n", " return df[\"b\"].max() - df[\"b\"].min() / 2" ] diff --git a/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java b/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java index e4106574a19..bfb959b12c1 100644 --- a/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java +++ b/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -155,6 +155,16 @@ public static HostMemoryBuffer allocate(long bytes) { return allocate(bytes, defaultPreferPinned); } + /** + * Allocate host memory bypassing the default allocator. This is intended to only be used by other allocators. + * Pinned memory will not be used for these allocations. + * @param bytes size in bytes to allocate + * @return the newly created buffer + */ + public static HostMemoryBuffer allocateRaw(long bytes) { + return new HostMemoryBuffer(UnsafeMemoryAccessor.allocate(bytes), bytes); + } + /** * Create a host buffer that is memory-mapped to a file. 
* @param path path to the file to map into host memory @@ -245,8 +255,10 @@ public final void copyFromHostBuffer(long destOffset, HostMemoryBuffer srcData, * @param destOffset offset in bytes in this buffer to start copying to * @param in input stream to copy bytes from * @param byteLength number of bytes to copy + * @throws EOFException If there are not enough bytes in the stream to copy. + * @throws IOException If there is an error reading from the stream. */ - final void copyFromStream(long destOffset, InputStream in, long byteLength) throws IOException { + public final void copyFromStream(long destOffset, InputStream in, long byteLength) throws IOException { addressOutOfBoundsCheck(address + destOffset, byteLength, "copy from stream"); byte[] arrayBuffer = new byte[(int) Math.min(1024 * 128, byteLength)]; long left = byteLength; @@ -254,7 +266,7 @@ final void copyFromStream(long destOffset, InputStream in, long byteLength) thro int amountToCopy = (int) Math.min(arrayBuffer.length, left); int amountRead = in.read(arrayBuffer, 0, amountToCopy); if (amountRead < 0) { - throw new EOFException(); + throw new EOFException("Unexpected end of stream, expected " + left + " more bytes"); } setBytes(destOffset, arrayBuffer, 0, amountRead); destOffset += amountRead; diff --git a/java/src/main/java/ai/rapids/cudf/HostMemoryReservation.java b/java/src/main/java/ai/rapids/cudf/HostMemoryReservation.java deleted file mode 100644 index 72c2e659372..00000000000 --- a/java/src/main/java/ai/rapids/cudf/HostMemoryReservation.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package ai.rapids.cudf; - -/** - * Represents some amount of host memory that has been reserved. A reservation guarantees that one - * or more allocations up to the reserved amount, minus padding for alignment will succeed. A - * reservation typically guarantees the amount can be allocated one, meaning when a buffer - * allocated from a reservation is freed it is not returned to the reservation, but to the pool of - * memory the reservation originally came from. If more memory is allocated from the reservation - * an OutOfMemoryError may be thrown, but it is not guaranteed to happen. - * - * When the reservation is closed any unused reservation will be returned to the pool of memory - * the reservation came from. - */ -public interface HostMemoryReservation extends HostMemoryAllocator, AutoCloseable {} diff --git a/java/src/main/java/ai/rapids/cudf/RegexFlag.java b/java/src/main/java/ai/rapids/cudf/RegexFlag.java index 7ed8e0354c9..68a3856f37d 100644 --- a/java/src/main/java/ai/rapids/cudf/RegexFlag.java +++ b/java/src/main/java/ai/rapids/cudf/RegexFlag.java @@ -28,7 +28,16 @@ public enum RegexFlag { DEFAULT(0), // default MULTILINE(8), // the '^' and '$' honor new-line characters DOTALL(16), // the '.' 
matching includes new-line characters - ASCII(256); // use only ASCII when matching built-in character classes + ASCII(256), // use only ASCII when matching built-in character classes + /** + * EXT_NEWLINE(512): Extends line delimiters to include the following Unicode characters + * - NEXT_LINE ('\u0085') + * - LINE_SEPARATOR ('\u2028') + * - PARAGRAPH_SEPARATOR ('\u2029') + * - CARRIAGE_RETURN ('\r') + * - NEW_LINE ('\n') + */ + EXT_NEWLINE(512); final int nativeId; // Native id, for use with libcudf. private RegexFlag(int nativeId) { // Only constant values should be used diff --git a/java/src/main/java/ai/rapids/cudf/Schema.java b/java/src/main/java/ai/rapids/cudf/Schema.java index 76b2799aad6..ae8a0e17f9d 100644 --- a/java/src/main/java/ai/rapids/cudf/Schema.java +++ b/java/src/main/java/ai/rapids/cudf/Schema.java @@ -29,26 +29,52 @@ public class Schema { public static final Schema INFERRED = new Schema(); private final DType topLevelType; + + /** + * Default value for precision value, when it is not specified or the column type is not decimal. + */ + private static final int UNKNOWN_PRECISION = -1; + + /** + * Store precision for the top level column, only applicable if the column is a decimal type. + *

+ * This variable is not designed to be used by any libcudf's APIs since libcudf does not support + * precisions for fixed point numbers. + * Instead, it is used only to pass down the precision values from Spark's DecimalType to the + * JNI level, where some JNI functions require these values to perform their operations. + */ + private final int topLevelPrecision; + private final List childNames; private final List childSchemas; private boolean flattened = false; private String[] flattenedNames; private DType[] flattenedTypes; + private int[] flattenedPrecisions; private int[] flattenedCounts; private Schema(DType topLevelType, + int topLevelPrecision, List childNames, List childSchemas) { this.topLevelType = topLevelType; + this.topLevelPrecision = topLevelPrecision; this.childNames = childNames; this.childSchemas = childSchemas; } + private Schema(DType topLevelType, + List childNames, + List childSchemas) { + this(topLevelType, UNKNOWN_PRECISION, childNames, childSchemas); + } + /** * Inferred schema. 
*/ private Schema() { topLevelType = null; + topLevelPrecision = UNKNOWN_PRECISION; childNames = null; childSchemas = null; } @@ -104,14 +130,17 @@ private void flattenIfNeeded() { if (flatLen == 0) { flattenedNames = null; flattenedTypes = null; + flattenedPrecisions = null; flattenedCounts = null; } else { String[] names = new String[flatLen]; DType[] types = new DType[flatLen]; + int[] precisions = new int[flatLen]; int[] counts = new int[flatLen]; - collectFlattened(names, types, counts, 0); + collectFlattened(names, types, precisions, counts, 0); flattenedNames = names; flattenedTypes = types; + flattenedPrecisions = precisions; flattenedCounts = counts; } flattened = true; @@ -128,19 +157,20 @@ private int flattenedLength(int startingLength) { return startingLength; } - private int collectFlattened(String[] names, DType[] types, int[] counts, int offset) { + private int collectFlattened(String[] names, DType[] types, int[] precisions, int[] counts, int offset) { if (childSchemas != null) { for (int i = 0; i < childSchemas.size(); i++) { Schema child = childSchemas.get(i); names[offset] = childNames.get(i); types[offset] = child.topLevelType; + precisions[offset] = child.topLevelPrecision; if (child.childNames != null) { counts[offset] = child.childNames.size(); } else { counts[offset] = 0; } offset++; - offset = this.childSchemas.get(i).collectFlattened(names, types, counts, offset); + offset = this.childSchemas.get(i).collectFlattened(names, types, precisions, counts, offset); } } return offset; @@ -226,6 +256,22 @@ public int[] getFlattenedTypeScales() { return ret; } + /** + * Get decimal precisions of the columns' types flattened from all levels in schema by + * depth-first traversal. + *

+ * This is used to pass down the decimal precisions from Spark to only the JNI layer, where + * some JNI functions require precision values to perform their operations. + * Decimal precisions should not be consumed by any libcudf's APIs since libcudf does not + * support precisions for fixed point numbers. + * + * @return An array containing decimal precision of all columns in schema. + */ + public int[] getFlattenedDecimalPrecisions() { + flattenIfNeeded(); + return flattenedPrecisions; + } + /** * Get the types of the columns in schema flattened from all levels by depth-first traversal. * @return An array containing types of all columns in schema. @@ -307,11 +353,13 @@ public HostColumnVector.DataType asHostDataType() { public static class Builder { private final DType topLevelType; + private final int topLevelPrecision; private final List names; private final List types; - private Builder(DType topLevelType) { + private Builder(DType topLevelType, int topLevelPrecision) { this.topLevelType = topLevelType; + this.topLevelPrecision = topLevelPrecision; if (topLevelType == DType.STRUCT || topLevelType == DType.LIST) { // There can be children names = new ArrayList<>(); @@ -322,14 +370,19 @@ private Builder(DType topLevelType) { } } + private Builder(DType topLevelType) { + this(topLevelType, UNKNOWN_PRECISION); + } + /** * Add a new column * @param type the type of column to add * @param name the name of the column to add (Ignored for list types) + * @param precision the decimal precision, only applicable for decimal types * @return the builder for the new column. This should really only be used when the type * passed in is a LIST or a STRUCT. 
*/ - public Builder addColumn(DType type, String name) { + public Builder addColumn(DType type, String name, int precision) { if (names == null) { throw new IllegalStateException("A column of type " + topLevelType + " cannot have children"); @@ -340,21 +393,31 @@ public Builder addColumn(DType type, String name) { if (names.contains(name)) { throw new IllegalStateException("Cannot add duplicate names to a schema"); } - Builder ret = new Builder(type); + Builder ret = new Builder(type, precision); types.add(ret); names.add(name); return ret; } + public Builder addColumn(DType type, String name) { + return addColumn(type, name, UNKNOWN_PRECISION); + } + /** * Adds a single column to the current schema. addColumn is preferred as it can be used * to support nested types. * @param type the type of the column. * @param name the name of the column. + * @param precision the decimal precision, only applicable for decimal types. * @return this for chaining. */ + public Builder column(DType type, String name, int precision) { + addColumn(type, name, precision); + return this; + } + public Builder column(DType type, String name) { - addColumn(type, name); + addColumn(type, name, UNKNOWN_PRECISION); return this; } @@ -366,7 +429,7 @@ public Schema build() { children.add(b.build()); } } - return new Schema(topLevelType, names, children); + return new Schema(topLevelType, topLevelPrecision, names, children); } } } diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 708744569df..14c290b300a 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -31,6 +31,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.EnumSet; import java.util.List; import java.util.Optional; import java.util.concurrent.atomic.AtomicInteger; @@ -3877,6 +3878,43 @@ void testExtractRe() { } } + @Test 
+void testExtractReWithMultiLineDelimiters() { + String NEXT_LINE = "\u0085"; + String LINE_SEPARATOR = "\u2028"; + String PARAGRAPH_SEPARATOR = "\u2029"; + String CARRIAGE_RETURN = "\r"; + String NEW_LINE = "\n"; + + try (ColumnVector input = ColumnVector.fromStrings( + "boo:" + NEXT_LINE + "boo::" + LINE_SEPARATOR + "boo:::", + "boo:::" + LINE_SEPARATOR + "zzé" + CARRIAGE_RETURN + "lll", + "boo::", + "", + "boo::" + NEW_LINE, + "boo::" + CARRIAGE_RETURN, + "boo:" + NEXT_LINE + "boo::" + PARAGRAPH_SEPARATOR, + "boo:" + NEW_LINE + "boo::" + LINE_SEPARATOR, + "boo:" + NEXT_LINE + "boo::" + NEXT_LINE); + Table expected_ext_newline = new Table.TestBuilder() + .column("boo:::", null, "boo::", null, "boo::", "boo::", "boo::", "boo::", "boo::") + .build(); + Table expected_default = new Table.TestBuilder() + .column("boo:::", null, "boo::", null, "boo::", null, null, null, null) + .build()) { + + // Regex pattern to match 'boo:' followed by one or more colons at the end of the string + try (Table found = input.extractRe(new RegexProgram("(boo:+)$", EnumSet.of(RegexFlag.EXT_NEWLINE)))) { + assertColumnsAreEqual(expected_ext_newline.getColumns()[0], found.getColumns()[0]); + } + + try (Table found = input.extractRe(new RegexProgram("(boo:+)$", EnumSet.of(RegexFlag.DEFAULT)))) { + assertColumnsAreEqual(expected_default.getColumns()[0], found.getColumns()[0]); + } + } + } + + @Test void testExtractAllRecord() { String pattern = "([ab])(\\d)"; diff --git a/pyproject.toml b/pyproject.toml index 661c68ee62e..6933484f4e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,8 @@ select = [ "F", # pycodestyle Warning "W", + # isort + "I", # no-blank-line-before-function "D201", # one-blank-line-after-class diff --git a/python/cudf/benchmarks/conftest.py b/python/cudf/benchmarks/conftest.py index 7b2b71cf216..0e4afadccf5 100644 --- a/python/cudf/benchmarks/conftest.py +++ b/python/cudf/benchmarks/conftest.py @@ -56,27 +56,23 @@ # into the main repo. 
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "common")) -from config import cudf # noqa: W0611, E402, F401 -from utils import ( # noqa: E402 - OrderedSet, - collapse_fixtures, - column_generators, - make_fixture, -) - # Turn off isort until we upgrade to 5.8.0 # https://github.com/pycqa/isort/issues/1594 -# isort: off from config import ( # noqa: W0611, E402, F401 NUM_COLS, NUM_ROWS, collect_ignore, + cudf, # noqa: W0611, E402, F401 pytest_collection_modifyitems, pytest_sessionfinish, pytest_sessionstart, ) - -# isort: on +from utils import ( # noqa: E402 + OrderedSet, + collapse_fixtures, + column_generators, + make_fixture, +) @pytest_cases.fixture(params=[0, 1], ids=["AxisIndex", "AxisColumn"]) diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index 065655505b8..94dbdf5534d 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -688,15 +688,18 @@ cdef class Column: # special case for string column is_string_column = (cv.type().id() == libcudf_types.type_id.STRING) if is_string_column: - # get the size from offset child column (device to host copy) - offsets_column_index = 0 - offset_child_column = cv.child(offsets_column_index) - if offset_child_column.size() == 0: + if cv.num_children() == 0: base_nbytes = 0 else: - chars_size = get_element( - offset_child_column, offset_child_column.size()-1).value - base_nbytes = chars_size + # get the size from offset child column (device to host copy) + offsets_column_index = 0 + offset_child_column = cv.child(offsets_column_index) + if offset_child_column.size() == 0: + base_nbytes = 0 + else: + chars_size = get_element( + offset_child_column, offset_child_column.size()-1).value + base_nbytes = chars_size if data_ptr: if data_owner is None: diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index 265b92ff645..c26e1de23a1 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -4,7 
+4,7 @@ import pickle from libc.stdint cimport uint8_t, uintptr_t from libcpp cimport bool -from libcpp.memory cimport make_shared, shared_ptr, unique_ptr +from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from libcpp.vector cimport vector @@ -30,10 +30,6 @@ from libcpp.memory cimport make_unique cimport pylibcudf.libcudf.contiguous_split as cpp_contiguous_split from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.lists.gather cimport ( - segmented_gather as cpp_segmented_gather, -) -from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.types cimport size_type @@ -340,26 +336,6 @@ def get_element(Column input_column, size_type index): ) -@acquire_spill_lock() -def segmented_gather(Column source_column, Column gather_map): - cdef shared_ptr[lists_column_view] source_LCV = ( - make_shared[lists_column_view](source_column.view()) - ) - cdef shared_ptr[lists_column_view] gather_map_LCV = ( - make_shared[lists_column_view](gather_map.view()) - ) - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_segmented_gather( - source_LCV.get()[0], gather_map_LCV.get()[0]) - ) - - result = Column.from_unique_ptr(move(c_result)) - return result - - cdef class _CPackedColumns: @staticmethod diff --git a/python/cudf/cudf/_lib/hash.pyx b/python/cudf/cudf/_lib/hash.pyx index 9b7ab0888d2..89309b36371 100644 --- a/python/cudf/cudf/_lib/hash.pyx +++ b/python/cudf/cudf/_lib/hash.pyx @@ -1,27 +1,12 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
-from cudf.core.buffer import acquire_spill_lock +import pylibcudf as plc -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move +from cudf.core.buffer import acquire_spill_lock -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.hash cimport ( - md5, - murmurhash3_x86_32, - sha1, - sha224, - sha256, - sha384, - sha512, - xxhash_64, -) -from pylibcudf.libcudf.table.table_view cimport table_view +from pylibcudf.table cimport Table from cudf._lib.column cimport Column -from cudf._lib.utils cimport table_view_from_columns - -import pylibcudf as plc @acquire_spill_lock() @@ -37,32 +22,26 @@ def hash_partition(list source_columns, list columns_to_hash, @acquire_spill_lock() def hash(list source_columns, str method, int seed=0): - cdef table_view c_source_view = table_view_from_columns(source_columns) - cdef unique_ptr[column] c_result + cdef Table ctbl = Table( + [c.to_pylibcudf(mode="read") for c in source_columns] + ) if method == "murmur3": - with nogil: - c_result = move(murmurhash3_x86_32(c_source_view, seed)) + return Column.from_pylibcudf(plc.hashing.murmurhash3_x86_32(ctbl, seed)) + elif method == "xxhash64": + return Column.from_pylibcudf(plc.hashing.xxhash_64(ctbl, seed)) elif method == "md5": - with nogil: - c_result = move(md5(c_source_view)) + return Column.from_pylibcudf(plc.hashing.md5(ctbl)) elif method == "sha1": - with nogil: - c_result = move(sha1(c_source_view)) + return Column.from_pylibcudf(plc.hashing.sha1(ctbl)) elif method == "sha224": - with nogil: - c_result = move(sha224(c_source_view)) + return Column.from_pylibcudf(plc.hashing.sha224(ctbl)) elif method == "sha256": - with nogil: - c_result = move(sha256(c_source_view)) + return Column.from_pylibcudf(plc.hashing.sha256(ctbl)) elif method == "sha384": - with nogil: - c_result = move(sha384(c_source_view)) + return Column.from_pylibcudf(plc.hashing.sha384(ctbl)) elif method == "sha512": - with nogil: - c_result = move(sha512(c_source_view)) - elif 
method == "xxhash64": - with nogil: - c_result = move(xxhash_64(c_source_view, seed)) + return Column.from_pylibcudf(plc.hashing.sha512(ctbl)) else: - raise ValueError(f"Unsupported hash function: {method}") - return Column.from_unique_ptr(move(c_result)) + raise ValueError( + f"Unsupported hashing algorithm {method}." + ) diff --git a/python/cudf/cudf/_lib/interop.pyx b/python/cudf/cudf/_lib/interop.pyx index 1dc586bb257..1c9d3a01b80 100644 --- a/python/cudf/cudf/_lib/interop.pyx +++ b/python/cudf/cudf/_lib/interop.pyx @@ -1,49 +1,22 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. -from cpython cimport pycapsule -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - import pylibcudf -from pylibcudf.libcudf.interop cimport ( - DLManagedTensor, - from_dlpack as cpp_from_dlpack, - to_dlpack as cpp_to_dlpack, -) -from pylibcudf.libcudf.table.table cimport table -from pylibcudf.libcudf.table.table_view cimport table_view - -from cudf._lib.utils cimport ( - columns_from_pylibcudf_table, - columns_from_unique_ptr, - table_view_from_columns, -) +from cudf._lib.utils cimport columns_from_pylibcudf_table from cudf.core.buffer import acquire_spill_lock from cudf.core.dtypes import ListDtype, StructDtype -def from_dlpack(dlpack_capsule): +def from_dlpack(object dlpack_capsule): """ Converts a DLPack Tensor PyCapsule into a list of columns. DLPack Tensor PyCapsule is expected to have the name "dltensor". 
""" - cdef DLManagedTensor* dlpack_tensor = pycapsule.\ - PyCapsule_GetPointer(dlpack_capsule, 'dltensor') - pycapsule.PyCapsule_SetName(dlpack_capsule, 'used_dltensor') - - cdef unique_ptr[table] c_result - - with nogil: - c_result = move( - cpp_from_dlpack(dlpack_tensor) - ) - - res = columns_from_unique_ptr(move(c_result)) - dlpack_tensor.deleter(dlpack_tensor) - return res + return columns_from_pylibcudf_table( + pylibcudf.interop.from_dlpack(dlpack_capsule) + ) def to_dlpack(list source_columns): @@ -52,39 +25,13 @@ def to_dlpack(list source_columns): DLPack Tensor PyCapsule will have the name "dltensor". """ - if any(column.null_count for column in source_columns): - raise ValueError( - "Cannot create a DLPack tensor with null values. \ - Input is required to have null count as zero." - ) - - cdef DLManagedTensor *dlpack_tensor - cdef table_view source_table_view = table_view_from_columns(source_columns) - - with nogil: - dlpack_tensor = cpp_to_dlpack( - source_table_view + return pylibcudf.interop.to_dlpack( + pylibcudf.Table( + [col.to_pylibcudf(mode="read") for col in source_columns] ) - - return pycapsule.PyCapsule_New( - dlpack_tensor, - 'dltensor', - dlmanaged_tensor_pycapsule_deleter ) -cdef void dlmanaged_tensor_pycapsule_deleter(object pycap_obj) noexcept: - cdef DLManagedTensor* dlpack_tensor = 0 - try: - dlpack_tensor = pycapsule.PyCapsule_GetPointer( - pycap_obj, 'used_dltensor') - return # we do not call a used capsule's deleter - except Exception: - dlpack_tensor = pycapsule.PyCapsule_GetPointer( - pycap_obj, 'dltensor') - dlpack_tensor.deleter(dlpack_tensor) - - def gather_metadata(object cols_dtypes): """ Generates a ColumnMetadata vector for each column. 
diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx index 7e8710bedb6..12432ac6d5d 100644 --- a/python/cudf/cudf/_lib/lists.pyx +++ b/python/cudf/cudf/_lib/lists.pyx @@ -9,7 +9,7 @@ from pylibcudf.libcudf.types cimport null_order, size_type from cudf._lib.column cimport Column from cudf._lib.utils cimport columns_from_pylibcudf_table -import pylibcudf +import pylibcudf as plc from pylibcudf cimport Scalar @@ -17,7 +17,7 @@ from pylibcudf cimport Scalar @acquire_spill_lock() def count_elements(Column col): return Column.from_pylibcudf( - pylibcudf.lists.count_elements( + plc.lists.count_elements( col.to_pylibcudf(mode="read")) ) @@ -25,8 +25,8 @@ def count_elements(Column col): @acquire_spill_lock() def explode_outer(list source_columns, int explode_column_idx): return columns_from_pylibcudf_table( - pylibcudf.lists.explode_outer( - pylibcudf.Table([c.to_pylibcudf(mode="read") for c in source_columns]), + plc.lists.explode_outer( + plc.Table([c.to_pylibcudf(mode="read") for c in source_columns]), explode_column_idx, ) ) @@ -35,7 +35,7 @@ def explode_outer(list source_columns, int explode_column_idx): @acquire_spill_lock() def distinct(Column col, bool nulls_equal, bool nans_all_equal): return Column.from_pylibcudf( - pylibcudf.lists.distinct( + plc.lists.distinct( col.to_pylibcudf(mode="read"), nulls_equal, nans_all_equal, @@ -46,7 +46,7 @@ def distinct(Column col, bool nulls_equal, bool nans_all_equal): @acquire_spill_lock() def sort_lists(Column col, bool ascending, str na_position): return Column.from_pylibcudf( - pylibcudf.lists.sort_lists( + plc.lists.sort_lists( col.to_pylibcudf(mode="read"), ascending, null_order.BEFORE if na_position == "first" else null_order.AFTER, @@ -58,7 +58,7 @@ def sort_lists(Column col, bool ascending, str na_position): @acquire_spill_lock() def extract_element_scalar(Column col, size_type index): return Column.from_pylibcudf( - pylibcudf.lists.extract_list_element( + plc.lists.extract_list_element( 
col.to_pylibcudf(mode="read"), index, ) @@ -68,7 +68,7 @@ def extract_element_scalar(Column col, size_type index): @acquire_spill_lock() def extract_element_column(Column col, Column index): return Column.from_pylibcudf( - pylibcudf.lists.extract_list_element( + plc.lists.extract_list_element( col.to_pylibcudf(mode="read"), index.to_pylibcudf(mode="read"), ) @@ -78,7 +78,7 @@ def extract_element_column(Column col, Column index): @acquire_spill_lock() def contains_scalar(Column col, py_search_key): return Column.from_pylibcudf( - pylibcudf.lists.contains( + plc.lists.contains( col.to_pylibcudf(mode="read"), py_search_key.device_value.c_value, ) @@ -88,7 +88,7 @@ def contains_scalar(Column col, py_search_key): @acquire_spill_lock() def index_of_scalar(Column col, object py_search_key): return Column.from_pylibcudf( - pylibcudf.lists.index_of( + plc.lists.index_of( col.to_pylibcudf(mode="read"), py_search_key.device_value.c_value, True, @@ -99,7 +99,7 @@ def index_of_scalar(Column col, object py_search_key): @acquire_spill_lock() def index_of_column(Column col, Column search_keys): return Column.from_pylibcudf( - pylibcudf.lists.index_of( + plc.lists.index_of( col.to_pylibcudf(mode="read"), search_keys.to_pylibcudf(mode="read"), True, @@ -110,8 +110,8 @@ def index_of_column(Column col, Column search_keys): @acquire_spill_lock() def concatenate_rows(list source_columns): return Column.from_pylibcudf( - pylibcudf.lists.concatenate_rows( - pylibcudf.Table([ + plc.lists.concatenate_rows( + plc.Table([ c.to_pylibcudf(mode="read") for c in source_columns ]) ) @@ -121,8 +121,18 @@ def concatenate_rows(list source_columns): @acquire_spill_lock() def concatenate_list_elements(Column input_column, dropna=False): return Column.from_pylibcudf( - pylibcudf.lists.concatenate_list_elements( + plc.lists.concatenate_list_elements( input_column.to_pylibcudf(mode="read"), dropna, ) ) + + +@acquire_spill_lock() +def segmented_gather(Column source_column, Column gather_map): + return 
Column.from_pylibcudf( + plc.lists.segmented_gather( + source_column.to_pylibcudf(mode="read"), + gather_map.to_pylibcudf(mode="read"), + ) + ) diff --git a/python/cudf/cudf/_lib/nvtext/byte_pair_encode.pyx b/python/cudf/cudf/_lib/nvtext/byte_pair_encode.pyx index 0d768e24f39..2b2762eead2 100644 --- a/python/cudf/cudf/_lib/nvtext/byte_pair_encode.pyx +++ b/python/cudf/cudf/_lib/nvtext/byte_pair_encode.pyx @@ -3,49 +3,22 @@ from cudf.core.buffer import acquire_spill_lock -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.nvtext.byte_pair_encode cimport ( - bpe_merge_pairs as cpp_bpe_merge_pairs, - byte_pair_encoding as cpp_byte_pair_encoding, - load_merge_pairs as cpp_load_merge_pairs, -) -from pylibcudf.libcudf.scalar.scalar cimport string_scalar - from cudf._lib.column cimport Column -from cudf._lib.scalar cimport DeviceScalar - -cdef class BPEMergePairs: - cdef unique_ptr[cpp_bpe_merge_pairs] c_obj - - def __cinit__(self, Column merge_pairs): - cdef column_view c_pairs = merge_pairs.view() - with nogil: - self.c_obj = move(cpp_load_merge_pairs(c_pairs)) +from pylibcudf import nvtext +from pylibcudf.nvtext.byte_pair_encode import BPEMergePairs # no-cython-lint @acquire_spill_lock() def byte_pair_encoding( Column strings, - BPEMergePairs merge_pairs, + object merge_pairs, object separator ): - cdef column_view c_strings = strings.view() - cdef DeviceScalar d_separator = separator.device_value - cdef const string_scalar* c_separator = d_separator\ - .get_raw_ptr() - cdef unique_ptr[column] c_result - with nogil: - c_result = move( - cpp_byte_pair_encoding( - c_strings, - merge_pairs.c_obj.get()[0], - c_separator[0] - ) + return Column.from_pylibcudf( + nvtext.byte_pair_encode.byte_pair_encoding( + strings.to_pylibcudf(mode="read"), + merge_pairs, + separator.device_value.c_value ) - - return 
Column.from_unique_ptr(move(c_result)) + ) diff --git a/python/cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx b/python/cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx index 6521116eafe..c125d92a24e 100644 --- a/python/cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx +++ b/python/cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx @@ -14,10 +14,11 @@ def ngrams_tokenize( object py_delimiter, object py_separator ): - result = nvtext.ngrams_tokenize.ngrams_tokenize( - input.to_pylibcudf(mode="read"), - ngrams, - py_delimiter.device_value.c_value, - py_separator.device_value.c_value + return Column.from_pylibcudf( + nvtext.ngrams_tokenize.ngrams_tokenize( + input.to_pylibcudf(mode="read"), + ngrams, + py_delimiter.device_value.c_value, + py_separator.device_value.c_value + ) ) - return Column.from_pylibcudf(result) diff --git a/python/cudf/cudf/_lib/nvtext/normalize.pyx b/python/cudf/cudf/_lib/nvtext/normalize.pyx index 5e86a9ce959..cc45123dd0a 100644 --- a/python/cudf/cudf/_lib/nvtext/normalize.pyx +++ b/python/cudf/cudf/_lib/nvtext/normalize.pyx @@ -3,36 +3,26 @@ from cudf.core.buffer import acquire_spill_lock from libcpp cimport bool -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.nvtext.normalize cimport ( - normalize_characters as cpp_normalize_characters, - normalize_spaces as cpp_normalize_spaces, -) from cudf._lib.column cimport Column - -@acquire_spill_lock() -def normalize_spaces(Column strings): - cdef column_view c_strings = strings.view() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_normalize_spaces(c_strings)) - - return Column.from_unique_ptr(move(c_result)) +from pylibcudf import nvtext @acquire_spill_lock() -def normalize_characters(Column strings, bool do_lower=True): - cdef column_view c_strings = strings.view() - cdef unique_ptr[column] c_result +def normalize_spaces(Column input): + 
return Column.from_pylibcudf( + nvtext.normalize.normalize_spaces( + input.to_pylibcudf(mode="read") + ) + ) - with nogil: - c_result = move(cpp_normalize_characters(c_strings, do_lower)) - return Column.from_unique_ptr(move(c_result)) +@acquire_spill_lock() +def normalize_characters(Column input, bool do_lower=True): + return Column.from_pylibcudf( + nvtext.normalize.normalize_characters( + input.to_pylibcudf(mode="read"), + do_lower, + ) + ) diff --git a/python/cudf/cudf/_lib/nvtext/replace.pyx b/python/cudf/cudf/_lib/nvtext/replace.pyx index 61ae3da5782..bec56ade83c 100644 --- a/python/cudf/cudf/_lib/nvtext/replace.pyx +++ b/python/cudf/cudf/_lib/nvtext/replace.pyx @@ -2,20 +2,10 @@ from cudf.core.buffer import acquire_spill_lock -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.nvtext.replace cimport ( - filter_tokens as cpp_filter_tokens, - replace_tokens as cpp_replace_tokens, -) -from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.types cimport size_type from cudf._lib.column cimport Column -from cudf._lib.scalar cimport DeviceScalar +from pylibcudf import nvtext @acquire_spill_lock() @@ -30,27 +20,14 @@ def replace_tokens(Column strings, provided. 
""" - cdef DeviceScalar delimiter = py_delimiter.device_value - - cdef column_view c_strings = strings.view() - cdef column_view c_targets = targets.view() - cdef column_view c_replacements = replacements.view() - - cdef const string_scalar* c_delimiter = delimiter\ - .get_raw_ptr() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_replace_tokens( - c_strings, - c_targets, - c_replacements, - c_delimiter[0], - ) + return Column.from_pylibcudf( + nvtext.replace.replace_tokens( + strings.to_pylibcudf(mode="read"), + targets.to_pylibcudf(mode="read"), + replacements.to_pylibcudf(mode="read"), + py_delimiter.device_value.c_value, ) - - return Column.from_unique_ptr(move(c_result)) + ) @acquire_spill_lock() @@ -65,24 +42,11 @@ def filter_tokens(Column strings, character provided. """ - cdef DeviceScalar replacement = py_replacement.device_value - cdef DeviceScalar delimiter = py_delimiter.device_value - - cdef column_view c_strings = strings.view() - cdef const string_scalar* c_repl = replacement\ - .get_raw_ptr() - cdef const string_scalar* c_delimiter = delimiter\ - .get_raw_ptr() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_filter_tokens( - c_strings, - min_token_length, - c_repl[0], - c_delimiter[0], - ) + return Column.from_pylibcudf( + nvtext.replace.filter_tokens( + strings.to_pylibcudf(mode="read"), + min_token_length, + py_replacement.device_value.c_value, + py_delimiter.device_value.c_value, ) - - return Column.from_unique_ptr(move(c_result)) + ) diff --git a/python/cudf/cudf/_lib/nvtext/stemmer.pyx b/python/cudf/cudf/_lib/nvtext/stemmer.pyx index 5bf25562fed..63a389b64d5 100644 --- a/python/cudf/cudf/_lib/nvtext/stemmer.pyx +++ b/python/cudf/cudf/_lib/nvtext/stemmer.pyx @@ -1,24 +1,19 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
-from cudf.core.buffer import acquire_spill_lock - -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - from enum import IntEnum -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view +from cudf.core.buffer import acquire_spill_lock + from pylibcudf.libcudf.nvtext.stemmer cimport ( - is_letter as cpp_is_letter, letter_type, - porter_stemmer_measure as cpp_porter_stemmer_measure, underlying_type_t_letter_type, ) from pylibcudf.libcudf.types cimport size_type from cudf._lib.column cimport Column +from pylibcudf import nvtext + class LetterType(IntEnum): CONSONANT = letter_type.CONSONANT @@ -27,43 +22,34 @@ class LetterType(IntEnum): @acquire_spill_lock() def porter_stemmer_measure(Column strings): - cdef column_view c_strings = strings.view() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_porter_stemmer_measure(c_strings)) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf( + nvtext.stemmer.porter_stemmer_measure( + strings.to_pylibcudf(mode="read"), + ) + ) @acquire_spill_lock() def is_letter(Column strings, object ltype, size_type index): - cdef column_view c_strings = strings.view() - cdef letter_type c_ltype = ( - ltype + return Column.from_pylibcudf( + nvtext.stemmer.is_letter( + strings.to_pylibcudf(mode="read"), + ltype==LetterType.VOWEL, + index, + ) ) - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_is_letter(c_strings, c_ltype, index)) - - return Column.from_unique_ptr(move(c_result)) @acquire_spill_lock() def is_letter_multi(Column strings, object ltype, Column indices): - cdef column_view c_strings = strings.view() - cdef column_view c_indices = indices.view() - cdef letter_type c_ltype = ( - ltype + return Column.from_pylibcudf( + nvtext.stemmer.is_letter( + strings.to_pylibcudf(mode="read"), + ltype==LetterType.VOWEL, + indices.to_pylibcudf(mode="read"), + ) ) - cdef unique_ptr[column] 
c_result - - with nogil: - c_result = move(cpp_is_letter(c_strings, c_ltype, c_indices)) - - return Column.from_unique_ptr(move(c_result)) diff --git a/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx b/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx index ee442ece5c6..5e0bfb74705 100644 --- a/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx +++ b/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx @@ -5,35 +5,16 @@ from libc.stdint cimport uint32_t from cudf.core.buffer import acquire_spill_lock from libcpp cimport bool -from libcpp.memory cimport unique_ptr -from libcpp.string cimport string -from libcpp.utility cimport move - -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.nvtext.subword_tokenize cimport ( - hashed_vocabulary as cpp_hashed_vocabulary, - load_vocabulary_file as cpp_load_vocabulary_file, - move as tr_move, - subword_tokenize as cpp_subword_tokenize, - tokenizer_result as cpp_tokenizer_result, -) from cudf._lib.column cimport Column - -cdef class Hashed_Vocabulary: - cdef unique_ptr[cpp_hashed_vocabulary] c_obj - - def __cinit__(self, hash_file): - cdef string c_hash_file = str(hash_file).encode() - with nogil: - self.c_obj = move(cpp_load_vocabulary_file(c_hash_file)) +from pylibcudf import nvtext @acquire_spill_lock() def subword_tokenize_inmem_hash( Column strings, - Hashed_Vocabulary hashed_vocabulary, + object hashed_vocabulary, uint32_t max_sequence_length=64, uint32_t stride=48, bool do_lower=True, @@ -42,21 +23,16 @@ def subword_tokenize_inmem_hash( """ Subword tokenizes text series by using the pre-loaded hashed vocabulary """ - cdef column_view c_strings = strings.view() - cdef cpp_tokenizer_result c_result - with nogil: - c_result = tr_move( - cpp_subword_tokenize( - c_strings, - hashed_vocabulary.c_obj.get()[0], - max_sequence_length, - stride, - do_lower, - do_truncate, - ) - ) + result = nvtext.subword_tokenize.subword_tokenize( + strings.to_pylibcudf(mode="read"), + hashed_vocabulary, + 
max_sequence_length, + stride, + do_lower, + do_truncate, + ) # return the 3 tensor components - tokens = Column.from_unique_ptr(move(c_result.tensor_token_ids)) - masks = Column.from_unique_ptr(move(c_result.tensor_attention_mask)) - metadata = Column.from_unique_ptr(move(c_result.tensor_metadata)) + tokens = Column.from_pylibcudf(result[0]) + masks = Column.from_pylibcudf(result[1]) + metadata = Column.from_pylibcudf(result[2]) return tokens, masks, metadata diff --git a/python/cudf/cudf/_lib/nvtext/tokenize.pyx b/python/cudf/cudf/_lib/nvtext/tokenize.pyx index a7e63f1e9ae..f473c48e2f7 100644 --- a/python/cudf/cudf/_lib/nvtext/tokenize.pyx +++ b/python/cudf/cudf/_lib/nvtext/tokenize.pyx @@ -2,162 +2,85 @@ from cudf.core.buffer import acquire_spill_lock -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.nvtext.tokenize cimport ( - character_tokenize as cpp_character_tokenize, - count_tokens as cpp_count_tokens, - detokenize as cpp_detokenize, - load_vocabulary as cpp_load_vocabulary, - tokenize as cpp_tokenize, - tokenize_vocabulary as cpp_tokenize_vocabulary, - tokenize_with_vocabulary as cpp_tokenize_with_vocabulary, -) -from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.types cimport size_type +from pylibcudf.nvtext.tokenize import TokenizeVocabulary # no-cython-lint + from cudf._lib.column cimport Column -from cudf._lib.scalar cimport DeviceScalar + +from pylibcudf import nvtext @acquire_spill_lock() def _tokenize_scalar(Column strings, object py_delimiter): - - cdef DeviceScalar delimiter = py_delimiter.device_value - - cdef column_view c_strings = strings.view() - cdef const string_scalar* c_delimiter = delimiter\ - .get_raw_ptr() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_tokenize( - c_strings, - c_delimiter[0], - ) + return 
Column.from_pylibcudf( + nvtext.tokenize.tokenize_scalar( + strings.to_pylibcudf(mode="read"), + py_delimiter.device_value.c_value ) - - return Column.from_unique_ptr(move(c_result)) + ) @acquire_spill_lock() def _tokenize_column(Column strings, Column delimiters): - cdef column_view c_strings = strings.view() - cdef column_view c_delimiters = delimiters.view() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_tokenize( - c_strings, - c_delimiters - ) + return Column.from_pylibcudf( + nvtext.tokenize.tokenize_column( + strings.to_pylibcudf(mode="read"), + delimiters.to_pylibcudf(mode="read"), ) - - return Column.from_unique_ptr(move(c_result)) + ) @acquire_spill_lock() def _count_tokens_scalar(Column strings, object py_delimiter): - - cdef DeviceScalar delimiter = py_delimiter.device_value - - cdef column_view c_strings = strings.view() - cdef const string_scalar* c_delimiter = delimiter\ - .get_raw_ptr() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_count_tokens( - c_strings, - c_delimiter[0] - ) + return Column.from_pylibcudf( + nvtext.tokenize.count_tokens_scalar( + strings.to_pylibcudf(mode="read"), + py_delimiter.device_value.c_value ) - - return Column.from_unique_ptr(move(c_result)) + ) @acquire_spill_lock() def _count_tokens_column(Column strings, Column delimiters): - cdef column_view c_strings = strings.view() - cdef column_view c_delimiters = delimiters.view() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_count_tokens( - c_strings, - c_delimiters - ) + return Column.from_pylibcudf( + nvtext.tokenize.count_tokens_column( + strings.to_pylibcudf(mode="read"), + delimiters.to_pylibcudf(mode="read") ) - - return Column.from_unique_ptr(move(c_result)) + ) @acquire_spill_lock() def character_tokenize(Column strings): - cdef column_view c_strings = strings.view() - cdef unique_ptr[column] c_result - with nogil: - c_result = move( - cpp_character_tokenize(c_strings) + return 
Column.from_pylibcudf( + nvtext.tokenize.character_tokenize( + strings.to_pylibcudf(mode="read") ) - - return Column.from_unique_ptr(move(c_result)) + ) @acquire_spill_lock() def detokenize(Column strings, Column indices, object py_separator): - - cdef DeviceScalar separator = py_separator.device_value - - cdef column_view c_strings = strings.view() - cdef column_view c_indices = indices.view() - cdef const string_scalar* c_separator = separator\ - .get_raw_ptr() - cdef unique_ptr[column] c_result - with nogil: - c_result = move( - cpp_detokenize(c_strings, c_indices, c_separator[0]) + return Column.from_pylibcudf( + nvtext.tokenize.detokenize( + strings.to_pylibcudf(mode="read"), + indices.to_pylibcudf(mode="read"), + py_separator.device_value.c_value ) - - return Column.from_unique_ptr(move(c_result)) - - -cdef class TokenizeVocabulary: - cdef unique_ptr[cpp_tokenize_vocabulary] c_obj - - def __cinit__(self, Column vocab): - cdef column_view c_vocab = vocab.view() - with nogil: - self.c_obj = move(cpp_load_vocabulary(c_vocab)) + ) @acquire_spill_lock() def tokenize_with_vocabulary(Column strings, - TokenizeVocabulary vocabulary, + object vocabulary, object py_delimiter, size_type default_id): - - cdef DeviceScalar delimiter = py_delimiter.device_value - cdef column_view c_strings = strings.view() - cdef const string_scalar* c_delimiter = delimiter\ - .get_raw_ptr() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_tokenize_with_vocabulary( - c_strings, - vocabulary.c_obj.get()[0], - c_delimiter[0], - default_id - ) + return Column.from_pylibcudf( + nvtext.tokenize.tokenize_with_vocabulary( + strings.to_pylibcudf(mode="read"), + vocabulary, + py_delimiter.device_value.c_value, + default_id ) - - return Column.from_unique_ptr(move(c_result)) + ) diff --git a/python/cudf/cudf/_lib/strings/replace_re.pyx b/python/cudf/cudf/_lib/strings/replace_re.pyx index fffc8b7c3f6..462d5c903e8 100644 --- a/python/cudf/cudf/_lib/strings/replace_re.pyx +++ 
b/python/cudf/cudf/_lib/strings/replace_re.pyx @@ -1,26 +1,11 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. -from cython.operator cimport dereference -from libcpp.memory cimport unique_ptr -from libcpp.string cimport string -from libcpp.utility cimport move -from libcpp.vector cimport vector +from pylibcudf.libcudf.types cimport size_type +import pylibcudf as plc from cudf.core.buffer import acquire_spill_lock -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.scalar.scalar cimport string_scalar -from pylibcudf.libcudf.strings.regex_flags cimport regex_flags -from pylibcudf.libcudf.strings.regex_program cimport regex_program -from pylibcudf.libcudf.strings.replace_re cimport ( - replace_re as cpp_replace_re, - replace_with_backrefs as cpp_replace_with_backrefs, -) -from pylibcudf.libcudf.types cimport size_type - from cudf._lib.column cimport Column -from cudf._lib.scalar cimport DeviceScalar @acquire_spill_lock() @@ -34,28 +19,16 @@ def replace_re(Column source_strings, `n` indicates the number of resplacements to be made from start. 
(-1 indicates all) """ - - cdef DeviceScalar repl = py_repl.device_value - - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - - cdef string pattern_string = str(pattern).encode() - cdef const string_scalar* scalar_repl = \ - (repl.get_raw_ptr()) - cdef regex_flags c_flags = regex_flags.DEFAULT - cdef unique_ptr[regex_program] c_prog - - with nogil: - c_prog = move(regex_program.create(pattern_string, c_flags)) - c_result = move(cpp_replace_re( - source_view, - dereference(c_prog), - scalar_repl[0], - n - )) - - return Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.replace_re.replace_re( + source_strings.to_pylibcudf(mode="read"), + plc.strings.regex_program.RegexProgram.create( + str(pattern), + plc.strings.regex_flags.RegexFlags.DEFAULT + ), + py_repl.device_value.c_value, + n + ) + return Column.from_pylibcudf(plc_column) @acquire_spill_lock() @@ -68,50 +41,29 @@ def replace_with_backrefs( new string with the extracted elements found using `pattern` regular expression in `source_strings`. 
""" - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - - cdef string pattern_string = str(pattern).encode() - cdef string repl_string = str(repl).encode() - cdef regex_flags c_flags = regex_flags.DEFAULT - cdef unique_ptr[regex_program] c_prog - - with nogil: - c_prog = move(regex_program.create(pattern_string, c_flags)) - c_result = move(cpp_replace_with_backrefs( - source_view, - dereference(c_prog), - repl_string - )) - - return Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.replace_re.replace_with_backrefs( + source_strings.to_pylibcudf(mode="read"), + plc.strings.regex_program.RegexProgram.create( + str(pattern), + plc.strings.regex_flags.RegexFlags.DEFAULT + ), + repl + ) + return Column.from_pylibcudf(plc_column) @acquire_spill_lock() def replace_multi_re(Column source_strings, - object patterns, + list patterns, Column repl_strings): """ Returns a Column after replacing occurrences of multiple regular expressions `patterns` with their corresponding strings in `repl_strings` in `source_strings`. 
""" - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - cdef column_view repl_view = repl_strings.view() - - cdef int pattern_size = len(patterns) - cdef vector[string] patterns_vector - patterns_vector.reserve(pattern_size) - - for pattern in patterns: - patterns_vector.push_back(str.encode(pattern)) - - with nogil: - c_result = move(cpp_replace_re( - source_view, - patterns_vector, - repl_view - )) - - return Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.replace_re.replace_re( + source_strings.to_pylibcudf(mode="read"), + patterns, + repl_strings.to_pylibcudf(mode="read") + ) + return Column.from_pylibcudf(plc_column) diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index 40d0c9eac3a..1589e23f716 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -7,20 +7,11 @@ from cudf.core._internals.expressions import parse_expression from cudf.core.buffer import acquire_spill_lock, as_buffer from cudf.utils import cudautils -from cython.operator cimport dereference -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - -cimport pylibcudf.libcudf.transform as libcudf_transform from pylibcudf cimport transform as plc_transform from pylibcudf.expressions cimport Expression -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.expressions cimport expression -from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport size_type from cudf._lib.column cimport Column -from cudf._lib.utils cimport table_view_from_columns import pylibcudf as plc @@ -121,13 +112,8 @@ def compute_column(list columns, tuple column_names, expr: str): # At the end, all the stack contains is the expression to evaluate. 
cdef Expression cudf_expr = visitor.expression - cdef table_view tbl = table_view_from_columns(columns) - cdef unique_ptr[column] col - with nogil: - col = move( - libcudf_transform.compute_column( - tbl, - dereference(cudf_expr.c_obj.get()) - ) - ) - return Column.from_unique_ptr(move(col)) + result = plc_transform.compute_column( + plc.Table([col.to_pylibcudf(mode="read") for col in columns]), + cudf_expr, + ) + return Column.from_pylibcudf(result) diff --git a/python/cudf/cudf/_typing.py b/python/cudf/cudf/_typing.py index 6e8ad556b08..3b13cc258ab 100644 --- a/python/cudf/cudf/_typing.py +++ b/python/cudf/cudf/_typing.py @@ -1,8 +1,8 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. import sys -from collections.abc import Callable -from typing import TYPE_CHECKING, Any, Dict, Iterable, TypeVar, Union +from collections.abc import Callable, Iterable +from typing import TYPE_CHECKING, Any, TypeVar, Union import numpy as np from pandas import Period, Timedelta, Timestamp @@ -42,7 +42,7 @@ SeriesOrSingleColumnIndex = Union["cudf.Series", "cudf.core.index.Index"] # Groupby aggregation -AggType = Union[str, Callable] -MultiColumnAggType = Union[ - AggType, Iterable[AggType], Dict[Any, Iterable[AggType]] +AggType = Union[str, Callable] # noqa: UP007 +MultiColumnAggType = Union[ # noqa: UP007 + AggType, Iterable[AggType], dict[Any, Iterable[AggType]] ] diff --git a/python/cudf/cudf/core/buffer/buffer.py b/python/cudf/cudf/core/buffer/buffer.py index caff019f575..ffa306bf93f 100644 --- a/python/cudf/cudf/core/buffer/buffer.py +++ b/python/cudf/cudf/core/buffer/buffer.py @@ -6,7 +6,7 @@ import pickle import weakref from types import SimpleNamespace -from typing import Any, Literal, Mapping +from typing import TYPE_CHECKING, Any, Literal import numpy from typing_extensions import Self @@ -18,6 +18,9 @@ from cudf.core.abc import Serializable from cudf.utils.string import format_bytes +if TYPE_CHECKING: + from collections.abc import Mapping + def 
host_memory_allocation(nbytes: int) -> memoryview: """Allocate host memory using NumPy diff --git a/python/cudf/cudf/core/buffer/exposure_tracked_buffer.py b/python/cudf/cudf/core/buffer/exposure_tracked_buffer.py index 0bd8d6054b3..ecf9807cfc2 100644 --- a/python/cudf/cudf/core/buffer/exposure_tracked_buffer.py +++ b/python/cudf/cudf/core/buffer/exposure_tracked_buffer.py @@ -2,13 +2,16 @@ from __future__ import annotations -from typing import Literal, Mapping +from typing import TYPE_CHECKING, Literal from typing_extensions import Self import cudf from cudf.core.buffer.buffer import Buffer, BufferOwner +if TYPE_CHECKING: + from collections.abc import Mapping + class ExposureTrackedBuffer(Buffer): """An exposure tracked buffer. diff --git a/python/cudf/cudf/core/byte_pair_encoding.py b/python/cudf/cudf/core/byte_pair_encoding.py index 6ca64a0a2be..8d38a5f2272 100644 --- a/python/cudf/cudf/core/byte_pair_encoding.py +++ b/python/cudf/cudf/core/byte_pair_encoding.py @@ -2,9 +2,10 @@ from __future__ import annotations +import pylibcudf as plc + import cudf from cudf._lib.nvtext.byte_pair_encode import ( - BPEMergePairs as cpp_merge_pairs, byte_pair_encoding as cpp_byte_pair_encoding, ) @@ -25,7 +26,9 @@ class BytePairEncoder: """ def __init__(self, merges_pair: "cudf.Series"): - self.merge_pairs = cpp_merge_pairs(merges_pair._column) + self.merge_pairs = plc.nvtext.byte_pair_encode.BPEMergePairs( + merges_pair._column.to_pylibcudf(mode="read") + ) def __call__(self, text, separator: str = " ") -> cudf.Series: """ diff --git a/python/cudf/cudf/core/column/__init__.py b/python/cudf/cudf/core/column/__init__.py index 06791df7dc0..a1e87d04bc9 100644 --- a/python/cudf/cudf/core/column/__init__.py +++ b/python/cudf/cudf/core/column/__init__.py @@ -29,4 +29,3 @@ Decimal128Column, DecimalBaseColumn, ) -from cudf.core.column.interval import IntervalColumn # noqa: F401 diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 
864e87b5377..087d0ed65f5 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -4,7 +4,7 @@ import warnings from functools import cached_property -from typing import TYPE_CHECKING, Any, Mapping, Sequence, cast +from typing import TYPE_CHECKING, Any, cast import numpy as np import pandas as pd @@ -26,6 +26,7 @@ if TYPE_CHECKING: from collections import abc + from collections.abc import Mapping, Sequence import numba.cuda diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 7674565e2c3..d2f9d208c77 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -4,10 +4,11 @@ import pickle from collections import abc +from collections.abc import MutableSequence, Sequence from functools import cached_property from itertools import chain from types import SimpleNamespace -from typing import TYPE_CHECKING, Any, Literal, MutableSequence, Sequence, cast +from typing import TYPE_CHECKING, Any, Literal, cast import cupy import numpy as np @@ -579,8 +580,8 @@ def _wrap_binop_normalization(self, other): if cudf.utils.utils.is_na_like(other): return cudf.Scalar(other, dtype=self.dtype) if isinstance(other, np.ndarray) and other.ndim == 0: - # Try and maintain the dtype - other = other.dtype.type(other.item()) + # Return numpy scalar + other = other[()] return self.normalize_binop_value(other) def _scatter_by_slice( diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 2c9b0baa9b6..b6dc250e64d 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -8,7 +8,7 @@ import locale import re from locale import nl_langinfo -from typing import TYPE_CHECKING, Literal, Sequence, cast +from typing import TYPE_CHECKING, Literal, cast import numpy as np import pandas as pd @@ -31,6 +31,8 @@ from cudf.utils.utils import _all_bools_with_nulls if TYPE_CHECKING: + 
from collections.abc import Sequence + from cudf._typing import ( ColumnBinaryOperand, DatetimeLikeScalar, diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 8803ebd6791..8ae06f72d1e 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -3,8 +3,9 @@ from __future__ import annotations import warnings +from collections.abc import Sequence from decimal import Decimal -from typing import TYPE_CHECKING, Sequence, cast +from typing import TYPE_CHECKING, cast import cupy as cp import numpy as np diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index c6a39199e3b..6b25e568f00 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -3,7 +3,7 @@ from __future__ import annotations from functools import cached_property -from typing import TYPE_CHECKING, Sequence, cast +from typing import TYPE_CHECKING, cast import numpy as np import pandas as pd @@ -11,7 +11,6 @@ from typing_extensions import Self import cudf -from cudf._lib.copying import segmented_gather from cudf._lib.lists import ( concatenate_list_elements, concatenate_rows, @@ -22,6 +21,7 @@ extract_element_scalar, index_of_column, index_of_scalar, + segmented_gather, sort_lists, ) from cudf._lib.strings.convert.convert_lists import format_list_column @@ -34,6 +34,8 @@ from cudf.core.missing import NA if TYPE_CHECKING: + from collections.abc import Sequence + from cudf._typing import ColumnBinaryOperand, ColumnLike, Dtype, ScalarLike from cudf.core.buffer import Buffer diff --git a/python/cudf/cudf/core/column/methods.py b/python/cudf/cudf/core/column/methods.py index 05a0ab2e09a..a91c080fe21 100644 --- a/python/cudf/cudf/core/column/methods.py +++ b/python/cudf/cudf/core/column/methods.py @@ -2,9 +2,7 @@ from __future__ import annotations -from typing import Union, overload - -from typing_extensions import Literal +from typing import Literal, 
Union, overload import cudf import cudf.core.column diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 78d2814ed26..620cae65374 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -3,7 +3,7 @@ from __future__ import annotations import functools -from typing import TYPE_CHECKING, Any, Sequence, cast +from typing import TYPE_CHECKING, Any, cast import numpy as np import pandas as pd @@ -28,7 +28,7 @@ from .numerical_base import NumericalBaseColumn if TYPE_CHECKING: - from collections.abc import Callable + from collections.abc import Callable, Sequence from cudf._typing import ( ColumnBinaryOperand, diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 45d1a8b087b..856ce0f75de 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5,7 +5,7 @@ import re import warnings from functools import cached_property -from typing import TYPE_CHECKING, Sequence, cast, overload +from typing import TYPE_CHECKING, cast, overload import numpy as np import pandas as pd @@ -35,6 +35,8 @@ def str_to_boolean(column: StringColumn): if TYPE_CHECKING: + from collections.abc import Sequence + import cupy import numba.cuda @@ -998,7 +1000,7 @@ def replace( return self._return_or_inplace( libstrings.replace_multi_re( self._column, - pat, + list(pat), column.as_column(repl, dtype="str"), ) if regex diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 6b6f3e517a8..087d6474e7f 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -4,7 +4,7 @@ import datetime import functools -from typing import TYPE_CHECKING, Sequence, cast +from typing import TYPE_CHECKING, cast import numpy as np import pandas as pd @@ -19,6 +19,8 @@ from cudf.utils.utils import _all_bools_with_nulls if TYPE_CHECKING: + from 
collections.abc import Sequence + from cudf._typing import ColumnBinaryOperand, DatetimeLikeScalar, Dtype _unit_to_nanoseconds_conversion = { diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index bc093fdaa9a..496e86ed709 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -5,8 +5,9 @@ import itertools import sys from collections import abc +from collections.abc import Mapping from functools import cached_property, reduce -from typing import TYPE_CHECKING, Any, Mapping, cast +from typing import TYPE_CHECKING, Any, cast import numpy as np import pandas as pd diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 7d4d34f5b04..bf1c39b23da 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -13,8 +13,8 @@ import textwrap import warnings from collections import abc, defaultdict -from collections.abc import Callable, Iterator -from typing import TYPE_CHECKING, Any, Literal, MutableMapping, cast +from collections.abc import Callable, Iterator, MutableMapping +from typing import TYPE_CHECKING, Any, Literal, cast import cupy import numba diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py index 5250a741d3d..aa601a2b322 100644 --- a/python/cudf/cudf/core/df_protocol.py +++ b/python/cudf/cudf/core/df_protocol.py @@ -3,7 +3,7 @@ import enum from collections import abc -from typing import Any, Iterable, Mapping, Sequence, Tuple, cast +from typing import TYPE_CHECKING, Any, cast import cupy as cp import numpy as np @@ -20,6 +20,9 @@ build_column, ) +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping, Sequence + # Implementation of interchange protocol classes # ---------------------------------------------- @@ -61,7 +64,7 @@ class _MaskKind(enum.IntEnum): _DtypeKind.BOOL, _DtypeKind.STRING, } -ProtoDtype = Tuple[_DtypeKind, int, str, str] +ProtoDtype = 
tuple[_DtypeKind, int, str, str] class _CuDFBuffer: diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 37ad6b8fabb..205edd91d9d 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -6,7 +6,7 @@ import pickle import warnings from collections import abc -from typing import TYPE_CHECKING, Any, Literal, MutableMapping +from typing import TYPE_CHECKING, Any, Literal # TODO: The `numpy` import is needed for typing purposes during doc builds # only, need to figure out why the `np` alias is insufficient then remove. @@ -36,6 +36,7 @@ from cudf.utils.utils import _array_ufunc, _warn_no_dask_cudf if TYPE_CHECKING: + from collections.abc import MutableMapping from types import ModuleType from cudf._typing import Dtype, ScalarLike diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 81b20488d8d..e59b948aba9 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -8,7 +8,7 @@ import warnings from collections import abc from functools import cached_property -from typing import TYPE_CHECKING, Any, Iterable, Literal +from typing import TYPE_CHECKING, Any, Literal import cupy as cp import numpy as np @@ -36,6 +36,8 @@ from cudf.utils.utils import GetAttrGetItemMixin if TYPE_CHECKING: + from collections.abc import Iterable + from cudf._typing import ( AggType, DataFrameOrSeries, @@ -479,6 +481,11 @@ def get_group(self, name, obj=None): "instead of ``gb.get_group(name, obj=df)``.", FutureWarning, ) + if is_list_like(self._by): + if isinstance(name, tuple) and len(name) == 1: + name = name[0] + else: + raise KeyError(name) return obj.iloc[self.indices[name]] @_performance_tracking diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index cd07c58c5d9..1b90e9f9df0 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -5,10 +5,10 @@ import operator import pickle import 
warnings -from collections.abc import Hashable +from collections.abc import Hashable, MutableMapping from functools import cache, cached_property from numbers import Number -from typing import TYPE_CHECKING, Any, Literal, MutableMapping, cast +from typing import TYPE_CHECKING, Any, Literal, cast import cupy import numpy as np diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 5952815deef..e031f2a4e8e 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -10,9 +10,7 @@ from typing import ( TYPE_CHECKING, Any, - Callable, Literal, - MutableMapping, TypeVar, cast, ) @@ -63,6 +61,8 @@ from cudf.utils.utils import _warn_no_dask_cudf if TYPE_CHECKING: + from collections.abc import Callable, MutableMapping + from cudf._typing import ( ColumnLike, DataFrameOrSeries, diff --git a/python/cudf/cudf/core/indexing_utils.py b/python/cudf/cudf/core/indexing_utils.py index 8182e5cede2..ce6a5c960dd 100644 --- a/python/cudf/cudf/core/indexing_utils.py +++ b/python/cudf/cudf/core/indexing_utils.py @@ -3,9 +3,7 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Any, List, Union - -from typing_extensions import TypeAlias +from typing import Any, TypeAlias import cudf from cudf.api.types import _is_scalar_or_zero_d_array, is_integer @@ -46,11 +44,11 @@ class ScalarIndexer: key: GatherMap -IndexingSpec: TypeAlias = Union[ - EmptyIndexer, MapIndexer, MaskIndexer, ScalarIndexer, SliceIndexer -] +IndexingSpec: TypeAlias = ( + EmptyIndexer | MapIndexer | MaskIndexer | ScalarIndexer | SliceIndexer +) -ColumnLabels: TypeAlias = List[str] +ColumnLabels: TypeAlias = list[str] def destructure_iloc_key( diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 92d094d9de5..bfff62f0a89 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -8,7 +8,7 @@ import pickle import warnings from 
functools import cached_property -from typing import TYPE_CHECKING, Any, MutableMapping +from typing import TYPE_CHECKING, Any import cupy as cp import numpy as np @@ -36,7 +36,7 @@ from cudf.utils.utils import NotIterable, _external_only_api, _is_same_name if TYPE_CHECKING: - from collections.abc import Generator, Hashable + from collections.abc import Generator, Hashable, MutableMapping from typing_extensions import Self diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 29ed18ac0ce..9b60424c924 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -9,7 +9,7 @@ import warnings from collections import abc from shutil import get_terminal_size -from typing import TYPE_CHECKING, Any, Literal, MutableMapping +from typing import TYPE_CHECKING, Any, Literal import cupy import numpy as np @@ -71,6 +71,8 @@ from cudf.utils.performance_tracking import _performance_tracking if TYPE_CHECKING: + from collections.abc import MutableMapping + import pyarrow as pa from cudf._typing import ( diff --git a/python/cudf/cudf/core/subword_tokenizer.py b/python/cudf/cudf/core/subword_tokenizer.py index 9e59b134b73..dda1f199078 100644 --- a/python/cudf/cudf/core/subword_tokenizer.py +++ b/python/cudf/cudf/core/subword_tokenizer.py @@ -6,8 +6,9 @@ import cupy as cp +import pylibcudf as plc + from cudf._lib.nvtext.subword_tokenize import ( - Hashed_Vocabulary as cpp_hashed_vocabulary, subword_tokenize_inmem_hash as cpp_subword_tokenize, ) @@ -50,7 +51,9 @@ class SubwordTokenizer: def __init__(self, hash_file: str, do_lower_case: bool = True): self.do_lower_case = do_lower_case - self.vocab_file = cpp_hashed_vocabulary(hash_file) + self.vocab_file = plc.nvtext.subword_tokenize.HashedVocabulary( + hash_file + ) def __call__( self, diff --git a/python/cudf/cudf/core/tokenize_vocabulary.py b/python/cudf/cudf/core/tokenize_vocabulary.py index 99d85c0c5c0..1e31376cce8 100644 --- a/python/cudf/cudf/core/tokenize_vocabulary.py +++ 
b/python/cudf/cudf/core/tokenize_vocabulary.py @@ -2,9 +2,10 @@ from __future__ import annotations +import pylibcudf as plc + import cudf from cudf._lib.nvtext.tokenize import ( - TokenizeVocabulary as cpp_tokenize_vocabulary, tokenize_with_vocabulary as cpp_tokenize_with_vocabulary, ) @@ -20,7 +21,9 @@ class TokenizeVocabulary: """ def __init__(self, vocabulary: "cudf.Series"): - self.vocabulary = cpp_tokenize_vocabulary(vocabulary._column) + self.vocabulary = plc.nvtext.tokenize.TokenizeVocabulary( + vocabulary._column.to_pylibcudf(mode="read") + ) def tokenize( self, text, delimiter: str = "", default_id: int = -1 diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index 68f34fa28ff..885e7b16644 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -4,7 +4,7 @@ import math import re import warnings -from typing import Literal, Sequence +from typing import TYPE_CHECKING, Literal import numpy as np import pandas as pd @@ -20,6 +20,9 @@ from cudf.core import column from cudf.core.index import ensure_index +if TYPE_CHECKING: + from collections.abc import Sequence + # https://github.com/pandas-dev/pandas/blob/2.2.x/pandas/core/tools/datetimes.py#L1112 _unit_map = { "year": "year", diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index c364d55e677..73afde407db 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -10,9 +10,9 @@ import pickle import types import warnings -from collections.abc import Callable, Iterator +from collections.abc import Callable, Iterator, Mapping from enum import IntEnum -from typing import Any, Literal, Mapping +from typing import Any, Literal import numpy as np diff --git a/python/cudf/cudf/pandas/module_accelerator.py b/python/cudf/cudf/pandas/module_accelerator.py index f82e300e83d..38103a71908 100644 --- 
a/python/cudf/cudf/pandas/module_accelerator.py +++ b/python/cudf/cudf/pandas/module_accelerator.py @@ -17,7 +17,7 @@ from abc import abstractmethod from importlib._bootstrap import _ImportLockContext as ImportLock from types import ModuleType -from typing import Any, ContextManager, NamedTuple +from typing import Any, ContextManager, NamedTuple # noqa: UP035 from typing_extensions import Self diff --git a/python/cudf/cudf/pandas/scripts/analyze-test-failures.py b/python/cudf/cudf/pandas/scripts/analyze-test-failures.py index 8870fbc5c28..bb2fc00d9fc 100644 --- a/python/cudf/cudf/pandas/scripts/analyze-test-failures.py +++ b/python/cudf/cudf/pandas/scripts/analyze-test-failures.py @@ -9,6 +9,7 @@ python analyze-test-failures.py Example: +------- python analyze-test-failures.py log.json frame/* """ diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index d12d2697729..59966a5ff0c 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -35,7 +35,7 @@ def null_assert_warnings(*args, **kwargs): @pytest.fixture(scope="session", autouse=True) # type: ignore def patch_testing_functions(): - tm.assert_produces_warning = null_assert_warnings + tm.assert_produces_warning = null_assert_warnings # noqa: F821 pytest.raises = replace_kwargs({"match": None})(pytest.raises) diff --git a/python/cudf/cudf/pandas/scripts/summarize-test-results.py b/python/cudf/cudf/pandas/scripts/summarize-test-results.py index 4ea0b3b4413..a0ad872e4c7 100644 --- a/python/cudf/cudf/pandas/scripts/summarize-test-results.py +++ b/python/cudf/cudf/pandas/scripts/summarize-test-results.py @@ -5,7 +5,8 @@ """ Summarizes the test results per module. 
-Examples: +Examples +-------- python summarize-test-results.py log.json python summarize-test-results.py log.json --output json python summarize-test-results.py log.json --output table diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 949fa909b5b..71b6bbd688d 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -3431,3 +3431,16 @@ def test_binop_eq_ne_index_series(data1, data2): expected = gi.to_pandas() != gs.to_pandas() assert_eq(expected, actual) + + +@pytest.mark.parametrize("scalar", [np.datetime64, np.timedelta64]) +def test_binop_lhs_numpy_datetimelike_scalar(scalar): + slr1 = scalar(1, "ms") + slr2 = scalar(1, "ns") + result = slr1 < cudf.Series([slr2]) + expected = slr1 < pd.Series([slr2]) + assert_eq(result, expected) + + result = slr2 < cudf.Series([slr1]) + expected = slr2 < pd.Series([slr1]) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 6b222841622..e4422e204bc 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -4059,3 +4059,19 @@ def test_ndim(): pgb = pser.groupby([0, 0, 1]) ggb = gser.groupby(cudf.Series([0, 0, 1])) assert pgb.ndim == ggb.ndim + + +@pytest.mark.skipif( + not PANDAS_GE_220, reason="pandas behavior applicable in >=2.2" +) +def test_get_group_list_like(): + df = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + result = df.groupby(["a"]).get_group((1,)) + expected = df.to_pandas().groupby(["a"]).get_group((1,)) + assert_eq(result, expected) + + with pytest.raises(KeyError): + df.groupby(["a"]).get_group((1, 2)) + + with pytest.raises(KeyError): + df.groupby(["a"]).get_group([1]) diff --git a/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py b/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py index a75a20a4681..63fd9601fc1 100644 --- a/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py +++ 
b/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py @@ -387,7 +387,8 @@ def test_dir_bound_method( ): """This test will fail because dir for bound methods is currently incorrect, but we have no way to fix it without materializing the slow - type, which is unnecessarily expensive.""" + type, which is unnecessarily expensive. + """ Fast, FastIntermediate = fast_and_intermediate_with_doc Slow, SlowIntermediate = slow_and_intermediate_with_doc diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index feab04ffadc..b6105c17b3e 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -53,6 +53,7 @@ test = [ "cramjam", "fastavro>=0.22.9", "hypothesis", + "mmh3", "msgpack", "pytest-benchmark", "pytest-cases>=3.8.2", @@ -63,6 +64,7 @@ test = [ "tokenizers==0.15.2", "transformers==4.39.3", "tzdata", + "xxhash", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
pandas-tests = [ "ipython", @@ -81,50 +83,6 @@ cudf-pandas-tests = [ Homepage = "https://github.com/rapidsai/cudf" Documentation = "https://docs.rapids.ai/api/cudf/stable/" -[tool.isort] -line_length = 79 -multi_line_output = 3 -include_trailing_comma = true -force_grid_wrap = 0 -combine_as_imports = true -order_by_type = true -known_dask = [ - "dask", - "distributed", - "dask_cuda", -] -known_rapids = [ - "rmm", - "pylibcudf" -] -known_first_party = [ - "cudf", -] -default_section = "THIRDPARTY" -sections = [ - "FUTURE", - "STDLIB", - "THIRDPARTY", - "DASK", - "RAPIDS", - "FIRSTPARTY", - "LOCALFOLDER", -] -skip = [ - "thirdparty", - ".eggs", - ".git", - ".hg", - ".mypy_cache", - ".tox", - ".venv", - "_build", - "buck-out", - "build", - "dist", - "__init__.py", -] - [tool.pytest.ini_options] addopts = "--tb=native --strict-config --strict-markers" empty_parameter_set_mark = "fail_at_collect" @@ -174,3 +132,18 @@ wheel.packages = ["cudf"] provider = "scikit_build_core.metadata.regex" input = "cudf/VERSION" regex = "(?P.*)" + +[tool.ruff] +extend = "../../pyproject.toml" + +[tool.ruff.lint.isort] +combine-as-imports = true +known-first-party = ["cudf"] +section-order = ["future", "standard-library", "third-party", "dask", "rapids", "first-party", "local-folder"] + +[tool.ruff.lint.isort.sections] +dask = ["dask", "distributed", "dask_cuda"] +rapids = ["rmm", "pylibcudf"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index 87e19a2bccf..667cd7b1db8 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -32,51 +32,20 @@ test = [ Homepage = "https://github.com/rapidsai/cudf" Documentation = "https://docs.rapids.ai/api/cudf/stable/" -[tool.isort] -line_length = 79 -multi_line_output = 3 -include_trailing_comma = true -force_grid_wrap = 0 -combine_as_imports = true -order_by_type = true -known_dask = [ - "dask", - "distributed", - 
"dask_cuda", - "streamz", -] -known_rapids = [ - "rmm", - "cudf", - "dask_cudf", -] -known_first_party = [ - "cudf_kafka", -] -default_section = "THIRDPARTY" -sections = [ - "FUTURE", - "STDLIB", - "THIRDPARTY", - "DASK", - "RAPIDS", - "FIRSTPARTY", - "LOCALFOLDER", -] -skip = [ - "thirdparty", - ".eggs", - ".git", - ".hg", - ".mypy_cache", - ".tox", - ".venv", - "_build", - "buck-out", - "build", - "dist", - "__init__.py", -] +[tool.ruff] +extend = "../../pyproject.toml" + +[tool.ruff.lint.isort] +combine-as-imports = true +known-first-party = ["cudf_kafka"] +section-order = ["future", "standard-library", "third-party", "dask", "rapids", "first-party", "local-folder"] + +[tool.ruff.lint.isort.sections] +dask = ["dask", "distributed", "dask_cuda", "streamz"] +rapids = ["rmm", "cudf", "dask_cudf"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] [tool.pytest.ini_options] addopts = "--tb=native --strict-config --strict-markers" diff --git a/python/cudf_polars/cudf_polars/containers/dataframe.py b/python/cudf_polars/cudf_polars/containers/dataframe.py index 2c195f6637c..08bc9d0ea3f 100644 --- a/python/cudf_polars/cudf_polars/containers/dataframe.py +++ b/python/cudf_polars/cudf_polars/containers/dataframe.py @@ -9,10 +9,11 @@ from typing import TYPE_CHECKING, cast import pyarrow as pa -import pylibcudf as plc import polars as pl +import pylibcudf as plc + from cudf_polars.containers import Column from cudf_polars.utils import dtypes diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py b/python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py index b8b18ec5039..2af9fdaacc5 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py @@ -10,6 +10,7 @@ from typing import TYPE_CHECKING, Any, ClassVar import pyarrow as pa + import pylibcudf as plc from cudf_polars.containers import Column @@ -30,14 +31,13 @@ class Agg(Expr): - __slots__ = 
("name", "options", "op", "request", "children") + __slots__ = ("name", "options", "op", "request") _non_child = ("dtype", "name", "options") - children: tuple[Expr, ...] def __init__( self, dtype: plc.DataType, name: str, options: Any, *children: Expr ) -> None: - super().__init__(dtype) + self.dtype = dtype self.name = name self.options = options self.children = children diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/base.py b/python/cudf_polars/cudf_polars/dsl/expressions/base.py index 8d021b0231d..effe8cb2378 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/base.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/base.py @@ -13,9 +13,10 @@ import pylibcudf as plc from cudf_polars.containers import Column +from cudf_polars.dsl.nodebase import Node if TYPE_CHECKING: - from collections.abc import Mapping, Sequence + from collections.abc import Mapping from cudf_polars.containers import Column, DataFrame @@ -32,100 +33,16 @@ class ExecutionContext(IntEnum): ROLLING = enum.auto() -class Expr: - """ - An abstract expression object. +class Expr(Node["Expr"]): + """An abstract expression object.""" - This contains a (potentially empty) tuple of child expressions, - along with non-child data. For uniform reconstruction and - implementation of hashing and equality schemes, child classes need - to provide a certain amount of metadata when they are defined. - Specifically, the ``_non_child`` attribute must list, in-order, - the names of the slots that are passed to the constructor. The - constructor must take arguments in the order ``(*_non_child, - *children).`` - """ - - __slots__ = ("dtype", "_hash_value", "_repr_value") + __slots__ = ("dtype",) dtype: plc.DataType """Data type of the expression.""" - _hash_value: int - """Caching slot for the hash of the expression.""" - _repr_value: str - """Caching slot for repr of the expression.""" - children: tuple[Expr, ...] 
= () - """Children of the expression.""" + # This annotation is needed because of https://github.com/python/mypy/issues/17981 _non_child: ClassVar[tuple[str, ...]] = ("dtype",) """Names of non-child data (not Exprs) for reconstruction.""" - # Constructor must take arguments in order (*_non_child, *children) - def __init__(self, dtype: plc.DataType) -> None: - self.dtype = dtype - - def _ctor_arguments(self, children: Sequence[Expr]) -> Sequence: - return (*(getattr(self, attr) for attr in self._non_child), *children) - - def get_hash(self) -> int: - """ - Return the hash of this expr. - - Override this in subclasses, rather than __hash__. - - Returns - ------- - The integer hash value. - """ - return hash((type(self), self._ctor_arguments(self.children))) - - def __hash__(self) -> int: - """Hash of an expression with caching.""" - try: - return self._hash_value - except AttributeError: - self._hash_value = self.get_hash() - return self._hash_value - - def is_equal(self, other: Any) -> bool: - """ - Equality of two expressions. - - Override this in subclasses, rather than __eq__. - - Parameter - --------- - other - object to compare to - - Returns - ------- - True if the two expressions are equal, false otherwise. 
- """ - if type(self) is not type(other): - return False # pragma: no cover; __eq__ trips first - return self._ctor_arguments(self.children) == other._ctor_arguments( - other.children - ) - - def __eq__(self, other: Any) -> bool: - """Equality of expressions.""" - if type(self) is not type(other) or hash(self) != hash(other): - return False - else: - return self.is_equal(other) - - def __ne__(self, other: Any) -> bool: - """Inequality of expressions.""" - return not self.__eq__(other) - - def __repr__(self) -> str: - """String representation of an expression with caching.""" - try: - return self._repr_value - except AttributeError: - args = ", ".join(f"{arg!r}" for arg in self._ctor_arguments(self.children)) - self._repr_value = f"{type(self).__name__}({args})" - return self._repr_value - def do_evaluate( self, df: DataFrame, @@ -311,11 +228,11 @@ class Col(Expr): __slots__ = ("name",) _non_child = ("dtype", "name") name: str - children: tuple[()] def __init__(self, dtype: plc.DataType, name: str) -> None: self.dtype = dtype self.name = name + self.children = () def do_evaluate( self, diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py b/python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py index 19baae3611d..245bdbefe88 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py @@ -8,10 +8,10 @@ from typing import TYPE_CHECKING, ClassVar -import pylibcudf as plc - from polars.polars import _expr_nodes as pl_expr +import pylibcudf as plc + from cudf_polars.containers import Column from cudf_polars.dsl.expressions.base import AggInfo, ExecutionContext, Expr @@ -24,9 +24,8 @@ class BinOp(Expr): - __slots__ = ("op", "children") + __slots__ = ("op",) _non_child = ("dtype", "op") - children: tuple[Expr, Expr] def __init__( self, @@ -35,7 +34,7 @@ def __init__( left: Expr, right: Expr, ) -> None: - super().__init__(dtype) + self.dtype = dtype if 
plc.traits.is_boolean(self.dtype): # For boolean output types, bitand and bitor implement # boolean logic, so translate. bitxor also does, but the diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/boolean.py b/python/cudf_polars/cudf_polars/dsl/expressions/boolean.py index ff9973a47d5..8db8172ebd1 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/boolean.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/boolean.py @@ -10,10 +10,11 @@ from typing import TYPE_CHECKING, Any, ClassVar import pyarrow as pa -import pylibcudf as plc from polars.polars import _expr_nodes as pl_expr +import pylibcudf as plc + from cudf_polars.containers import Column from cudf_polars.dsl.expressions.base import ( ExecutionContext, @@ -31,9 +32,8 @@ class BooleanFunction(Expr): - __slots__ = ("name", "options", "children") + __slots__ = ("name", "options") _non_child = ("dtype", "name", "options") - children: tuple[Expr, ...] def __init__( self, @@ -42,7 +42,7 @@ def __init__( options: tuple[Any, ...], *children: Expr, ) -> None: - super().__init__(dtype) + self.dtype = dtype self.options = options self.name = name self.children = children diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py b/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py index f752a23b628..65fa4bfa62f 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py @@ -9,10 +9,11 @@ from typing import TYPE_CHECKING, Any, ClassVar import pyarrow as pa -import pylibcudf as plc from polars.polars import _expr_nodes as pl_expr +import pylibcudf as plc + from cudf_polars.containers import Column from cudf_polars.dsl.expressions.base import ExecutionContext, Expr @@ -25,7 +26,7 @@ class TemporalFunction(Expr): - __slots__ = ("name", "options", "children") + __slots__ = ("name", "options") _COMPONENT_MAP: ClassVar[dict[pl_expr.TemporalFunction, str]] = { pl_expr.TemporalFunction.Year: 
plc.datetime.DatetimeComponent.YEAR, pl_expr.TemporalFunction.Month: plc.datetime.DatetimeComponent.MONTH, @@ -39,7 +40,6 @@ class TemporalFunction(Expr): pl_expr.TemporalFunction.Nanosecond: plc.datetime.DatetimeComponent.NANOSECOND, } _non_child = ("dtype", "name", "options") - children: tuple[Expr, ...] def __init__( self, @@ -48,7 +48,7 @@ def __init__( options: tuple[Any, ...], *children: Expr, ) -> None: - super().__init__(dtype) + self.dtype = dtype self.options = options self.name = name self.children = children diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/literal.py b/python/cudf_polars/cudf_polars/dsl/expressions/literal.py index 562a2255033..c16313bf83c 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/literal.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/literal.py @@ -9,6 +9,7 @@ from typing import TYPE_CHECKING, Any import pyarrow as pa + import pylibcudf as plc from cudf_polars.containers import Column @@ -16,7 +17,7 @@ from cudf_polars.utils import dtypes if TYPE_CHECKING: - from collections.abc import Mapping + from collections.abc import Hashable, Mapping import pyarrow as pa @@ -31,12 +32,12 @@ class Literal(Expr): __slots__ = ("value",) _non_child = ("dtype", "value") value: pa.Scalar[Any] - children: tuple[()] def __init__(self, dtype: plc.DataType, value: pa.Scalar[Any]) -> None: - super().__init__(dtype) + self.dtype = dtype assert value.type == plc.interop.to_arrow(dtype) self.value = value + self.children = () def do_evaluate( self, @@ -58,19 +59,19 @@ class LiteralColumn(Expr): __slots__ = ("value",) _non_child = ("dtype", "value") value: pa.Array[Any, Any] - children: tuple[()] def __init__(self, dtype: plc.DataType, value: pl.Series) -> None: - super().__init__(dtype) + self.dtype = dtype data = value.to_arrow() self.value = data.cast(dtypes.downcast_arrow_lists(data.type)) + self.children = () - def get_hash(self) -> int: + def get_hashable(self) -> Hashable: """Compute a hash of the column.""" # This 
is stricter than necessary, but we only need this hash # for identity in groupby replacements so it's OK. And this # way we avoid doing potentially expensive compute. - return hash((type(self), self.dtype, id(self.value))) + return (type(self), self.dtype, id(self.value)) def do_evaluate( self, diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/rolling.py b/python/cudf_polars/cudf_polars/dsl/expressions/rolling.py index f7dcc3c542c..fa68bcb9426 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/rolling.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/rolling.py @@ -17,24 +17,22 @@ class RollingWindow(Expr): - __slots__ = ("options", "children") + __slots__ = ("options",) _non_child = ("dtype", "options") - children: tuple[Expr] def __init__(self, dtype: plc.DataType, options: Any, agg: Expr) -> None: - super().__init__(dtype) + self.dtype = dtype self.options = options self.children = (agg,) raise NotImplementedError("Rolling window not implemented") class GroupedRollingWindow(Expr): - __slots__ = ("options", "children") + __slots__ = ("options",) _non_child = ("dtype", "options") - children: tuple[Expr, ...] 
def __init__(self, dtype: plc.DataType, options: Any, agg: Expr, *by: Expr) -> None: - super().__init__(dtype) + self.dtype = dtype self.options = options self.children = (agg, *by) raise NotImplementedError("Grouped rolling window not implemented") diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/selection.py b/python/cudf_polars/cudf_polars/dsl/expressions/selection.py index a7a3e68a28c..77d7d4c0d22 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/selection.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/selection.py @@ -9,6 +9,7 @@ from typing import TYPE_CHECKING import pyarrow as pa + import pylibcudf as plc from cudf_polars.containers import Column @@ -23,12 +24,11 @@ class Gather(Expr): - __slots__ = ("children",) + __slots__ = () _non_child = ("dtype",) - children: tuple[Expr, Expr] def __init__(self, dtype: plc.DataType, values: Expr, indices: Expr) -> None: - super().__init__(dtype) + self.dtype = dtype self.children = (values, indices) def do_evaluate( @@ -65,12 +65,11 @@ def do_evaluate( class Filter(Expr): - __slots__ = ("children",) + __slots__ = () _non_child = ("dtype",) - children: tuple[Expr, Expr] def __init__(self, dtype: plc.DataType, values: Expr, indices: Expr): - super().__init__(dtype) + self.dtype = dtype self.children = (values, indices) def do_evaluate( diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/sorting.py b/python/cudf_polars/cudf_polars/dsl/expressions/sorting.py index 861b73ce6a0..99512e2ef52 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/sorting.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/sorting.py @@ -23,14 +23,13 @@ class Sort(Expr): - __slots__ = ("options", "children") + __slots__ = ("options",) _non_child = ("dtype", "options") - children: tuple[Expr] def __init__( self, dtype: plc.DataType, options: tuple[bool, bool, bool], column: Expr ) -> None: - super().__init__(dtype) + self.dtype = dtype self.options = options self.children = (column,) @@ -59,9 +58,8 
@@ def do_evaluate( class SortBy(Expr): - __slots__ = ("options", "children") + __slots__ = ("options",) _non_child = ("dtype", "options") - children: tuple[Expr, ...] def __init__( self, @@ -70,7 +68,7 @@ def __init__( column: Expr, *by: Expr, ) -> None: - super().__init__(dtype) + self.dtype = dtype self.options = options self.children = (column, *by) diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/string.py b/python/cudf_polars/cudf_polars/dsl/expressions/string.py index 6669669aadc..8b66c9d4676 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/string.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/string.py @@ -10,11 +10,12 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc from polars.exceptions import InvalidOperationError from polars.polars import _expr_nodes as pl_expr +import pylibcudf as plc + from cudf_polars.containers import Column from cudf_polars.dsl.expressions.base import ExecutionContext, Expr from cudf_polars.dsl.expressions.literal import Literal, LiteralColumn @@ -28,9 +29,8 @@ class StringFunction(Expr): - __slots__ = ("name", "options", "children", "_regex_program") + __slots__ = ("name", "options", "_regex_program") _non_child = ("dtype", "name", "options") - children: tuple[Expr, ...] 
def __init__( self, @@ -39,7 +39,7 @@ def __init__( options: tuple[Any, ...], *children: Expr, ) -> None: - super().__init__(dtype) + self.dtype = dtype self.options = options self.name = name self.children = children diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/ternary.py b/python/cudf_polars/cudf_polars/dsl/expressions/ternary.py index c7d7a802ded..d2b5d6bae29 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/ternary.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/ternary.py @@ -26,14 +26,13 @@ class Ternary(Expr): - __slots__ = ("children",) + __slots__ = () _non_child = ("dtype",) - children: tuple[Expr, Expr, Expr] def __init__( self, dtype: plc.DataType, when: Expr, then: Expr, otherwise: Expr ) -> None: - super().__init__(dtype) + self.dtype = dtype self.children = (when, then, otherwise) def do_evaluate( diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py index 3d4d15be1ce..6f22544c050 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py @@ -8,6 +8,7 @@ from typing import TYPE_CHECKING, Any, ClassVar import pyarrow as pa + import pylibcudf as plc from cudf_polars.containers import Column @@ -26,12 +27,11 @@ class Cast(Expr): """Class representing a cast of an expression.""" - __slots__ = ("children",) + __slots__ = () _non_child = ("dtype",) - children: tuple[Expr] def __init__(self, dtype: plc.DataType, value: Expr) -> None: - super().__init__(dtype) + self.dtype = dtype self.children = (value,) if not dtypes.can_cast(value.dtype, self.dtype): raise NotImplementedError( @@ -60,7 +60,9 @@ def collect_agg(self, *, depth: int) -> AggInfo: class Len(Expr): """Class representing the length of an expression.""" - children: tuple[()] + def __init__(self, dtype: plc.DataType) -> None: + self.dtype = dtype + self.children = () def do_evaluate( self, @@ -90,9 +92,8 @@ def 
collect_agg(self, *, depth: int) -> AggInfo: class UnaryFunction(Expr): """Class representing unary functions of an expression.""" - __slots__ = ("name", "options", "children") + __slots__ = ("name", "options") _non_child = ("dtype", "name", "options") - children: tuple[Expr, ...] # Note: log, and pow are handled via translation to binops _OP_MAPPING: ClassVar[dict[str, plc.unary.UnaryOperator]] = { @@ -142,7 +143,7 @@ class UnaryFunction(Expr): def __init__( self, dtype: plc.DataType, name: str, options: tuple[Any, ...], *children: Expr ) -> None: - super().__init__(dtype) + self.dtype = dtype self.name = name self.options = options self.children = children diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index e319c363a23..04aa74024cd 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -13,24 +13,27 @@ from __future__ import annotations -import dataclasses import itertools +import json from functools import cache from pathlib import Path from typing import TYPE_CHECKING, Any, ClassVar import pyarrow as pa -import pylibcudf as plc from typing_extensions import assert_never import polars as pl +import pylibcudf as plc + import cudf_polars.dsl.expr as expr from cudf_polars.containers import Column, DataFrame -from cudf_polars.utils import dtypes, sorting +from cudf_polars.dsl.nodebase import Node +from cudf_polars.dsl.to_ast import to_parquet_filter +from cudf_polars.utils import dtypes if TYPE_CHECKING: - from collections.abc import Callable, MutableMapping + from collections.abc import Callable, Hashable, MutableMapping, Sequence from typing import Literal from cudf_polars.typing import Schema @@ -121,16 +124,27 @@ def broadcast(*columns: Column, target_length: int | None = None) -> list[Column ] -@dataclasses.dataclass -class IR: +class IR(Node["IR"]): """Abstract plan node, representing an unevaluated dataframe.""" + __slots__ = ("schema",) + # This annotation is 
needed because of https://github.com/python/mypy/issues/17981 + _non_child: ClassVar[tuple[str, ...]] = ("schema",) schema: Schema """Mapping from column names to their data types.""" - def __post_init__(self): - """Validate preconditions.""" - pass # noqa: PIE790 + def get_hashable(self) -> Hashable: + """ + Hashable representation of node, treating schema dictionary. + + Since the schema is a dictionary, even though it is morally + immutable, it is not hashable. We therefore convert it to + tuples for hashing purposes. + """ + # Schema is the first constructor argument + args = self._ctor_arguments(self.children)[1:] + schema_hash = tuple(self.schema.items()) + return (type(self), schema_hash, args) def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """ @@ -159,24 +173,50 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: ) # pragma: no cover -@dataclasses.dataclass class PythonScan(IR): """Representation of input from a python function.""" + __slots__ = ("options", "predicate") + _non_child = ("schema", "options", "predicate") options: Any """Arbitrary options.""" predicate: expr.NamedExpr | None """Filter to apply to the constructed dataframe before returning it.""" - def __post_init__(self): - """Validate preconditions.""" + def __init__(self, schema: Schema, options: Any, predicate: expr.NamedExpr | None): + self.schema = schema + self.options = options + self.predicate = predicate + self.children = () raise NotImplementedError("PythonScan not implemented") -@dataclasses.dataclass class Scan(IR): """Input from files.""" + __slots__ = ( + "typ", + "reader_options", + "cloud_options", + "paths", + "with_columns", + "skip_rows", + "n_rows", + "row_index", + "predicate", + ) + _non_child = ( + "schema", + "typ", + "reader_options", + "cloud_options", + "paths", + "with_columns", + "skip_rows", + "n_rows", + "row_index", + "predicate", + ) typ: str """What type of file are we reading? 
Parquet, CSV, etc...""" reader_options: dict[str, Any] @@ -185,7 +225,7 @@ class Scan(IR): """Cloud-related authentication options, currently ignored.""" paths: list[str] """List of paths to read from.""" - with_columns: list[str] + with_columns: list[str] | None """Projected columns to return.""" skip_rows: int """Rows to skip at the start when reading.""" @@ -196,9 +236,30 @@ class Scan(IR): predicate: expr.NamedExpr | None """Mask to apply to the read dataframe.""" - def __post_init__(self) -> None: - """Validate preconditions.""" - super().__post_init__() + def __init__( + self, + schema: Schema, + typ: str, + reader_options: dict[str, Any], + cloud_options: dict[str, Any] | None, + paths: list[str], + with_columns: list[str] | None, + skip_rows: int, + n_rows: int, + row_index: tuple[str, int] | None, + predicate: expr.NamedExpr | None, + ): + self.schema = schema + self.typ = typ + self.reader_options = reader_options + self.cloud_options = cloud_options + self.paths = paths + self.with_columns = with_columns + self.skip_rows = skip_rows + self.n_rows = n_rows + self.row_index = row_index + self.predicate = predicate + self.children = () if self.typ not in ("csv", "parquet", "ndjson"): # pragma: no cover # This line is unhittable ATM since IPC/Anonymous scan raise # on the polars side @@ -258,6 +319,28 @@ def __post_init__(self) -> None: "Reading only parquet metadata to produce row index." ) + def get_hashable(self) -> Hashable: + """ + Hashable representation of the node. + + The options dictionaries are serialised for hashing purposes + as json strings. 
+ """ + schema_hash = tuple(self.schema.items()) + return ( + type(self), + schema_hash, + self.typ, + json.dumps(self.reader_options), + json.dumps(self.cloud_options), + tuple(self.paths), + tuple(self.with_columns) if self.with_columns is not None else None, + self.skip_rows, + self.n_rows, + self.row_index, + self.predicate, + ) + def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" with_columns = self.with_columns @@ -337,9 +420,14 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: colnames[0], ) elif self.typ == "parquet": + filters = None + if self.predicate is not None and self.row_index is None: + # Can't apply filters during read if we have a row index. + filters = to_parquet_filter(self.predicate.value) tbl_w_meta = plc.io.parquet.read_parquet( plc.io.SourceInfo(self.paths), columns=with_columns, + filters=filters, nrows=n_rows, skip_rows=self.skip_rows, ) @@ -348,6 +436,9 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: # TODO: consider nested column names? tbl_w_meta.column_names(include_children=False), ) + if filters is not None: + # Mask must have been applied. + return df elif self.typ == "ndjson": json_schema: list[tuple[str, str, list]] = [ (name, typ, []) for name, typ in self.schema.items() @@ -401,7 +492,6 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return df.filter(mask) -@dataclasses.dataclass class Cache(IR): """ Return a cached plan node. @@ -409,20 +499,25 @@ class Cache(IR): Used for CSE at the plan level. 
""" + __slots__ = ("key",) + _non_child = ("schema", "key") key: int """The cache key.""" - value: IR - """The unevaluated node to cache.""" + + def __init__(self, schema: Schema, key: int, value: IR): + self.schema = schema + self.key = key + self.children = (value,) def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" try: return cache[self.key] except KeyError: - return cache.setdefault(self.key, self.value.evaluate(cache=cache)) + (value,) = self.children + return cache.setdefault(self.key, value.evaluate(cache=cache)) -@dataclasses.dataclass class DataFrameScan(IR): """ Input from an existing polars DataFrame. @@ -430,13 +525,38 @@ class DataFrameScan(IR): This typically arises from ``q.collect().lazy()`` """ + __slots__ = ("df", "projection", "predicate") + _non_child = ("schema", "df", "projection", "predicate") df: Any """Polars LazyFrame object.""" - projection: list[str] + projection: tuple[str, ...] | None """List of columns to project out.""" predicate: expr.NamedExpr | None """Mask to apply.""" + def __init__( + self, + schema: Schema, + df: Any, + projection: Sequence[str] | None, + predicate: expr.NamedExpr | None, + ): + self.schema = schema + self.df = df + self.projection = tuple(projection) if projection is not None else None + self.predicate = predicate + self.children = () + + def get_hashable(self) -> Hashable: + """ + Hashable representation of the node. + + The (heavy) dataframe object is hashed as its id, so this is + not stable across runs, or repeat instances of the same equal dataframes. 
+ """ + schema_hash = tuple(self.schema.items()) + return (type(self), schema_hash, id(self.df), self.projection, self.predicate) + def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" pdf = pl.DataFrame._from_pydf(self.df) @@ -454,28 +574,39 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return df -@dataclasses.dataclass class Select(IR): """Produce a new dataframe selecting given expressions from an input.""" - df: IR - """Input dataframe.""" - expr: list[expr.NamedExpr] + __slots__ = ("exprs", "should_broadcast") + _non_child = ("schema", "exprs", "should_broadcast") + exprs: tuple[expr.NamedExpr, ...] """List of expressions to evaluate to form the new dataframe.""" should_broadcast: bool """Should columns be broadcast?""" + def __init__( + self, + schema: Schema, + exprs: Sequence[expr.NamedExpr], + should_broadcast: bool, # noqa: FBT001 + df: IR, + ): + self.schema = schema + self.exprs = tuple(exprs) + self.should_broadcast = should_broadcast + self.children = (df,) + def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" - df = self.df.evaluate(cache=cache) + (child,) = self.children + df = child.evaluate(cache=cache) # Handle any broadcasting - columns = [e.evaluate(df) for e in self.expr] + columns = [e.evaluate(df) for e in self.exprs] if self.should_broadcast: columns = broadcast(*columns) return DataFrame(columns) -@dataclasses.dataclass class Reduce(IR): """ Produce a new dataframe selecting given expressions from an input. @@ -483,36 +614,73 @@ class Reduce(IR): This is a special case of :class:`Select` where all outputs are a single row. """ - df: IR - """Input dataframe.""" - expr: list[expr.NamedExpr] + __slots__ = ("exprs",) + _non_child = ("schema", "exprs") + exprs: tuple[expr.NamedExpr, ...] 
"""List of expressions to evaluate to form the new dataframe.""" + def __init__( + self, schema: Schema, exprs: Sequence[expr.NamedExpr], df: IR + ): # pragma: no cover; polars doesn't emit this node yet + self.schema = schema + self.exprs = tuple(exprs) + self.children = (df,) + def evaluate( self, *, cache: MutableMapping[int, DataFrame] ) -> DataFrame: # pragma: no cover; polars doesn't emit this node yet """Evaluate and return a dataframe.""" - df = self.df.evaluate(cache=cache) - columns = broadcast(*(e.evaluate(df) for e in self.expr)) + (child,) = self.children + df = child.evaluate(cache=cache) + columns = broadcast(*(e.evaluate(df) for e in self.exprs)) assert all(column.obj.size() == 1 for column in columns) return DataFrame(columns) -@dataclasses.dataclass class GroupBy(IR): """Perform a groupby.""" - df: IR - """Input dataframe.""" - agg_requests: list[expr.NamedExpr] - """List of expressions to evaluate groupwise.""" - keys: list[expr.NamedExpr] - """List of expressions forming the keys.""" + __slots__ = ( + "agg_requests", + "keys", + "maintain_order", + "options", + "agg_infos", + ) + _non_child = ("schema", "keys", "agg_requests", "maintain_order", "options") + keys: tuple[expr.NamedExpr, ...] + """Grouping keys.""" + agg_requests: tuple[expr.NamedExpr, ...] 
+ """Aggregation expressions.""" maintain_order: bool - """Should the order of the input dataframe be maintained?""" + """Preserve order in groupby.""" options: Any - """Options controlling style of groupby.""" - agg_infos: list[expr.AggInfo] = dataclasses.field(init=False) + """Arbitrary options.""" + + def __init__( + self, + schema: Schema, + keys: Sequence[expr.NamedExpr], + agg_requests: Sequence[expr.NamedExpr], + maintain_order: bool, # noqa: FBT001 + options: Any, + df: IR, + ): + self.schema = schema + self.keys = tuple(keys) + self.agg_requests = tuple(agg_requests) + self.maintain_order = maintain_order + self.options = options + self.children = (df,) + if self.options.rolling: + raise NotImplementedError( + "rolling window/groupby" + ) # pragma: no cover; rollingwindow constructor has already raised + if self.options.dynamic: + raise NotImplementedError("dynamic group by") + if any(GroupBy.check_agg(a.value) > 1 for a in self.agg_requests): + raise NotImplementedError("Nested aggregations in groupby") + self.agg_infos = [req.collect_agg(depth=0) for req in self.agg_requests] @staticmethod def check_agg(agg: expr.Expr) -> int: @@ -542,22 +710,10 @@ def check_agg(agg: expr.Expr) -> int: else: raise NotImplementedError(f"No handler for {agg=}") - def __post_init__(self) -> None: - """Check whether all the aggregations are implemented.""" - super().__post_init__() - if self.options.rolling: - raise NotImplementedError( - "rolling window/groupby" - ) # pragma: no cover; rollingwindow constructor has already raised - if any(GroupBy.check_agg(a.value) > 1 for a in self.agg_requests): - raise NotImplementedError("Nested aggregations in groupby") - self.agg_infos = [req.collect_agg(depth=0) for req in self.agg_requests] - if len(self.keys) == 0: - raise NotImplementedError("dynamic groupby") - def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" - df = self.df.evaluate(cache=cache) + (child,) = 
self.children + df = child.evaluate(cache=cache) keys = broadcast( *(k.evaluate(df) for k in self.keys), target_length=df.num_rows ) @@ -646,23 +802,20 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return DataFrame(broadcasted).slice(self.options.slice) -@dataclasses.dataclass class Join(IR): """A join of two dataframes.""" - left: IR - """Left frame.""" - right: IR - """Right frame.""" - left_on: list[expr.NamedExpr] + __slots__ = ("left_on", "right_on", "options") + _non_child = ("schema", "left_on", "right_on", "options") + left_on: tuple[expr.NamedExpr, ...] """List of expressions used as keys in the left frame.""" - right_on: list[expr.NamedExpr] + right_on: tuple[expr.NamedExpr, ...] """List of expressions used as keys in the right frame.""" options: tuple[ - Literal["inner", "left", "right", "full", "leftsemi", "leftanti", "cross"], + Literal["inner", "left", "right", "full", "semi", "anti", "cross"], bool, tuple[int, int] | None, - str | None, + str, bool, ] """ @@ -674,9 +827,20 @@ class Join(IR): - coalesce: should key columns be coalesced (only makes sense for outer joins) """ - def __post_init__(self) -> None: - """Validate preconditions.""" - super().__post_init__() + def __init__( + self, + schema: Schema, + left_on: Sequence[expr.NamedExpr], + right_on: Sequence[expr.NamedExpr], + options: Any, + left: IR, + right: IR, + ): + self.schema = schema + self.left_on = tuple(left_on) + self.right_on = tuple(right_on) + self.options = options + self.children = (left, right) if any( isinstance(e.value, expr.Literal) for e in itertools.chain(self.left_on, self.right_on) @@ -686,7 +850,7 @@ def __post_init__(self) -> None: @staticmethod @cache def _joiners( - how: Literal["inner", "left", "right", "full", "leftsemi", "leftanti"], + how: Literal["inner", "left", "right", "full", "semi", "anti"], ) -> tuple[ Callable, plc.copying.OutOfBoundsPolicy, plc.copying.OutOfBoundsPolicy | None ]: @@ -708,13 +872,13 @@ def _joiners( 
plc.copying.OutOfBoundsPolicy.NULLIFY, plc.copying.OutOfBoundsPolicy.NULLIFY, ) - elif how == "leftsemi": + elif how == "semi": return ( plc.join.left_semi_join, plc.copying.OutOfBoundsPolicy.DONT_CHECK, None, ) - elif how == "leftanti": + elif how == "anti": return ( plc.join.left_anti_join, plc.copying.OutOfBoundsPolicy.DONT_CHECK, @@ -777,10 +941,8 @@ def _reorder_maps( def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" - left = self.left.evaluate(cache=cache) - right = self.right.evaluate(cache=cache) + left, right = (c.evaluate(cache=cache) for c in self.children) how, join_nulls, zlice, suffix, coalesce = self.options - suffix = "_right" if suffix is None else suffix if how == "cross": # Separate implementation, since cross_join returns the # result, not the gather maps @@ -802,7 +964,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: columns[left.num_columns :], right.column_names, strict=True ) ] - return DataFrame([*left_cols, *right_cols]) + return DataFrame([*left_cols, *right_cols]).slice(zlice) # TODO: Waiting on clarity based on https://github.com/pola-rs/polars/issues/17184 left_on = DataFrame(broadcast(*(e.evaluate(left) for e in self.left_on))) right_on = DataFrame(broadcast(*(e.evaluate(right) for e in self.right_on))) @@ -866,20 +1028,30 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return result.slice(zlice) -@dataclasses.dataclass class HStack(IR): """Add new columns to a dataframe.""" - df: IR - """Input dataframe.""" - columns: list[expr.NamedExpr] - """List of expressions to produce new columns.""" + __slots__ = ("columns", "should_broadcast") + _non_child = ("schema", "columns", "should_broadcast") should_broadcast: bool - """Should columns be broadcast?""" + """Should the resulting evaluated columns be broadcast to the same length.""" + + def __init__( + self, + schema: Schema, + columns: Sequence[expr.NamedExpr], + 
should_broadcast: bool, # noqa: FBT001 + df: IR, + ): + self.schema = schema + self.columns = tuple(columns) + self.should_broadcast = should_broadcast + self.children = (df,) def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" - df = self.df.evaluate(cache=cache) + (child,) = self.children + df = child.evaluate(cache=cache) columns = [c.evaluate(df) for c in self.columns] if self.should_broadcast: columns = broadcast(*columns, target_length=df.num_rows) @@ -895,20 +1067,36 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return df.with_columns(columns) -@dataclasses.dataclass class Distinct(IR): """Produce a new dataframe with distinct rows.""" - df: IR - """Input dataframe.""" + __slots__ = ("keep", "subset", "zlice", "stable") + _non_child = ("schema", "keep", "subset", "zlice", "stable") keep: plc.stream_compaction.DuplicateKeepOption - """Which rows to keep.""" - subset: set[str] | None - """Which columns to inspect when computing distinct rows.""" + """Which distinct value to keep.""" + subset: frozenset[str] | None + """Which columns should be used to define distinctness. 
If None, + then all columns are used.""" zlice: tuple[int, int] | None - """Optional slice to perform after compaction.""" + """Optional slice to apply to the result.""" stable: bool - """Should order be preserved?""" + """Should the result maintain ordering.""" + + def __init__( + self, + schema: Schema, + keep: plc.stream_compaction.DuplicateKeepOption, + subset: frozenset[str] | None, + zlice: tuple[int, int] | None, + stable: bool, # noqa: FBT001 + df: IR, + ): + self.schema = schema + self.keep = keep + self.subset = subset + self.zlice = zlice + self.stable = stable + self.children = (df,) _KEEP_MAP: ClassVar[dict[str, plc.stream_compaction.DuplicateKeepOption]] = { "first": plc.stream_compaction.DuplicateKeepOption.KEEP_FIRST, @@ -917,18 +1105,10 @@ class Distinct(IR): "any": plc.stream_compaction.DuplicateKeepOption.KEEP_ANY, } - def __init__(self, schema: Schema, df: IR, options: Any) -> None: - self.schema = schema - self.df = df - (keep, subset, maintain_order, zlice) = options - self.keep = Distinct._KEEP_MAP[keep] - self.subset = set(subset) if subset is not None else None - self.stable = maintain_order - self.zlice = zlice - def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" - df = self.df.evaluate(cache=cache) + (child,) = self.children + df = child.evaluate(cache=cache) if self.subset is None: indices = list(range(df.num_columns)) keys_sorted = all(c.is_sorted for c in df.column_map.values()) @@ -967,46 +1147,44 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return result.slice(self.zlice) -@dataclasses.dataclass class Sort(IR): """Sort a dataframe.""" - df: IR - """Input.""" - by: list[expr.NamedExpr] - """List of expressions to produce sort keys.""" - do_sort: Callable[..., plc.Table] - """pylibcudf sorting function.""" + __slots__ = ("by", "order", "null_order", "stable", "zlice") + _non_child = ("schema", "by", "order", "null_order", "stable", "zlice") 
+ by: tuple[expr.NamedExpr, ...] + """Sort keys.""" + order: tuple[plc.types.Order, ...] + """Sort order for each sort key.""" + null_order: tuple[plc.types.NullOrder, ...] + """Null sorting location for each sort key.""" + stable: bool + """Should the sort be stable?""" zlice: tuple[int, int] | None - """Optional slice to apply after sorting.""" - order: list[plc.types.Order] - """Order keys should be sorted in.""" - null_order: list[plc.types.NullOrder] - """Where nulls sort to.""" + """Optional slice to apply to the result.""" def __init__( self, schema: Schema, - df: IR, - by: list[expr.NamedExpr], - options: Any, + by: Sequence[expr.NamedExpr], + order: Sequence[plc.types.Order], + null_order: Sequence[plc.types.NullOrder], + stable: bool, # noqa: FBT001 zlice: tuple[int, int] | None, - ) -> None: + df: IR, + ): self.schema = schema - self.df = df - self.by = by + self.by = tuple(by) + self.order = tuple(order) + self.null_order = tuple(null_order) + self.stable = stable self.zlice = zlice - stable, nulls_last, descending = options - self.order, self.null_order = sorting.sort_order( - descending, nulls_last=nulls_last, num_keys=len(by) - ) - self.do_sort = ( - plc.sorting.stable_sort_by_key if stable else plc.sorting.sort_by_key - ) + self.children = (df,) def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" - df = self.df.evaluate(cache=cache) + (child,) = self.children + df = child.evaluate(cache=cache) sort_keys = broadcast( *(k.evaluate(df) for k in self.by), target_length=df.num_rows ) @@ -1016,11 +1194,14 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: for i, k in enumerate(sort_keys) if k.name in df.column_map and k.obj is df.column_map[k.name].obj } - table = self.do_sort( + do_sort = ( + plc.sorting.stable_sort_by_key if self.stable else plc.sorting.sort_by_key + ) + table = do_sort( df.table, plc.Table([k.obj for k in sort_keys]), - self.order, - 
self.null_order, + list(self.order), + list(self.null_order), ) columns: list[Column] = [] for name, c in zip(df.column_map, table.columns(), strict=True): @@ -1037,49 +1218,64 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return DataFrame(columns).slice(self.zlice) -@dataclasses.dataclass class Slice(IR): """Slice a dataframe.""" - df: IR - """Input.""" + __slots__ = ("offset", "length") + _non_child = ("schema", "offset", "length") offset: int """Start of the slice.""" length: int """Length of the slice.""" + def __init__(self, schema: Schema, offset: int, length: int, df: IR): + self.schema = schema + self.offset = offset + self.length = length + self.children = (df,) + def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" - df = self.df.evaluate(cache=cache) + (child,) = self.children + df = child.evaluate(cache=cache) return df.slice((self.offset, self.length)) -@dataclasses.dataclass class Filter(IR): """Filter a dataframe with a boolean mask.""" - df: IR - """Input.""" + __slots__ = ("mask",) + _non_child = ("schema", "mask") mask: expr.NamedExpr - """Expression evaluating to a mask.""" + """Expression to produce the filter mask.""" + + def __init__(self, schema: Schema, mask: expr.NamedExpr, df: IR): + self.schema = schema + self.mask = mask + self.children = (df,) def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" - df = self.df.evaluate(cache=cache) + (child,) = self.children + df = child.evaluate(cache=cache) (mask,) = broadcast(self.mask.evaluate(df), target_length=df.num_rows) return df.filter(mask) -@dataclasses.dataclass class Projection(IR): """Select a subset of columns from a dataframe.""" - df: IR - """Input.""" + __slots__ = () + _non_child = ("schema",) + + def __init__(self, schema: Schema, df: IR): + self.schema = schema + self.children = (df,) def evaluate(self, *, cache: 
MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" - df = self.df.evaluate(cache=cache) + (child,) = self.children + df = child.evaluate(cache=cache) # This can reorder things. columns = broadcast( *(df.column_map[name] for name in self.schema), target_length=df.num_rows @@ -1087,16 +1283,15 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return DataFrame(columns) -@dataclasses.dataclass class MapFunction(IR): """Apply some function to a dataframe.""" - df: IR - """Input.""" + __slots__ = ("name", "options") + _non_child = ("schema", "name", "options") name: str - """Function name.""" + """Name of the function to apply""" options: Any - """Arbitrary options, interpreted per function.""" + """Arbitrary name-specific options""" _NAMES: ClassVar[frozenset[str]] = frozenset( [ @@ -1111,9 +1306,11 @@ class MapFunction(IR): ] ) - def __post_init__(self) -> None: - """Validate preconditions.""" - super().__post_init__() + def __init__(self, schema: Schema, name: str, options: Any, df: IR): + self.schema = schema + self.name = name + self.options = options + self.children = (df,) if self.name not in MapFunction._NAMES: raise NotImplementedError(f"Unhandled map function {self.name}") if self.name == "explode": @@ -1127,7 +1324,7 @@ def __post_init__(self) -> None: old, new, _ = self.options # TODO: perhaps polars should validate renaming in the IR? 
if len(new) != len(set(new)) or ( - set(new) & (set(self.df.schema.keys()) - set(old)) + set(new) & (set(df.schema.keys()) - set(old)) ): raise NotImplementedError("Duplicate new names in rename.") elif self.name == "unpivot": @@ -1136,31 +1333,31 @@ def __post_init__(self) -> None: variable_name = "variable" if variable_name is None else variable_name if len(pivotees) == 0: index = frozenset(indices) - pivotees = [name for name in self.df.schema if name not in index] + pivotees = [name for name in df.schema if name not in index] if not all( - dtypes.can_cast(self.df.schema[p], self.schema[value_name]) - for p in pivotees + dtypes.can_cast(df.schema[p], self.schema[value_name]) for p in pivotees ): raise NotImplementedError( "Unpivot cannot cast all input columns to " f"{self.schema[value_name].id()}" ) - self.options = (indices, pivotees, variable_name, value_name) + self.options = (tuple(indices), tuple(pivotees), variable_name, value_name) def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" + (child,) = self.children if self.name == "rechunk": # No-op in our data model # Don't think this appears in a plan tree from python - return self.df.evaluate(cache=cache) # pragma: no cover + return child.evaluate(cache=cache) # pragma: no cover elif self.name == "rename": - df = self.df.evaluate(cache=cache) + df = child.evaluate(cache=cache) # final tag is "swapping" which is useful for the # optimiser (it blocks some pushdown operations) old, new, _ = self.options return df.rename_columns(dict(zip(old, new, strict=True))) elif self.name == "explode": - df = self.df.evaluate(cache=cache) + df = child.evaluate(cache=cache) ((to_explode,),) = self.options index = df.column_names.index(to_explode) subset = df.column_names_set - {to_explode} @@ -1170,7 +1367,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: elif self.name == "unpivot": indices, pivotees, variable_name, value_name = 
self.options npiv = len(pivotees) - df = self.df.evaluate(cache=cache) + df = child.evaluate(cache=cache) index_columns = [ Column(col, name=name) for col, name in zip( @@ -1209,37 +1406,40 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: raise AssertionError("Should never be reached") # pragma: no cover -@dataclasses.dataclass class Union(IR): """Concatenate dataframes vertically.""" - dfs: list[IR] - """List of inputs.""" + __slots__ = ("zlice",) + _non_child = ("schema", "zlice") zlice: tuple[int, int] | None - """Optional slice to apply after concatenation.""" + """Optional slice to apply to the result.""" - def __post_init__(self) -> None: - """Validate preconditions.""" - super().__post_init__() - schema = self.dfs[0].schema - if not all(s.schema == schema for s in self.dfs[1:]): + def __init__(self, schema: Schema, zlice: tuple[int, int] | None, *children: IR): + self.schema = schema + self.zlice = zlice + self.children = children + schema = self.children[0].schema + if not all(s.schema == schema for s in self.children[1:]): raise NotImplementedError("Schema mismatch") def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" # TODO: only evaluate what we need if we have a slice - dfs = [df.evaluate(cache=cache) for df in self.dfs] + dfs = [df.evaluate(cache=cache) for df in self.children] return DataFrame.from_table( plc.concatenate.concatenate([df.table for df in dfs]), dfs[0].column_names ).slice(self.zlice) -@dataclasses.dataclass class HConcat(IR): """Concatenate dataframes horizontally.""" - dfs: list[IR] - """List of inputs.""" + __slots__ = () + _non_child = ("schema",) + + def __init__(self, schema: Schema, *children: IR): + self.schema = schema + self.children = children @staticmethod def _extend_with_nulls(table: plc.Table, *, nrows: int) -> plc.Table: @@ -1271,7 +1471,7 @@ def _extend_with_nulls(table: plc.Table, *, nrows: int) -> plc.Table: def evaluate(self, *, 
cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" - dfs = [df.evaluate(cache=cache) for df in self.dfs] + dfs = [df.evaluate(cache=cache) for df in self.children] max_rows = max(df.num_rows for df in dfs) # Horizontal concatenation extends shorter tables with nulls dfs = [ diff --git a/python/cudf_polars/cudf_polars/dsl/nodebase.py b/python/cudf_polars/cudf_polars/dsl/nodebase.py new file mode 100644 index 00000000000..228d300f467 --- /dev/null +++ b/python/cudf_polars/cudf_polars/dsl/nodebase.py @@ -0,0 +1,152 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-License-Identifier: Apache-2.0 + +"""Base class for IR nodes, and utilities.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, ClassVar, Generic, TypeVar + +if TYPE_CHECKING: + from collections.abc import Hashable, Sequence + + from typing_extensions import Self + + +__all__: list[str] = ["Node"] + +T = TypeVar("T", bound="Node[Any]") + + +class Node(Generic[T]): + """ + An abstract node type. + + Nodes are immutable! + + This contains a (potentially empty) tuple of child nodes, + along with non-child data. For uniform reconstruction and + implementation of hashing and equality schemes, child classes need + to provide a certain amount of metadata when they are defined. + Specifically, the ``_non_child`` attribute must list, in-order, + the names of the slots that are passed to the constructor. The + constructor must take arguments in the order ``(*_non_child, + *children).`` + """ + + __slots__ = ("_hash_value", "_repr_value", "children") + _hash_value: int + _repr_value: str + children: tuple[T, ...] 
+ _non_child: ClassVar[tuple[str, ...]] = () + + def _ctor_arguments(self, children: Sequence[T]) -> Sequence[Any | T]: + return (*(getattr(self, attr) for attr in self._non_child), *children) + + def reconstruct( + self, children: Sequence[T] + ) -> Self: # pragma: no cover; not yet used + """ + Rebuild this node with new children. + + Parameters + ---------- + children + New children + + Returns + ------- + New node with new children. Non-child data is shared with the input. + """ + return type(self)(*self._ctor_arguments(children)) + + def get_hashable(self) -> Hashable: + """ + Return a hashable object for the node. + + Returns + ------- + Hashable object. + + Notes + ----- + This method is used by the :meth:`__hash__` implementation + (which does caching). If your node type needs special-case + handling for some of its attributes, override this method, not + :meth:`__hash__`. + """ + return (type(self), self._ctor_arguments(self.children)) + + def __hash__(self) -> int: + """ + Hash of an expression with caching. + + See Also + -------- + get_hashable + """ + try: + return self._hash_value + except AttributeError: + self._hash_value = hash(self.get_hashable()) + return self._hash_value + + def is_equal(self, other: Self) -> bool: + """ + Equality of two nodes of equal type. + + Override this in subclasses, rather than :meth:`__eq__`. + + Parameter + --------- + other + object of same type to compare to. + + Notes + ----- + Since nodes are immutable, this does common subexpression + elimination when two nodes are determined to be equal. + + :meth:`__eq__` handles the case where the objects being + compared are not of the same type, so in this method, we only + need to implement equality of equal types. + + Returns + ------- + True if the two nodes are equal, false otherwise. + """ + if self is other: + return True + result = self._ctor_arguments(self.children) == other._ctor_arguments( + other.children + ) + # Eager CSE for nodes that match. 
+ if result: + self.children = other.children + return result + + def __eq__(self, other: Any) -> bool: + """ + Equality of expressions. + + See Also + -------- + is_equal + """ + if type(self) is not type(other) or hash(self) != hash(other): + return False + else: + return self.is_equal(other) + + def __ne__(self, other: Any) -> bool: + """Inequality of expressions.""" + return not self.__eq__(other) + + def __repr__(self) -> str: + """String representation of an expression with caching.""" + try: + return self._repr_value + except AttributeError: + args = ", ".join(f"{arg!r}" for arg in self._ctor_arguments(self.children)) + self._repr_value = f"{type(self).__name__}({args})" + return self._repr_value diff --git a/python/cudf_polars/cudf_polars/dsl/to_ast.py b/python/cudf_polars/cudf_polars/dsl/to_ast.py new file mode 100644 index 00000000000..9a0838631cc --- /dev/null +++ b/python/cudf_polars/cudf_polars/dsl/to_ast.py @@ -0,0 +1,265 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-License-Identifier: Apache-2.0 + +"""Conversion of expression nodes to libcudf AST nodes.""" + +from __future__ import annotations + +from functools import partial, reduce, singledispatch +from typing import TYPE_CHECKING, TypeAlias + +from polars.polars import _expr_nodes as pl_expr + +import pylibcudf as plc +from pylibcudf import expressions as plc_expr + +from cudf_polars.dsl import expr +from cudf_polars.dsl.traversal import CachingVisitor +from cudf_polars.typing import GenericTransformer + +if TYPE_CHECKING: + from collections.abc import Mapping + +# Can't merge these op-mapping dictionaries because scoped enum values +# are exposed by cython with equality/hash based on their underlying +# representation type. So in a dict they are just treated as integers. 
+BINOP_TO_ASTOP = { + plc.binaryop.BinaryOperator.EQUAL: plc_expr.ASTOperator.EQUAL, + plc.binaryop.BinaryOperator.NULL_EQUALS: plc_expr.ASTOperator.NULL_EQUAL, + plc.binaryop.BinaryOperator.NOT_EQUAL: plc_expr.ASTOperator.NOT_EQUAL, + plc.binaryop.BinaryOperator.LESS: plc_expr.ASTOperator.LESS, + plc.binaryop.BinaryOperator.LESS_EQUAL: plc_expr.ASTOperator.LESS_EQUAL, + plc.binaryop.BinaryOperator.GREATER: plc_expr.ASTOperator.GREATER, + plc.binaryop.BinaryOperator.GREATER_EQUAL: plc_expr.ASTOperator.GREATER_EQUAL, + plc.binaryop.BinaryOperator.ADD: plc_expr.ASTOperator.ADD, + plc.binaryop.BinaryOperator.SUB: plc_expr.ASTOperator.SUB, + plc.binaryop.BinaryOperator.MUL: plc_expr.ASTOperator.MUL, + plc.binaryop.BinaryOperator.DIV: plc_expr.ASTOperator.DIV, + plc.binaryop.BinaryOperator.TRUE_DIV: plc_expr.ASTOperator.TRUE_DIV, + plc.binaryop.BinaryOperator.FLOOR_DIV: plc_expr.ASTOperator.FLOOR_DIV, + plc.binaryop.BinaryOperator.PYMOD: plc_expr.ASTOperator.PYMOD, + plc.binaryop.BinaryOperator.BITWISE_AND: plc_expr.ASTOperator.BITWISE_AND, + plc.binaryop.BinaryOperator.BITWISE_OR: plc_expr.ASTOperator.BITWISE_OR, + plc.binaryop.BinaryOperator.BITWISE_XOR: plc_expr.ASTOperator.BITWISE_XOR, + plc.binaryop.BinaryOperator.LOGICAL_AND: plc_expr.ASTOperator.LOGICAL_AND, + plc.binaryop.BinaryOperator.LOGICAL_OR: plc_expr.ASTOperator.LOGICAL_OR, + plc.binaryop.BinaryOperator.NULL_LOGICAL_AND: plc_expr.ASTOperator.NULL_LOGICAL_AND, + plc.binaryop.BinaryOperator.NULL_LOGICAL_OR: plc_expr.ASTOperator.NULL_LOGICAL_OR, +} + +UOP_TO_ASTOP = { + plc.unary.UnaryOperator.SIN: plc_expr.ASTOperator.SIN, + plc.unary.UnaryOperator.COS: plc_expr.ASTOperator.COS, + plc.unary.UnaryOperator.TAN: plc_expr.ASTOperator.TAN, + plc.unary.UnaryOperator.ARCSIN: plc_expr.ASTOperator.ARCSIN, + plc.unary.UnaryOperator.ARCCOS: plc_expr.ASTOperator.ARCCOS, + plc.unary.UnaryOperator.ARCTAN: plc_expr.ASTOperator.ARCTAN, + plc.unary.UnaryOperator.SINH: plc_expr.ASTOperator.SINH, + 
plc.unary.UnaryOperator.COSH: plc_expr.ASTOperator.COSH, + plc.unary.UnaryOperator.TANH: plc_expr.ASTOperator.TANH, + plc.unary.UnaryOperator.ARCSINH: plc_expr.ASTOperator.ARCSINH, + plc.unary.UnaryOperator.ARCCOSH: plc_expr.ASTOperator.ARCCOSH, + plc.unary.UnaryOperator.ARCTANH: plc_expr.ASTOperator.ARCTANH, + plc.unary.UnaryOperator.EXP: plc_expr.ASTOperator.EXP, + plc.unary.UnaryOperator.LOG: plc_expr.ASTOperator.LOG, + plc.unary.UnaryOperator.SQRT: plc_expr.ASTOperator.SQRT, + plc.unary.UnaryOperator.CBRT: plc_expr.ASTOperator.CBRT, + plc.unary.UnaryOperator.CEIL: plc_expr.ASTOperator.CEIL, + plc.unary.UnaryOperator.FLOOR: plc_expr.ASTOperator.FLOOR, + plc.unary.UnaryOperator.ABS: plc_expr.ASTOperator.ABS, + plc.unary.UnaryOperator.RINT: plc_expr.ASTOperator.RINT, + plc.unary.UnaryOperator.BIT_INVERT: plc_expr.ASTOperator.BIT_INVERT, + plc.unary.UnaryOperator.NOT: plc_expr.ASTOperator.NOT, +} + +SUPPORTED_STATISTICS_BINOPS = { + plc.binaryop.BinaryOperator.EQUAL, + plc.binaryop.BinaryOperator.NOT_EQUAL, + plc.binaryop.BinaryOperator.LESS, + plc.binaryop.BinaryOperator.LESS_EQUAL, + plc.binaryop.BinaryOperator.GREATER, + plc.binaryop.BinaryOperator.GREATER_EQUAL, +} + +REVERSED_COMPARISON = { + plc.binaryop.BinaryOperator.EQUAL: plc.binaryop.BinaryOperator.EQUAL, + plc.binaryop.BinaryOperator.NOT_EQUAL: plc.binaryop.BinaryOperator.NOT_EQUAL, + plc.binaryop.BinaryOperator.LESS: plc.binaryop.BinaryOperator.GREATER, + plc.binaryop.BinaryOperator.LESS_EQUAL: plc.binaryop.BinaryOperator.GREATER_EQUAL, + plc.binaryop.BinaryOperator.GREATER: plc.binaryop.BinaryOperator.LESS, + plc.binaryop.BinaryOperator.GREATER_EQUAL: plc.binaryop.BinaryOperator.LESS_EQUAL, +} + + +Transformer: TypeAlias = GenericTransformer[expr.Expr, plc_expr.Expression] + + +@singledispatch +def _to_ast(node: expr.Expr, self: Transformer) -> plc_expr.Expression: + """ + Translate an expression to a pylibcudf Expression. + + Parameters + ---------- + node + Expression to translate. 
+ self + Recursive transformer. The state dictionary should contain a + `for_parquet` key indicating if this transformation should + provide an expression suitable for use in parquet filters. + + If `for_parquet` is `False`, the dictionary should contain a + `name_to_index` mapping that maps column names to their + integer index in the table that will be used for evaluation of + the expression. + + Returns + ------- + pylibcudf Expression. + + Raises + ------ + NotImplementedError or KeyError if the expression cannot be translated. + """ + raise NotImplementedError(f"Unhandled expression type {type(node)}") + + +@_to_ast.register +def _(node: expr.Col, self: Transformer) -> plc_expr.Expression: + if self.state["for_parquet"]: + return plc_expr.ColumnNameReference(node.name) + return plc_expr.ColumnReference(self.state["name_to_index"][node.name]) + + +@_to_ast.register +def _(node: expr.Literal, self: Transformer) -> plc_expr.Expression: + return plc_expr.Literal(plc.interop.from_arrow(node.value)) + + +@_to_ast.register +def _(node: expr.BinOp, self: Transformer) -> plc_expr.Expression: + if node.op == plc.binaryop.BinaryOperator.NULL_NOT_EQUALS: + return plc_expr.Operation( + plc_expr.ASTOperator.NOT, + self( + # Reconstruct and apply, rather than directly + # constructing the right expression so we get the + # handling of parquet special cases for free. 
+ expr.BinOp( + node.dtype, plc.binaryop.BinaryOperator.NULL_EQUALS, *node.children + ) + ), + ) + if self.state["for_parquet"]: + op1_col, op2_col = (isinstance(op, expr.Col) for op in node.children) + if op1_col ^ op2_col: + op = node.op + if op not in SUPPORTED_STATISTICS_BINOPS: + raise NotImplementedError( + f"Parquet filter binop with column doesn't support {node.op!r}" + ) + op1, op2 = node.children + if op2_col: + (op1, op2) = (op2, op1) + op = REVERSED_COMPARISON[op] + if not isinstance(op2, expr.Literal): + raise NotImplementedError( + "Parquet filter binops must have form 'col binop literal'" + ) + return plc_expr.Operation(BINOP_TO_ASTOP[op], self(op1), self(op2)) + elif op1_col and op2_col: + raise NotImplementedError( + "Parquet filter binops must have one column reference not two" + ) + return plc_expr.Operation(BINOP_TO_ASTOP[node.op], *map(self, node.children)) + + +@_to_ast.register +def _(node: expr.BooleanFunction, self: Transformer) -> plc_expr.Expression: + if node.name == pl_expr.BooleanFunction.IsIn: + needles, haystack = node.children + if isinstance(haystack, expr.LiteralColumn) and len(haystack.value) < 16: + # 16 is an arbitrary limit + needle_ref = self(needles) + values = [ + plc_expr.Literal(plc.interop.from_arrow(v)) for v in haystack.value + ] + return reduce( + partial(plc_expr.Operation, plc_expr.ASTOperator.LOGICAL_OR), + ( + plc_expr.Operation(plc_expr.ASTOperator.EQUAL, needle_ref, value) + for value in values + ), + ) + if self.state["for_parquet"] and isinstance(node.children[0], expr.Col): + raise NotImplementedError( + f"Parquet filters don't support {node.name} on columns" + ) + if node.name == pl_expr.BooleanFunction.IsNull: + return plc_expr.Operation(plc_expr.ASTOperator.IS_NULL, self(node.children[0])) + elif node.name == pl_expr.BooleanFunction.IsNotNull: + return plc_expr.Operation( + plc_expr.ASTOperator.NOT, + plc_expr.Operation(plc_expr.ASTOperator.IS_NULL, self(node.children[0])), + ) + elif node.name == 
pl_expr.BooleanFunction.Not: + return plc_expr.Operation(plc_expr.ASTOperator.NOT, self(node.children[0])) + raise NotImplementedError(f"AST conversion does not support {node.name}") + + +@_to_ast.register +def _(node: expr.UnaryFunction, self: Transformer) -> plc_expr.Expression: + if isinstance(node.children[0], expr.Col) and self.state["for_parquet"]: + raise NotImplementedError( + f"Parquet filters don't support {node.name} on columns" + ) + return plc_expr.Operation( + UOP_TO_ASTOP[node._OP_MAPPING[node.name]], self(node.children[0]) + ) + + +def to_parquet_filter(node: expr.Expr) -> plc_expr.Expression | None: + """ + Convert an expression to libcudf AST nodes suitable for parquet filtering. + + Parameters + ---------- + node + Expression to convert. + + Returns + ------- + pylibcudf Expression if conversion is possible, otherwise None. + """ + mapper = CachingVisitor(_to_ast, state={"for_parquet": True}) + try: + return mapper(node) + except (KeyError, NotImplementedError): + return None + + +def to_ast( + node: expr.Expr, *, name_to_index: Mapping[str, int] +) -> plc_expr.Expression | None: + """ + Convert an expression to libcudf AST nodes suitable for compute_column. + + Parameters + ---------- + node + Expression to convert. + name_to_index + Mapping from column names to their index in the table that + will be used for expression evaluation. + + Returns + ------- + pylibcudf Expression if conversion is possible, otherwise None. 
+ """ + mapper = CachingVisitor( + _to_ast, state={"for_parquet": False, "name_to_index": name_to_index} + ) + try: + return mapper(node) + except (KeyError, NotImplementedError): + return None diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py index a0291037f01..5181214819e 100644 --- a/python/cudf_polars/cudf_polars/dsl/translate.py +++ b/python/cudf_polars/cudf_polars/dsl/translate.py @@ -5,22 +5,28 @@ from __future__ import annotations +import functools import json from contextlib import AbstractContextManager, nullcontext from functools import singledispatch -from typing import Any +from typing import TYPE_CHECKING, Any import pyarrow as pa -import pylibcudf as plc from typing_extensions import assert_never import polars as pl import polars.polars as plrs from polars.polars import _expr_nodes as pl_expr, _ir_nodes as pl_ir +import pylibcudf as plc + from cudf_polars.dsl import expr, ir +from cudf_polars.dsl.traversal import make_recursive, reuse_if_unchanged from cudf_polars.typing import NodeTraverser -from cudf_polars.utils import dtypes +from cudf_polars.utils import dtypes, sorting + +if TYPE_CHECKING: + from cudf_polars.typing import ExprTransformer __all__ = ["translate_ir", "translate_named_expr"] @@ -148,7 +154,7 @@ def _( with set_node(visitor, node.input): inp = translate_ir(visitor, n=None) exprs = [translate_named_expr(visitor, n=e) for e in node.expr] - return ir.Select(schema, inp, exprs, node.should_broadcast) + return ir.Select(schema, exprs, node.should_broadcast, inp) @_translate_ir.register @@ -161,11 +167,11 @@ def _( keys = [translate_named_expr(visitor, n=e) for e in node.keys] return ir.GroupBy( schema, - inp, - aggs, keys, + aggs, node.maintain_order, node.options, + inp, ) @@ -182,7 +188,71 @@ def _( with set_node(visitor, node.input_right): inp_right = translate_ir(visitor, n=None) right_on = [translate_named_expr(visitor, n=e) for e in node.right_on] - return ir.Join(schema, 
inp_left, inp_right, left_on, right_on, node.options) + if (how := node.options[0]) in { + "inner", + "left", + "right", + "full", + "cross", + "semi", + "anti", + }: + return ir.Join(schema, left_on, right_on, node.options, inp_left, inp_right) + else: + how, op1, op2 = how + if how != "ie_join": + raise NotImplementedError( + f"Unsupported join type {how}" + ) # pragma: no cover; asof joins not yet exposed + # No exposure of mixed/conditional joins in pylibcudf yet, so in + # the first instance, implement by doing a cross join followed by + # a filter. + _, join_nulls, zlice, suffix, coalesce = node.options + cross = ir.Join( + schema, + [], + [], + ("cross", join_nulls, None, suffix, coalesce), + inp_left, + inp_right, + ) + dtype = plc.DataType(plc.TypeId.BOOL8) + if op2 is None: + ops = [op1] + else: + ops = [op1, op2] + suffix = cross.options[3] + + # Column references in the right table refer to the post-join + # names, so with suffixes. + def _rename(e: expr.Expr, rec: ExprTransformer) -> expr.Expr: + if isinstance(e, expr.Col) and e.name in inp_left.schema: + return type(e)(e.dtype, f"{e.name}{suffix}") + return reuse_if_unchanged(e, rec) + + mapper = make_recursive(_rename) + right_on = [ + expr.NamedExpr( + f"{old.name}{suffix}" if old.name in inp_left.schema else old.name, new + ) + for new, old in zip( + (mapper(e.value) for e in right_on), right_on, strict=True + ) + ] + mask = functools.reduce( + functools.partial( + expr.BinOp, dtype, plc.binaryop.BinaryOperator.LOGICAL_AND + ), + ( + expr.BinOp(dtype, expr.BinOp._MAPPING[op], left.value, right.value) + for op, left, right in zip(ops, left_on, right_on, strict=True) + ), + ) + filtered = ir.Filter(schema, expr.NamedExpr("mask", mask), cross) + if zlice is not None: + offset, length = zlice + return ir.Slice(schema, offset, length, filtered) + return filtered @_translate_ir.register @@ -192,7 +262,7 @@ def _( with set_node(visitor, node.input): inp = translate_ir(visitor, n=None) exprs = 
[translate_named_expr(visitor, n=e) for e in node.exprs] - return ir.HStack(schema, inp, exprs, node.should_broadcast) + return ir.HStack(schema, exprs, node.should_broadcast, inp) @_translate_ir.register @@ -202,17 +272,23 @@ def _( with set_node(visitor, node.input): inp = translate_ir(visitor, n=None) exprs = [translate_named_expr(visitor, n=e) for e in node.expr] - return ir.Reduce(schema, inp, exprs) + return ir.Reduce(schema, exprs, inp) @_translate_ir.register def _( node: pl_ir.Distinct, visitor: NodeTraverser, schema: dict[str, plc.DataType] ) -> ir.IR: + (keep, subset, maintain_order, zlice) = node.options + keep = ir.Distinct._KEEP_MAP[keep] + subset = frozenset(subset) if subset is not None else None return ir.Distinct( schema, + keep, + subset, + zlice, + maintain_order, translate_ir(visitor, n=node.input), - node.options, ) @@ -223,14 +299,18 @@ def _( with set_node(visitor, node.input): inp = translate_ir(visitor, n=None) by = [translate_named_expr(visitor, n=e) for e in node.by_column] - return ir.Sort(schema, inp, by, node.sort_options, node.slice) + stable, nulls_last, descending = node.sort_options + order, null_order = sorting.sort_order( + descending, nulls_last=nulls_last, num_keys=len(by) + ) + return ir.Sort(schema, by, order, null_order, stable, node.slice, inp) @_translate_ir.register def _( node: pl_ir.Slice, visitor: NodeTraverser, schema: dict[str, plc.DataType] ) -> ir.IR: - return ir.Slice(schema, translate_ir(visitor, n=node.input), node.offset, node.len) + return ir.Slice(schema, node.offset, node.len, translate_ir(visitor, n=node.input)) @_translate_ir.register @@ -240,7 +320,7 @@ def _( with set_node(visitor, node.input): inp = translate_ir(visitor, n=None) mask = translate_named_expr(visitor, n=node.predicate) - return ir.Filter(schema, inp, mask) + return ir.Filter(schema, mask, inp) @_translate_ir.register @@ -259,10 +339,10 @@ def _( name, *options = node.function return ir.MapFunction( schema, - # TODO: merge_sorted breaks 
this pattern - translate_ir(visitor, n=node.input), name, options, + # TODO: merge_sorted breaks this pattern + translate_ir(visitor, n=node.input), ) @@ -271,7 +351,7 @@ def _( node: pl_ir.Union, visitor: NodeTraverser, schema: dict[str, plc.DataType] ) -> ir.IR: return ir.Union( - schema, [translate_ir(visitor, n=n) for n in node.inputs], node.options + schema, node.options, *(translate_ir(visitor, n=n) for n in node.inputs) ) @@ -279,7 +359,7 @@ def _( def _( node: pl_ir.HConcat, visitor: NodeTraverser, schema: dict[str, plc.DataType] ) -> ir.IR: - return ir.HConcat(schema, [translate_ir(visitor, n=n) for n in node.inputs]) + return ir.HConcat(schema, *(translate_ir(visitor, n=n) for n in node.inputs)) def translate_ir(visitor: NodeTraverser, *, n: int | None = None) -> ir.IR: @@ -309,8 +389,7 @@ def translate_ir(visitor: NodeTraverser, *, n: int | None = None) -> ir.IR: # IR is versioned with major.minor, minor is bumped for backwards # compatible changes (e.g. adding new nodes), major is bumped for # incompatible changes (e.g. renaming nodes). - # Polars 1.7 changes definition of the CSV reader options schema name. - if (version := visitor.version()) >= (3, 0): + if (version := visitor.version()) >= (4, 0): raise NotImplementedError( f"No support for polars IR {version=}" ) # pragma: no cover; no such version for now. diff --git a/python/cudf_polars/cudf_polars/dsl/traversal.py b/python/cudf_polars/cudf_polars/dsl/traversal.py new file mode 100644 index 00000000000..be8338cb9a9 --- /dev/null +++ b/python/cudf_polars/cudf_polars/dsl/traversal.py @@ -0,0 +1,175 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Traversal and visitor utilities for nodes.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Generic + +from cudf_polars.typing import U_contra, V_co + +if TYPE_CHECKING: + from collections.abc import Callable, Generator, Mapping, MutableMapping + + from cudf_polars.typing import GenericTransformer, NodeT + + +__all__: list[str] = [ + "traversal", + "reuse_if_unchanged", + "make_recursive", + "CachingVisitor", +] + + +def traversal(node: NodeT) -> Generator[NodeT, None, None]: + """ + Pre-order traversal of nodes in an expression. + + Parameters + ---------- + node + Root of expression to traverse. + + Yields + ------ + Unique nodes in the expression, parent before child, children + in-order from left to right. + """ + seen = {node} + lifo = [node] + + while lifo: + node = lifo.pop() + yield node + for child in reversed(node.children): + if child not in seen: + seen.add(child) + lifo.append(child) + + +def reuse_if_unchanged(node: NodeT, fn: GenericTransformer[NodeT, NodeT]) -> NodeT: + """ + Recipe for transforming nodes that returns the old object if unchanged. + + Parameters + ---------- + node + Node to recurse on + fn + Function to transform children + + Notes + ----- + This can be used as a generic "base case" handler when + writing transforms that take nodes and produce new nodes. + + Returns + ------- + Existing node `e` if transformed children are unchanged, otherwise + reconstructed node with new children. + """ + new_children = [fn(c) for c in node.children] + if all(new == old for new, old in zip(new_children, node.children, strict=True)): + return node + return node.reconstruct(new_children) + + +def make_recursive( + fn: Callable[[U_contra, GenericTransformer[U_contra, V_co]], V_co], + *, + state: Mapping[str, Any] | None = None, +) -> GenericTransformer[U_contra, V_co]: + """ + No-op wrapper for recursive visitors. 
+ + Facilitates using visitors that don't need caching but are written + in the same style. + + Parameters + ---------- + fn + Function to transform inputs to outputs. Should take as its + second argument a callable from input to output. + state + Arbitrary *immutable* state that should be accessible to the + visitor through the `state` property. + + Notes + ----- + All transformation functions *must* be free of side-effects. + + Usually, prefer a :class:`CachingVisitor`, but if we know that we + don't need caching in a transformation, then this no-op + approach is slightly cheaper. + + Returns + ------- + Recursive function without caching. + + See Also + -------- + CachingVisitor + """ + + def rec(node: U_contra) -> V_co: + return fn(node, rec) # type: ignore[arg-type] + + rec.state = state if state is not None else {} # type: ignore[attr-defined] + return rec # type: ignore[return-value] + + +class CachingVisitor(Generic[U_contra, V_co]): + """ + Caching wrapper for recursive visitors. + + Facilitates writing visitors where already computed results should + be cached and reused. The cache is managed automatically, and is + tied to the lifetime of the wrapper. + + Parameters + ---------- + fn + Function to transform inputs to outputs. Should take as its + second argument the recursive cache manager. + state + Arbitrary *immutable* state that should be accessible to the + visitor through the `state` property. + + Notes + ----- + All transformation functions *must* be free of side-effects. + + Returns + ------- + Recursive function with caching. + """ + + def __init__( + self, + fn: Callable[[U_contra, GenericTransformer[U_contra, V_co]], V_co], + *, + state: Mapping[str, Any] | None = None, + ) -> None: + self.fn = fn + self.cache: MutableMapping[U_contra, V_co] = {} + self.state = state if state is not None else {} + + def __call__(self, value: U_contra) -> V_co: + """ + Apply the function to a value. 
+ + Parameters + ---------- + value + The value to transform. + + Returns + ------- + A transformed value. + """ + try: + return self.cache[value] + except KeyError: + return self.cache.setdefault(value, self.fn(value, self)) diff --git a/python/cudf_polars/cudf_polars/testing/asserts.py b/python/cudf_polars/cudf_polars/testing/asserts.py index 7b6f3848fc4..7b45c1eaa06 100644 --- a/python/cudf_polars/cudf_polars/testing/asserts.py +++ b/python/cudf_polars/cudf_polars/testing/asserts.py @@ -151,7 +151,7 @@ def assert_collect_raises( collect_kwargs: dict[OptimizationArgs, bool] | None = None, polars_collect_kwargs: dict[OptimizationArgs, bool] | None = None, cudf_collect_kwargs: dict[OptimizationArgs, bool] | None = None, -): +) -> None: """ Assert that collecting the result of a query raises the expected exceptions. diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index 05b76d76808..e01ccd05527 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -16,7 +16,7 @@ from collections.abc import Mapping -def pytest_addoption(parser: pytest.Parser): +def pytest_addoption(parser: pytest.Parser) -> None: """Add plugin-specific options.""" group = parser.getgroup( "cudf-polars", "Plugin to set GPU as default engine for polars tests" @@ -28,7 +28,7 @@ def pytest_addoption(parser: pytest.Parser): ) -def pytest_configure(config: pytest.Config): +def pytest_configure(config: pytest.Config) -> None: """Enable use of this module as a pytest plugin to enable GPU collection.""" no_fallback = config.getoption("--cudf-polars-no-fallback") collect = polars.LazyFrame.collect @@ -53,12 +53,34 @@ def pytest_configure(config: pytest.Config): "tests/unit/io/test_lazy_parquet.py::test_parquet_is_in_statistics": "Debug output on stderr doesn't match", "tests/unit/io/test_lazy_parquet.py::test_parquet_statistics": "Debug output on stderr doesn't match", 
"tests/unit/io/test_lazy_parquet.py::test_parquet_different_schema[False]": "Needs cudf#16394", + "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-columns]": "Correctly raises but different error", + "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-row_groups]": "Correctly raises but different error", + "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-prefiltered]": "Correctly raises but different error", + "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-none]": "Correctly raises but different error", "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_mismatch_panic_17067[False]": "Needs cudf#16394", + "tests/unit/io/test_lazy_parquet.py::test_scan_parquet_ignores_dtype_mismatch_for_non_projected_columns_19249[False-False]": "Needs some variant of cudf#16394", + "tests/unit/io/test_lazy_parquet.py::test_scan_parquet_ignores_dtype_mismatch_for_non_projected_columns_19249[True-False]": "Needs some variant of cudf#16394", "tests/unit/io/test_lazy_parquet.py::test_parquet_slice_pushdown_non_zero_offset[False]": "Thrift data not handled correctly/slice pushdown wrong?", "tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read[False]": "Incomplete handling of projected reads with mismatching schemas, cudf#16394", "tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read_dtype_mismatch[False]": "Different exception raised, but correctly raises an exception", "tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read_missing_cols_from_first[False]": "Different exception raised, but correctly raises an exception", "tests/unit/io/test_parquet.py::test_read_parquet_only_loads_selected_columns_15098": "Memory usage won't be correct due to GPU", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection0-False-none]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection1-False-none]": 
"Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection0-False-prefiltered]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection1-False-prefiltered]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection0-False-row_groups]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection1-False-row_groups]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection0-False-columns]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection1-False-columns]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection0-True-none]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection1-True-none]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection0-True-prefiltered]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection1-True-prefiltered]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection0-True-row_groups]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection1-True-row_groups]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection0-True-columns]": "Mismatching column read cudf#16394", + "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection1-True-columns]": "Mismatching column read cudf#16394", "tests/unit/io/test_scan.py::test_scan[single-csv-async]": "Debug output on stderr doesn't match", 
"tests/unit/io/test_scan.py::test_scan_with_limit[single-csv-async]": "Debug output on stderr doesn't match", "tests/unit/io/test_scan.py::test_scan_with_filter[single-csv-async]": "Debug output on stderr doesn't match", @@ -107,6 +129,14 @@ def pytest_configure(config: pytest.Config): "tests/unit/operations/aggregation/test_aggregations.py::test_sum_empty_and_null_set": "libcudf sums column of all nulls to null, not zero", "tests/unit/operations/aggregation/test_aggregations.py::test_binary_op_agg_context_no_simplify_expr_12423": "groupby-agg of just literals should not produce collect_list", "tests/unit/operations/aggregation/test_aggregations.py::test_nan_inf_aggregation": "treatment of nans and nulls together is different in libcudf and polars in groupby-agg context", + "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func0-func0-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func0-func1-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func0-func2-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func0-func3-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func0-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func1-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func2-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func3-none]": 
"cudf-polars doesn't nullify division by zero", "tests/unit/operations/test_abs.py::test_abs_duration": "Need to raise for unsupported uops on timelike values", "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input7-expected7-Float32-Float32]": "Mismatching dtypes, needs cudf#15852", "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input10-expected10-Date-output_dtype10]": "Unsupported groupby-agg for a particular dtype", @@ -124,13 +154,6 @@ def pytest_configure(config: pytest.Config): "tests/unit/operations/test_group_by.py::test_group_by_binary_agg_with_literal": "Incorrect broadcasting of literals in groupby-agg", "tests/unit/operations/test_group_by.py::test_aggregated_scalar_elementwise_15602": "Unsupported boolean function/dtype combination in groupby-agg", "tests/unit/operations/test_group_by.py::test_schemas[data1-expr1-expected_select1-expected_gb1]": "Mismatching dtypes, needs cudf#15852", - "tests/unit/operations/test_group_by_dynamic.py::test_group_by_dynamic_by_monday_and_offset_5444": "IR needs to expose groupby-dynamic information", - "tests/unit/operations/test_group_by_dynamic.py::test_group_by_dynamic_label[left-expected0]": "IR needs to expose groupby-dynamic information", - "tests/unit/operations/test_group_by_dynamic.py::test_group_by_dynamic_label[right-expected1]": "IR needs to expose groupby-dynamic information", - "tests/unit/operations/test_group_by_dynamic.py::test_group_by_dynamic_label[datapoint-expected2]": "IR needs to expose groupby-dynamic information", - "tests/unit/operations/test_group_by_dynamic.py::test_rolling_dynamic_sortedness_check": "IR needs to expose groupby-dynamic information", - "tests/unit/operations/test_group_by_dynamic.py::test_group_by_dynamic_validation": "IR needs to expose groupby-dynamic information", - "tests/unit/operations/test_group_by_dynamic.py::test_group_by_dynamic_15225": "IR needs to expose groupby-dynamic information", 
"tests/unit/operations/test_join.py::test_cross_join_slice_pushdown": "Need to implement slice pushdown for cross joins", "tests/unit/sql/test_cast.py::test_cast_errors[values0-values::uint8-conversion from `f64` to `u64` failed]": "Casting that raises not supported on GPU", "tests/unit/sql/test_cast.py::test_cast_errors[values1-values::uint4-conversion from `i64` to `u32` failed]": "Casting that raises not supported on GPU", @@ -140,6 +163,7 @@ def pytest_configure(config: pytest.Config): "tests/unit/streaming/test_streaming_io.py::test_parquet_eq_statistics": "Debug output on stderr doesn't match", "tests/unit/test_cse.py::test_cse_predicate_self_join": "Debug output on stderr doesn't match", "tests/unit/test_empty.py::test_empty_9137": "Mismatching dtypes, needs cudf#15852", + "tests/unit/test_errors.py::test_error_on_empty_group_by": "Incorrect exception raised", # Maybe flaky, order-dependent? "tests/unit/test_projections.py::test_schema_full_outer_join_projection_pd_13287": "Order-specific result check, query is correct but in different order", "tests/unit/test_queries.py::test_group_by_agg_equals_zero_3535": "libcudf sums all nulls to null, not zero", @@ -148,7 +172,7 @@ def pytest_configure(config: pytest.Config): def pytest_collection_modifyitems( session: pytest.Session, config: pytest.Config, items: list[pytest.Item] -): +) -> None: """Mark known failing tests.""" if config.getoption("--cudf-polars-no-fallback"): # Don't xfail tests if running without fallback diff --git a/python/cudf_polars/cudf_polars/typing/__init__.py b/python/cudf_polars/cudf_polars/typing/__init__.py index 240b11bdf59..57c5fdaa7cf 100644 --- a/python/cudf_polars/cudf_polars/typing/__init__.py +++ b/python/cudf_polars/cudf_polars/typing/__init__.py @@ -5,20 +5,32 @@ from __future__ import annotations -from collections.abc import Mapping -from typing import TYPE_CHECKING, Literal, Protocol, Union - -import pylibcudf as plc +from collections.abc import Hashable, Mapping +from typing 
import TYPE_CHECKING, Any, Literal, Protocol, TypeVar, Union from polars.polars import _expr_nodes as pl_expr, _ir_nodes as pl_ir +import pylibcudf as plc + if TYPE_CHECKING: from collections.abc import Callable from typing import TypeAlias import polars as pl -IR: TypeAlias = Union[ + from cudf_polars.dsl import expr, ir, nodebase + +__all__: list[str] = [ + "PolarsIR", + "PolarsExpr", + "NodeTraverser", + "OptimizationArgs", + "GenericTransformer", + "ExprTransformer", + "IRTransformer", +] + +PolarsIR: TypeAlias = Union[ pl_ir.PythonScan, pl_ir.Scan, pl_ir.Cache, @@ -38,7 +50,7 @@ pl_ir.ExtContext, ] -Expr: TypeAlias = Union[ +PolarsExpr: TypeAlias = Union[ pl_expr.Function, pl_expr.Window, pl_expr.Literal, @@ -68,7 +80,7 @@ def set_node(self, n: int) -> None: """Set the current plan node to n.""" ... - def view_current_node(self) -> IR: + def view_current_node(self) -> PolarsIR: """Convert current plan node to python rep.""" ... @@ -80,7 +92,7 @@ def get_dtype(self, n: int) -> pl.DataType: """Get the datatype of the given expression id.""" ... - def view_expression(self, n: int) -> Expr: + def view_expression(self, n: int) -> PolarsExpr: """Convert the given expression to python rep.""" ... @@ -107,3 +119,29 @@ def set_udf( "cluster_with_columns", "no_optimization", ] + + +U_contra = TypeVar("U_contra", bound=Hashable, contravariant=True) +V_co = TypeVar("V_co", covariant=True) +NodeT = TypeVar("NodeT", bound="nodebase.Node[Any]") + + +class GenericTransformer(Protocol[U_contra, V_co]): + """Abstract protocol for recursive visitors.""" + + def __call__(self, __value: U_contra) -> V_co: + """Apply the visitor to the node.""" + ... + + @property + def state(self) -> Mapping[str, Any]: + """Arbitrary immutable state.""" + ... 
+ + +# Quotes to avoid circular import +ExprTransformer: TypeAlias = GenericTransformer["expr.Expr", "expr.Expr"] +"""Protocol for transformation of Expr nodes.""" + +IRTransformer: TypeAlias = GenericTransformer["ir.IR", "ir.IR"] +"""Protocol for transformation of IR nodes.""" diff --git a/python/cudf_polars/cudf_polars/utils/dtypes.py b/python/cudf_polars/cudf_polars/utils/dtypes.py index 4154a404e98..1d0479802ca 100644 --- a/python/cudf_polars/cudf_polars/utils/dtypes.py +++ b/python/cudf_polars/cudf_polars/utils/dtypes.py @@ -8,11 +8,12 @@ from functools import cache import pyarrow as pa -import pylibcudf as plc from typing_extensions import assert_never import polars as pl +import pylibcudf as plc + __all__ = ["from_polars", "downcast_arrow_lists", "can_cast"] diff --git a/python/cudf_polars/cudf_polars/utils/versions.py b/python/cudf_polars/cudf_polars/utils/versions.py index 4a7ad6b3cf2..a119cab3b74 100644 --- a/python/cudf_polars/cudf_polars/utils/versions.py +++ b/python/cudf_polars/cudf_polars/utils/versions.py @@ -12,11 +12,12 @@ POLARS_VERSION = parse(__version__) -POLARS_VERSION_LT_18 = POLARS_VERSION < parse("1.8") +POLARS_VERSION_LT_111 = POLARS_VERSION < parse("1.11") +POLARS_VERSION_LT_112 = POLARS_VERSION < parse("1.12") def _ensure_polars_version(): - if POLARS_VERSION_LT_18: + if POLARS_VERSION_LT_111: raise ImportError( - "cudf_polars requires py-polars v1.8 or greater." + "cudf_polars requires py-polars v1.11 or greater." ) # pragma: no cover diff --git a/python/cudf_polars/docs/overview.md b/python/cudf_polars/docs/overview.md index 7837a275f20..74b2cd4e5de 100644 --- a/python/cudf_polars/docs/overview.md +++ b/python/cudf_polars/docs/overview.md @@ -11,14 +11,17 @@ You will need: environment](https://github.com/rapidsai/cudf/blob/branch-24.12/CONTRIBUTING.md#setting-up-your-build-environment). The combined devcontainer works, or whatever your favourite approach is. -> ![NOTE] These instructions will get simpler as we merge code in. 
+:::{note} +These instructions will get simpler as we merge code in. +::: ## Installing polars -`cudf-polars` works with polars >= 1.3, as long as the internal IR -version doesn't get a major version bump. So `pip install polars>=1.3` -should work. For development, if we're adding things to the polars -side of things, we will need to build polars from source: +The `cudf-polars` `pyproject.toml` advertises which polars versions it +works with. So for pure `cudf-polars` development, installing as +normal and satisfying the dependencies in the repository is +sufficient. For development, if we're adding things to the polars side +of things, we will need to build polars from source: ```sh git clone https://github.com/pola-rs/polars @@ -36,7 +39,9 @@ pip install --upgrade uv uv pip install --upgrade -r py-polars/requirements-dev.txt ``` -> ![NOTE] plain `pip install` works fine, but `uv` is _much_ faster! +:::{note} +plain `pip install` works fine, but `uv` is _much_ faster! +::: Now we have the necessary machinery to build polars ```sh @@ -83,7 +88,7 @@ representation (IR). Second, an execution phase which executes using our IR. The translation phase receives the a low-level Rust `NodeTraverser` -object which delivers Python representations of the plan nodes (and +object that delivers Python representations of the plan nodes (and expressions) one at a time. During translation, we endeavour to raise `NotImplementedError` for any unsupported functionality. This way, if we can't execute something, we just don't modify the logical plan at @@ -126,7 +131,6 @@ arguments, at the moment, `raise_on_fail` is also supported, which raises, rather than falling back, during translation: ```python - result = q.collect(engine=pl.GPUEngine(raise_on_fail=True)) ``` @@ -144,13 +148,73 @@ changes. We can therefore attempt to detect the IR version appropriately. This should be done during IR translation in `translate.py`. 
-## Adding a handler for a new plan node +# IR design + +As noted, we translate the polars DSL into our own IR. This is both so +that we can smooth out minor version differences (advertised by +`NodeTraverser` version changes) within `cudf-polars`, and so that we +have the freedom to introduce new IR nodes and rewrite rules as might +be appropriate for GPU execution. + +To that end, we provide facilities for definition of nodes as well as +writing traversals and rewrite rules. The abstract base class `Node` +in `dsl/nodebase.py` defines the interface for implementing new nodes, +and provides many useful default methods. See also the docstrings of +the `Node` class. + +:::{note} +This generic implementation relies on nodes being treated as +*immutable*. Do not implement in-place modification of nodes; bad +things will happen. +::: + +## Defining nodes + +A concrete node type (`cudf-polars` has expression nodes, `Expr`, +and plan nodes, `IR`) should inherit from `Node`. Nodes have +two types of data: + +1. `children`: a tuple (possibly empty) of concrete nodes; +2. non-child: arbitrary data attached to the node that is _not_ a + concrete node. + +The base `Node` class requires that one advertise the names of the +non-child attributes in the `_non_child` class variable. The +constructor of the concrete node should take its arguments in the +order `*_non_child` (ordered as the class variable does) and then +`*children`. For example, the `Sort` node, which sorts a column +generated by an expression, has this definition: + +```python +class Expr(Node): + children: tuple[Expr, ...] 
+ +class Sort(Expr): + _non_child = ("dtype", "options") + children: tuple[Expr] + def __init__(self, dtype, options, column: Expr): + self.dtype = dtype + self.options = options + self.children = (column,) +``` + +By following this pattern, we get an automatic (caching) +implementation of `__hash__` and `__eq__`, as well as a useful +`reconstruct` method that will rebuild the node with new children. + +If you want to control the behaviour of `__hash__` and `__eq__` for a +single node, override (respectively) the `get_hashable` and `is_equal` +methods. + +## Adding new translation rules from the polars IR + +### Plan nodes -Plan node definitions live in `cudf_polars/dsl/ir.py`, these are -`dataclasses` that inherit from the base `IR` node. The evaluation of -a plan node is done by implementing the `evaluate` method. +Plan node definitions live in `cudf_polars/dsl/ir.py`, these all +inherit from the base `IR` node. The evaluation of a plan node is done +by implementing the `evaluate` method. -To translate the plan node, add a case handler in `translate_ir` which +To translate the plan node, add a case handler in `translate_ir` that lives in `cudf_polars/dsl/translate.py`. As well as child nodes that are plans, most plan nodes contain child @@ -163,25 +227,12 @@ translating a `Join` node, the left keys (expressions) should be translated with the left input active (and right keys with right input). To facilitate this, use the `set_node` context manager. -## Adding a handler for a new expression node +### Expression nodes Adding a handle for an expression node is very similar to a plan node. -Expressions are all defined in `cudf_polars/dsl/expr.py` and inherit -from `Expr`. Unlike plan nodes, these are not `dataclasses`, since it -is simpler for us to implement efficient hashing, repr, and equality if we -can write that ourselves. - -Every expression consists of two types of data: -1. child data (other `Expr`s) -2. 
non-child data (anything other than an `Expr`) -The generic implementations of special methods in the base `Expr` base -class require that the subclasses advertise which arguments to the -constructor are non-child in a `_non_child` class slot. The -constructor should then take arguments: -```python -def __init__(self, *non_child_data: Any, *children: Expr): -``` -Read the docstrings in the `Expr` class for more details. +Expressions are defined in `cudf_polars/dsl/expressions/` and exported +into the `dsl` namespace via `expr.py`. They inherit +from `Expr`. Expressions are evaluated by implementing a `do_evaluate` method that takes a `DataFrame` as context (this provides columns) along with an @@ -198,6 +249,124 @@ To simplify state tracking, all columns should be considered immutable on construction. This matches the "functional" description coming from the logical plan in any case, so is reasonably natural. +## Traversing and transforming nodes + +In addition to representing and evaluating nodes, we also provide +facilities for traversing a tree of nodes and defining transformation +rules in `dsl/traversal.py`. The simplest is `traversal`, a +[pre-order](https://en.wikipedia.org/wiki/Tree_traversal) visit of all +unique nodes in an expression. Use this if you want to know some +specific thing about an expression. For example, to determine if an +expression contains a `Literal` node: + +```python +def has_literal(node: Expr) -> bool: + return any(isinstance(e, Literal) for e in traversal(node)) +``` + +It is often convenient to provide (immutable) state to a visitor, as +well as some facility to perform DAG-aware rewrites (reusing a +transformation for an expression if we have already seen it). We +therefore adopt the following pattern of writing DAG-aware visitors. +Suppose we want a rewrite rule (`rewrite`) between expressions +(`Expr`) and some new type `T`. 
We define our general transformation +function `rewrite` with type `Expr -> (Expr -> T) -> T`: + +```python +from cudf_polars.typing import GenericTransformer + +@singledispatch +def rewrite(e: Expr, rec: GenericTransformer[Expr, T]) -> T: + ... +``` + +Note in particular that the function to perform the recursion is +passed as the second argument. Rather than defining methods on each +node in turn for a particular rewrite rule, we prefer free functions +and use `functools.singledispatch` to provide dispatching. We now, in +the usual fashion, register handlers for different expression types. +To use this function, we need to be able to provide both the +expression to convert and the recursive function itself. To do this we +must convert our `rewrite` function into something that only takes a +single argument (the expression to rewrite), but carries around +information about how to perform the recursion. To this end, we have +two utilities in `traversal.py`: + +- `make_recursive` and +- `CachingVisitor`. + +These both implement the `GenericTransformer` protocol, and can be +wrapped around a transformation function like `rewrite` to provide a +function `Expr -> T`. They also allow us to attach arbitrary +*immutable* state to our visitor by passing a `state` dictionary. This +dictionary can then be inspected by the concrete transformation +function. `make_recursive` is very simple, and provides no caching of +intermediate results (so any DAGs that are visited will be viewed as +trees). `CachingVisitor` provides the same interface, but maintains a +cache of intermediate results, and reuses them if the same expression +is seen again. + +Finally, for writing transformations that take nodes and deliver new +nodes (e.g. rewrite rules), we have a final utility +`reuse_if_unchanged` that can be used as a base case transformation +for node to node rewrites. 
It is a depth-first visit that transforms +children but only returns a new node with new children if the rewrite +of children returned new nodes. + +To see how these pieces fit together, let us consider writing a +`rename` function that takes an expression (potentially with +references to columns) along with a mapping defining a renaming +between (some subset of) column names. The goal is to deliver a new +expression with appropriate columns renamed. + +To start, we define the dispatch function +```python +from collections.abc import Mapping +from functools import singledispatch +from cudf_polars.dsl.traversal import ( + CachingVisitor, make_recursive, reuse_if_unchanged +) +from cudf_polars.dsl.expr import Col, Expr +from cudf_polars.typing import ExprTransformer + + +@singledispatch +def _rename(e: Expr, rec: ExprTransformer) -> Expr: + raise NotImplementedError(f"No handler for {type(e)}") +``` +then we register specific handlers, first for columns: +```python +@_rename.register +def _(e: Col, rec: ExprTransformer) -> Expr: + mapping = rec.state["mapping"] # state set on rec + if e.name in mapping: + # If we have a rename, return a new Col reference + # with a new name + return type(e)(e.dtype, mapping[e.name]) + return e +``` +and then for the remaining expressions +```python +_rename.register(Expr)(reuse_if_unchanged) +``` + +:::{note} +In this case, we could have put the generic handler in the `_rename` +function, however, then we would not get a nice error message if we +accidentally sent in an object of the incorrect type. 
+::: + +Finally we tie everything together with a public function: + +```python +def rename(e: Expr, mapping: Mapping[str, str]) -> Expr: + """Rename column references in an expression.""" + mapper = CachingVisitor(_rename, state={"mapping": mapping}) + # or + # mapper = make_recursive(_rename, state={"mapping": mapping}) + return mapper(e) +``` + # Containers Containers should be constructed as relatively lightweight objects diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index 5345fad41a2..a2c62ef9460 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -19,7 +19,7 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ - "polars>=1.8,<1.9", + "polars>=1.11,<1.13", "pylibcudf==24.12.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -60,7 +60,7 @@ xfail_strict = true [tool.coverage.report] exclude_also = [ "if TYPE_CHECKING:", - "class .*\\bProtocol\\):", + "class .*\\bProtocol(?:\\[[^]]+\\])?\\):", "assert_never\\(" ] # The cudf_polars test suite doesn't exercise the plugin, so we omit @@ -188,7 +188,7 @@ required-imports = ["from __future__ import annotations"] [tool.ruff.lint.isort.sections] polars = ["polars"] -rapids = ["rmm", "cudf"] +rapids = ["rmm", "pylibcudf"] [tool.ruff.format] docstring-code-format = true diff --git a/python/cudf_polars/tests/containers/test_column.py b/python/cudf_polars/tests/containers/test_column.py index 1f26ab1af9f..95541b4ecc3 100644 --- a/python/cudf_polars/tests/containers/test_column.py +++ b/python/cudf_polars/tests/containers/test_column.py @@ -4,9 +4,10 @@ from __future__ import annotations import pyarrow -import pylibcudf as plc import pytest +import pylibcudf as plc + from cudf_polars.containers import Column diff --git a/python/cudf_polars/tests/containers/test_dataframe.py 
b/python/cudf_polars/tests/containers/test_dataframe.py index 5c68fb8f0aa..d68c8d90163 100644 --- a/python/cudf_polars/tests/containers/test_dataframe.py +++ b/python/cudf_polars/tests/containers/test_dataframe.py @@ -3,11 +3,12 @@ from __future__ import annotations -import pylibcudf as plc import pytest import polars as pl +import pylibcudf as plc + from cudf_polars.containers import Column, DataFrame from cudf_polars.testing.asserts import assert_gpu_result_equal diff --git a/python/cudf_polars/tests/dsl/test_expr.py b/python/cudf_polars/tests/dsl/test_expr.py index b7d4672daca..de8fec301fe 100644 --- a/python/cudf_polars/tests/dsl/test_expr.py +++ b/python/cudf_polars/tests/dsl/test_expr.py @@ -3,9 +3,10 @@ from __future__ import annotations -import pylibcudf as plc import pytest +import pylibcudf as plc + from cudf_polars.dsl import expr @@ -73,3 +74,24 @@ def test_namedexpr_repr_stable(): b2 = expr.NamedExpr("b1", expr.Col(plc.DataType(plc.TypeId.INT8), "a")) assert repr(b1) == repr(b2) + + +def test_equality_cse(): + dt = plc.DataType(plc.TypeId.INT8) + + def make_expr(n1, n2): + a = expr.Col(plc.DataType(plc.TypeId.INT8), n1) + b = expr.Col(plc.DataType(plc.TypeId.INT8), n2) + + return expr.BinOp(dt, plc.binaryop.BinaryOperator.ADD, a, b) + + e1 = make_expr("a", "b") + e2 = make_expr("a", "b") + e3 = make_expr("a", "c") + + assert e1.children is not e2.children + assert e1 == e2 + assert e1.children is e2.children + assert e1 == e2 + assert e1 != e3 + assert e2 != e3 diff --git a/python/cudf_polars/tests/dsl/test_to_ast.py b/python/cudf_polars/tests/dsl/test_to_ast.py new file mode 100644 index 00000000000..57d794d4890 --- /dev/null +++ b/python/cudf_polars/tests/dsl/test_to_ast.py @@ -0,0 +1,79 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import pytest + +import polars as pl +from polars.testing import assert_frame_equal + +import pylibcudf as plc + +import cudf_polars.dsl.ir as ir_nodes +from cudf_polars import translate_ir +from cudf_polars.containers.dataframe import DataFrame, NamedColumn +from cudf_polars.dsl.to_ast import to_ast + + +@pytest.fixture(scope="module") +def df(): + return pl.LazyFrame( + { + "c": ["a", "b", "c", "d", "e", "f"], + "a": [1, 2, 3, None, 4, 5], + "b": pl.Series([None, None, 3, float("inf"), 4, 0], dtype=pl.Float64), + "d": [False, True, True, None, False, False], + } + ) + + +@pytest.mark.parametrize( + "expr", + [ + pl.col("a").is_in([0, 1]), + pl.col("a").is_between(0, 2), + (pl.col("a") < pl.col("b")).not_(), + pl.lit(2) > pl.col("a"), + pl.lit(2) >= pl.col("a"), + pl.lit(2) < pl.col("a"), + pl.lit(2) <= pl.col("a"), + pl.lit(0) == pl.col("a"), + pl.lit(1) != pl.col("a"), + (pl.col("b") < pl.lit(2, dtype=pl.Float64).sqrt()), + (pl.col("a") >= pl.lit(2)) & (pl.col("b") > 0), + pl.col("a").is_null(), + pl.col("a").is_not_null(), + pl.col("b").is_finite(), + pytest.param( + pl.col("a").sin(), + marks=pytest.mark.xfail(reason="Need to insert explicit casts"), + ), + pl.col("b").cos(), + pl.col("a").abs().is_between(0, 2), + pl.col("a").ne_missing(pl.lit(None, dtype=pl.Int64)), + [pl.col("a") * 2, pl.col("b") + pl.col("a")], + pl.col("d").not_(), + ], +) +def test_compute_column(expr, df): + q = df.select(expr) + ir = translate_ir(q._ldf.visit()) + + assert isinstance(ir, ir_nodes.Select) + table = ir.children[0].evaluate(cache={}) + name_to_index = {c.name: i for i, c in enumerate(table.columns)} + + def compute_column(e): + ast = to_ast(e.value, name_to_index=name_to_index) + if ast is not None: + return NamedColumn( + plc.transform.compute_column(table.table, ast), name=e.name + ) + return e.evaluate(table) + + got = DataFrame(map(compute_column, ir.exprs)).to_polars() + + expect = 
q.collect() + + assert_frame_equal(expect, got) diff --git a/python/cudf_polars/tests/dsl/test_traversal.py b/python/cudf_polars/tests/dsl/test_traversal.py new file mode 100644 index 00000000000..15c644d7978 --- /dev/null +++ b/python/cudf_polars/tests/dsl/test_traversal.py @@ -0,0 +1,229 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from functools import singledispatch + +import polars as pl +from polars.testing import assert_frame_equal + +import pylibcudf as plc + +from cudf_polars import translate_ir +from cudf_polars.dsl import expr, ir +from cudf_polars.dsl.traversal import ( + CachingVisitor, + make_recursive, + reuse_if_unchanged, + traversal, +) +from cudf_polars.typing import ExprTransformer, IRTransformer + + +def make_expr(dt, n1, n2): + a1 = expr.Col(dt, n1) + a2 = expr.Col(dt, n2) + + return expr.BinOp(dt, plc.binaryop.BinaryOperator.MUL, a1, a2) + + +def test_traversal_unique(): + dt = plc.DataType(plc.TypeId.INT8) + + e1 = make_expr(dt, "a", "a") + unique_exprs = list(traversal(e1)) + + assert len(unique_exprs) == 2 + assert set(unique_exprs) == {expr.Col(dt, "a"), e1} + assert unique_exprs == [e1, expr.Col(dt, "a")] + + e2 = make_expr(dt, "a", "b") + unique_exprs = list(traversal(e2)) + + assert len(unique_exprs) == 3 + assert set(unique_exprs) == {expr.Col(dt, "a"), expr.Col(dt, "b"), e2} + assert unique_exprs == [e2, expr.Col(dt, "a"), expr.Col(dt, "b")] + + e3 = make_expr(dt, "b", "a") + unique_exprs = list(traversal(e3)) + + assert len(unique_exprs) == 3 + assert set(unique_exprs) == {expr.Col(dt, "a"), expr.Col(dt, "b"), e3} + assert unique_exprs == [e3, expr.Col(dt, "b"), expr.Col(dt, "a")] + + +def rename(e, rec): + mapping = rec.state["mapping"] + if isinstance(e, expr.Col) and e.name in mapping: + return type(e)(e.dtype, mapping[e.name]) + return reuse_if_unchanged(e, rec) + + +def test_caching_visitor(): + dt = 
plc.DataType(plc.TypeId.INT8) + + e1 = make_expr(dt, "a", "b") + + mapper = CachingVisitor(rename, state={"mapping": {"b": "c"}}) + + renamed = mapper(e1) + assert renamed == make_expr(dt, "a", "c") + assert len(mapper.cache) == 3 + + e2 = make_expr(dt, "a", "a") + mapper = CachingVisitor(rename, state={"mapping": {"b": "c"}}) + + renamed = mapper(e2) + assert renamed == make_expr(dt, "a", "a") + assert len(mapper.cache) == 2 + mapper = CachingVisitor(rename, state={"mapping": {"a": "c"}}) + + renamed = mapper(e2) + assert renamed == make_expr(dt, "c", "c") + assert len(mapper.cache) == 2 + + +def test_noop_visitor(): + dt = plc.DataType(plc.TypeId.INT8) + + e1 = make_expr(dt, "a", "b") + + mapper = make_recursive(rename, state={"mapping": {"b": "c"}}) + + renamed = mapper(e1) + assert renamed == make_expr(dt, "a", "c") + + e2 = make_expr(dt, "a", "a") + mapper = make_recursive(rename, state={"mapping": {"b": "c"}}) + + renamed = mapper(e2) + assert renamed == make_expr(dt, "a", "a") + mapper = make_recursive(rename, state={"mapping": {"a": "c"}}) + + renamed = mapper(e2) + assert renamed == make_expr(dt, "c", "c") + + +def test_rewrite_ir_node(): + df = pl.LazyFrame({"a": [1, 2, 1], "b": [1, 3, 4]}) + q = df.group_by("a").agg(pl.col("b").sum()).sort("b") + + orig = translate_ir(q._ldf.visit()) + + new_df = pl.DataFrame({"a": [1, 1, 2], "b": [-1, -2, -4]}) + + def replace_df(node, rec): + if isinstance(node, ir.DataFrameScan): + return ir.DataFrameScan( + node.schema, new_df._df, node.projection, node.predicate + ) + return reuse_if_unchanged(node, rec) + + mapper = CachingVisitor(replace_df) + + new = mapper(orig) + + result = new.evaluate(cache={}).to_polars() + + expect = pl.DataFrame({"a": [2, 1], "b": [-4, -3]}) + + assert_frame_equal(result, expect) + + +def test_rewrite_scan_node(tmp_path): + left = pl.LazyFrame({"a": [1, 2, 3], "b": [1, 3, 4]}) + right = pl.DataFrame({"a": [1, 4, 2], "c": [1, 2, 3]}) + + right.write_parquet(tmp_path / "right.pq") + + 
right_s = pl.scan_parquet(tmp_path / "right.pq") + + q = left.join(right_s, on="a", how="inner") + + def replace_scan(node, rec): + if isinstance(node, ir.Scan): + return ir.DataFrameScan( + node.schema, right._df, node.with_columns, node.predicate + ) + return reuse_if_unchanged(node, rec) + + mapper = CachingVisitor(replace_scan) + + orig = translate_ir(q._ldf.visit()) + new = mapper(orig) + + result = new.evaluate(cache={}).to_polars() + + expect = q.collect() + + assert_frame_equal(result, expect, check_row_order=False) + + +def test_rewrite_names_and_ops(): + df = pl.LazyFrame({"a": [1, 2, 3], "b": [3, 4, 5], "c": [5, 6, 7], "d": [7, 9, 8]}) + + q = df.select(pl.col("a") - (pl.col("b") + pl.col("c") * 2), pl.col("d")).sort("d") + + # We will replace a -> d, c -> d, and addition with multiplication + expect = ( + df.select( + (pl.col("d") - (pl.col("b") * pl.col("d") * 2)).alias("a"), pl.col("d") + ) + .sort("d") + .collect() + ) + + qir = translate_ir(q._ldf.visit()) + + @singledispatch + def _transform(e: expr.Expr, fn: ExprTransformer) -> expr.Expr: + raise NotImplementedError("Unhandled") + + @_transform.register + def _(e: expr.Col, fn: ExprTransformer): + mapping = fn.state["mapping"] + if e.name in mapping: + return type(e)(e.dtype, mapping[e.name]) + return e + + @_transform.register + def _(e: expr.BinOp, fn: ExprTransformer): + if e.op == plc.binaryop.BinaryOperator.ADD: + return type(e)( + e.dtype, plc.binaryop.BinaryOperator.MUL, *map(fn, e.children) + ) + return reuse_if_unchanged(e, fn) + + _transform.register(expr.Expr)(reuse_if_unchanged) + + @singledispatch + def _rewrite(node: ir.IR, fn: IRTransformer) -> ir.IR: + raise NotImplementedError("Unhandled") + + @_rewrite.register + def _(node: ir.Select, fn: IRTransformer): + expr_mapper = fn.state["expr_mapper"] + return type(node)( + node.schema, + [expr.NamedExpr(e.name, expr_mapper(e.value)) for e in node.exprs], + node.should_broadcast, + fn(node.children[0]), + ) + + 
_rewrite.register(ir.IR)(reuse_if_unchanged) + + rewriter = CachingVisitor( + _rewrite, + state={ + "expr_mapper": CachingVisitor( + _transform, state={"mapping": {"a": "d", "c": "d"}} + ) + }, + ) + + new_ir = rewriter(qir) + + got = new_ir.evaluate(cache={}).to_polars() + + assert_frame_equal(expect, got) diff --git a/python/cudf_polars/tests/expressions/test_literal.py b/python/cudf_polars/tests/expressions/test_literal.py index ced49bdc254..52bc4a9ac71 100644 --- a/python/cudf_polars/tests/expressions/test_literal.py +++ b/python/cudf_polars/tests/expressions/test_literal.py @@ -2,11 +2,12 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations -import pylibcudf as plc import pytest import polars as pl +import pylibcudf as plc + from cudf_polars.testing.asserts import ( assert_gpu_result_equal, assert_ir_translation_raises, diff --git a/python/cudf_polars/tests/expressions/test_sort.py b/python/cudf_polars/tests/expressions/test_sort.py index 2a37683478b..62df8ce1498 100644 --- a/python/cudf_polars/tests/expressions/test_sort.py +++ b/python/cudf_polars/tests/expressions/test_sort.py @@ -4,11 +4,12 @@ import itertools -import pylibcudf as plc import pytest import polars as pl +import pylibcudf as plc + from cudf_polars import translate_ir from cudf_polars.testing.asserts import assert_gpu_result_equal diff --git a/python/cudf_polars/tests/test_config.py b/python/cudf_polars/tests/test_config.py index 3c3986be19b..9900f598e5f 100644 --- a/python/cudf_polars/tests/test_config.py +++ b/python/cudf_polars/tests/test_config.py @@ -10,7 +10,7 @@ import rmm -from cudf_polars.dsl.ir import IR +from cudf_polars.dsl.ir import DataFrameScan from cudf_polars.testing.asserts import ( assert_gpu_result_equal, assert_ir_translation_raises, @@ -18,10 +18,10 @@ def test_polars_verbose_warns(monkeypatch): - def raise_unimplemented(self): + def raise_unimplemented(self, *args): raise NotImplementedError("We don't support this") - monkeypatch.setattr(IR, 
"__post_init__", raise_unimplemented) + monkeypatch.setattr(DataFrameScan, "__init__", raise_unimplemented) q = pl.LazyFrame({}) # Ensure that things raise assert_ir_translation_raises(q, NotImplementedError) diff --git a/python/cudf_polars/tests/test_join.py b/python/cudf_polars/tests/test_join.py index 7d9ec98db97..8ca7a7b9264 100644 --- a/python/cudf_polars/tests/test_join.py +++ b/python/cudf_polars/tests/test_join.py @@ -2,14 +2,18 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations +from contextlib import nullcontext + import pytest import polars as pl +from polars.testing import assert_frame_equal from cudf_polars.testing.asserts import ( assert_gpu_result_equal, assert_ir_translation_raises, ) +from cudf_polars.utils.versions import POLARS_VERSION_LT_112 @pytest.fixture(params=[False, True], ids=["nulls_not_equal", "nulls_equal"]) @@ -22,6 +26,11 @@ def how(request): return request.param +@pytest.fixture(params=[None, (1, 5), (1, None), (0, 2), (0, None)]) +def zlice(request): + return request.param + + @pytest.fixture def left(): return pl.LazyFrame( @@ -37,8 +46,9 @@ def left(): def right(): return pl.LazyFrame( { - "a": [1, 4, 3, 7, None, None], - "c": [2, 3, 4, 5, 6, 7], + "a": [1, 4, 3, 7, None, None, 1], + "c": [2, 3, 4, 5, 6, 7, 8], + "d": [6, None, 7, 8, -1, 2, 4], } ) @@ -70,11 +80,31 @@ def test_coalesce_join(left, right, how, join_nulls, join_expr): query = left.join( right, on=join_expr, how=how, join_nulls=join_nulls, coalesce=True ) - assert_gpu_result_equal(query, check_row_order=False) + assert_gpu_result_equal(query, check_row_order=how == "left") -def test_cross_join(left, right): +def test_left_join_with_slice(left, right, join_nulls, zlice): + q = left.join(right, on="a", how="left", join_nulls=join_nulls, coalesce=True) + ctx = nullcontext() + if zlice is not None: + q_expect = q.collect().slice(*zlice) + q = q.slice(*zlice) + if POLARS_VERSION_LT_112 and (zlice == (1, 5) or zlice == (0, 2)): + # 
https://github.com/pola-rs/polars/issues/19403 + # https://github.com/pola-rs/polars/issues/19405 + ctx = pytest.raises(AssertionError) + assert_frame_equal( + q_expect, q.collect(engine=pl.GPUEngine(raise_on_fail=True)) + ) + + with ctx: + assert_gpu_result_equal(q) + + +def test_cross_join(left, right, zlice): q = left.join(right, how="cross") + if zlice is not None: + q = q.slice(*zlice) assert_gpu_result_equal(q) @@ -86,3 +116,26 @@ def test_join_literal_key_unsupported(left, right, left_on, right_on): q = left.join(right, left_on=left_on, right_on=right_on, how="inner") assert_ir_translation_raises(q, NotImplementedError) + + +@pytest.mark.parametrize( + "conditions", + [ + [pl.col("a") < pl.col("a_right")], + [pl.col("a_right") <= pl.col("a") * 2], + [pl.col("b") * 2 > pl.col("a_right"), pl.col("a") == pl.col("c_right")], + [pl.col("b") * 2 <= pl.col("a_right"), pl.col("a") < pl.col("c_right")], + [pl.col("b") <= pl.col("a_right") * 7, pl.col("a") < pl.col("d") * 2], + ], +) +def test_join_where(left, right, conditions, zlice): + q = left.join_where(right, *conditions) + + assert_gpu_result_equal(q, check_row_order=False) + + if zlice is not None: + q_len = q.slice(*zlice).select(pl.len()) + # Can't compare result, since row order is not guaranteed and + # therefore we only check the length + + assert_gpu_result_equal(q_len) diff --git a/python/cudf_polars/tests/test_parquet_filters.py b/python/cudf_polars/tests/test_parquet_filters.py new file mode 100644 index 00000000000..545a89250fc --- /dev/null +++ b/python/cudf_polars/tests/test_parquet_filters.py @@ -0,0 +1,60 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +import pytest + +import polars as pl +from polars.testing import assert_frame_equal + + +@pytest.fixture(scope="module") +def df(): + return pl.DataFrame( + { + "c": ["a", "b", "c", "d", "e", "f"], + "a": [1, 2, 3, None, 4, 5], + "b": pl.Series([None, None, 3, float("inf"), 4, 0], dtype=pl.Float64), + "d": [-1, 2, -3, None, 4, -5], + } + ) + + +@pytest.fixture(scope="module") +def pq_file(tmp_path_factory, df): + tmp_path = tmp_path_factory.mktemp("parquet_filter") + df.write_parquet(tmp_path / "tmp.pq", row_group_size=3) + return pl.scan_parquet(tmp_path / "tmp.pq") + + +@pytest.mark.parametrize( + "expr", + [ + pl.col("a").is_in([0, 1]), + pl.col("a").is_between(0, 2), + (pl.col("a") < 2).not_(), + pl.lit(2) > pl.col("a"), + pl.lit(2) >= pl.col("a"), + pl.lit(2) < pl.col("a"), + pl.lit(2) <= pl.col("a"), + pl.lit(0) == pl.col("a"), + pl.lit(1) != pl.col("a"), + pl.col("a") == pl.col("d"), + (pl.col("b") < pl.lit(2, dtype=pl.Float64).sqrt()), + (pl.col("a") >= pl.lit(2)) & (pl.col("b") > 0), + pl.col("b").is_finite(), + pl.col("a").is_null(), + pl.col("a").is_not_null(), + pl.col("a").abs().is_between(0, 2), + pl.col("a").ne_missing(pl.lit(None, dtype=pl.Int64)), + ], +) +@pytest.mark.parametrize("selection", [["c", "b"], ["a"], ["a", "c"], ["b"], "c"]) +def test_scan_by_hand(expr, selection, pq_file): + df = pq_file.collect() + q = pq_file.filter(expr).select(*selection) + # Not using assert_gpu_result_equal because + # https://github.com/pola-rs/polars/issues/19238 + got = q.collect(engine=pl.GPUEngine(raise_on_fail=True)) + expect = df.filter(expr).select(*selection) + assert_frame_equal(got, expect) diff --git a/python/cudf_polars/tests/utils/test_broadcast.py b/python/cudf_polars/tests/utils/test_broadcast.py index e7770bfadac..3b3b4f0f8db 100644 --- a/python/cudf_polars/tests/utils/test_broadcast.py +++ b/python/cudf_polars/tests/utils/test_broadcast.py @@ -3,9 +3,10 @@ from 
__future__ import annotations -import pylibcudf as plc import pytest +import pylibcudf as plc + from cudf_polars.containers import Column from cudf_polars.dsl.ir import broadcast diff --git a/python/custreamz/custreamz/tests/conftest.py b/python/custreamz/custreamz/tests/conftest.py index 1cda9b71387..c5135bc6414 100644 --- a/python/custreamz/custreamz/tests/conftest.py +++ b/python/custreamz/custreamz/tests/conftest.py @@ -2,6 +2,7 @@ import socket import pytest + from custreamz import kafka diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml index af45f49d9b4..a8ab05a3922 100644 --- a/python/custreamz/pyproject.toml +++ b/python/custreamz/pyproject.toml @@ -65,50 +65,20 @@ include = [ ] exclude = ["*tests*"] -[tool.isort] -line_length = 79 -multi_line_output = 3 -include_trailing_comma = true -force_grid_wrap = 0 -combine_as_imports = true -order_by_type = true -known_dask = [ - "dask", - "distributed", - "dask_cuda", -] -known_rapids = [ - "rmm", - "cudf", - "dask_cudf", -] -known_first_party = [ - "streamz", -] -default_section = "THIRDPARTY" -sections = [ - "FUTURE", - "STDLIB", - "THIRDPARTY", - "DASK", - "RAPIDS", - "FIRSTPARTY", - "LOCALFOLDER", -] -skip = [ - "thirdparty", - ".eggs", - ".git", - ".hg", - ".mypy_cache", - ".tox", - ".venv", - "_build", - "buck-out", - "build", - "dist", - "__init__.py", -] +[tool.ruff] +extend = "../../pyproject.toml" + +[tool.ruff.lint.isort] +combine-as-imports = true +known-first-party = ["streamz"] +section-order = ["future", "standard-library", "third-party", "dask", "rapids", "first-party", "local-folder"] + +[tool.ruff.lint.isort.sections] +dask = ["dask", "distributed", "dask_cuda"] +rapids = ["rmm", "cudf", "dask_cudf"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] [tool.pytest.ini_options] addopts = "--tb=native --strict-config --strict-markers" diff --git a/python/dask_cudf/dask_cudf/__init__.py b/python/dask_cudf/dask_cudf/__init__.py index 
04c2ad65b99..f9df22cc436 100644 --- a/python/dask_cudf/dask_cudf/__init__.py +++ b/python/dask_cudf/dask_cudf/__init__.py @@ -7,15 +7,15 @@ # do anything for dask==2024.2.0) config.set({"dataframe.query-planning-warning": False}) -import dask.dataframe as dd -from dask.dataframe import from_delayed +import dask.dataframe as dd # noqa: E402 +from dask.dataframe import from_delayed # noqa: E402 -import cudf +import cudf # noqa: E402 -from . import backends -from ._version import __git_commit__, __version__ -from .core import concat, from_cudf, from_dask_dataframe -from .expr import QUERY_PLANNING_ON +from . import backends # noqa: E402, F401 +from ._version import __git_commit__, __version__ # noqa: E402, F401 +from .core import concat, from_cudf, from_dask_dataframe # noqa: E402 +from .expr import QUERY_PLANNING_ON # noqa: E402 def read_csv(*args, **kwargs): @@ -55,9 +55,9 @@ def inner_func(*args, **kwargs): to_orc = raise_not_implemented_error("to_orc") else: - from .core import DataFrame, Index, Series - from .groupby import groupby_agg - from .io import read_text, to_orc + from .core import DataFrame, Index, Series # noqa: F401 + from .groupby import groupby_agg # noqa: F401 + from .io import read_text, to_orc # noqa: F401 __all__ = [ diff --git a/python/dask_cudf/dask_cudf/expr/__init__.py b/python/dask_cudf/dask_cudf/expr/__init__.py index a76b655ef42..6dadadd5263 100644 --- a/python/dask_cudf/dask_cudf/expr/__init__.py +++ b/python/dask_cudf/dask_cudf/expr/__init__.py @@ -12,8 +12,8 @@ config.set({"dataframe.shuffle.method": "tasks"}) try: - import dask_cudf.expr._collection - import dask_cudf.expr._expr + import dask_cudf.expr._collection # noqa: F401 + import dask_cudf.expr._expr # noqa: F401 except ImportError as err: # Dask *should* raise an error before this. 
diff --git a/python/dask_cudf/dask_cudf/expr/_expr.py b/python/dask_cudf/dask_cudf/expr/_expr.py index af83a01da98..c7cf66fbffd 100644 --- a/python/dask_cudf/dask_cudf/expr/_expr.py +++ b/python/dask_cudf/dask_cudf/expr/_expr.py @@ -6,11 +6,20 @@ from dask_expr import new_collection from dask_expr._cumulative import CumulativeBlockwise from dask_expr._expr import Elemwise, Expr, RenameAxis, VarColumns +from dask_expr._groupby import ( + DecomposableGroupbyAggregation, + GroupbyAggregation, +) from dask_expr._reductions import Reduction, Var from dask_expr.io.io import FusedParquetIO -from dask_expr.io.parquet import ReadParquetPyarrowFS - -from dask.dataframe.core import is_dataframe_like, make_meta, meta_nonempty +from dask_expr.io.parquet import FragmentWrapper, ReadParquetPyarrowFS + +from dask.dataframe.core import ( + _concat, + is_dataframe_like, + make_meta, + meta_nonempty, +) from dask.dataframe.dispatch import is_categorical_dtype from dask.typing import no_default @@ -21,6 +30,210 @@ ## +def _get_spec_info(gb): + if isinstance(gb.arg, (dict, list)): + aggs = gb.arg.copy() + else: + aggs = gb.arg + + if gb._slice and not isinstance(aggs, dict): + aggs = {gb._slice: aggs} + + gb_cols = gb._by_columns + if isinstance(gb_cols, str): + gb_cols = [gb_cols] + columns = [c for c in gb.frame.columns if c not in gb_cols] + if not isinstance(aggs, dict): + aggs = {col: aggs for col in columns} + + # Check if our output will have a MultiIndex; this will be the case if + # any value in the `aggs` dict is not a string (i.e. 
multiple/named + # aggregations per column) + str_cols_out = True + aggs_renames = {} + for col in aggs: + if isinstance(aggs[col], str) or callable(aggs[col]): + aggs[col] = [aggs[col]] + elif isinstance(aggs[col], dict): + str_cols_out = False + col_aggs = [] + for k, v in aggs[col].items(): + aggs_renames[col, v] = k + col_aggs.append(v) + aggs[col] = col_aggs + else: + str_cols_out = False + if col in gb_cols: + columns.append(col) + + return { + "aggs": aggs, + "columns": columns, + "str_cols_out": str_cols_out, + "aggs_renames": aggs_renames, + } + + +def _get_meta(gb): + spec_info = gb.spec_info + gb_cols = gb._by_columns + aggs = spec_info["aggs"].copy() + aggs_renames = spec_info["aggs_renames"] + if spec_info["str_cols_out"]: + # Metadata should use `str` for dict values if that is + # what the user originally specified (column names will + # be str, rather than tuples). + for col in aggs: + aggs[col] = aggs[col][0] + _meta = gb.frame._meta.groupby(gb_cols).agg(aggs) + if aggs_renames: + col_array = [] + agg_array = [] + for col, agg in _meta.columns: + col_array.append(col) + agg_array.append(aggs_renames.get((col, agg), agg)) + _meta.columns = pd.MultiIndex.from_arrays([col_array, agg_array]) + return _meta + + +class DecomposableCudfGroupbyAgg(DecomposableGroupbyAggregation): + sep = "___" + + @functools.cached_property + def spec_info(self): + return _get_spec_info(self) + + @functools.cached_property + def _meta(self): + return _get_meta(self) + + @property + def shuffle_by_index(self): + return False # We always group by column(s) + + @classmethod + def chunk(cls, df, *by, **kwargs): + from dask_cudf.groupby import _groupby_partition_agg + + return _groupby_partition_agg(df, **kwargs) + + @classmethod + def combine(cls, inputs, **kwargs): + from dask_cudf.groupby import _tree_node_agg + + return _tree_node_agg(_concat(inputs), **kwargs) + + @classmethod + def aggregate(cls, inputs, **kwargs): + from dask_cudf.groupby import _finalize_gb_agg + + 
return _finalize_gb_agg(_concat(inputs), **kwargs) + + @property + def chunk_kwargs(self) -> dict: + dropna = True if self.dropna is None else self.dropna + return { + "gb_cols": self._by_columns, + "aggs": self.spec_info["aggs"], + "columns": self.spec_info["columns"], + "dropna": dropna, + "sort": self.sort, + "sep": self.sep, + } + + @property + def combine_kwargs(self) -> dict: + dropna = True if self.dropna is None else self.dropna + return { + "gb_cols": self._by_columns, + "dropna": dropna, + "sort": self.sort, + "sep": self.sep, + } + + @property + def aggregate_kwargs(self) -> dict: + dropna = True if self.dropna is None else self.dropna + final_columns = self._slice or self._meta.columns + return { + "gb_cols": self._by_columns, + "aggs": self.spec_info["aggs"], + "columns": self.spec_info["columns"], + "final_columns": final_columns, + "as_index": True, + "dropna": dropna, + "sort": self.sort, + "sep": self.sep, + "str_cols_out": self.spec_info["str_cols_out"], + "aggs_renames": self.spec_info["aggs_renames"], + } + + +class CudfGroupbyAgg(GroupbyAggregation): + @functools.cached_property + def spec_info(self): + return _get_spec_info(self) + + @functools.cached_property + def _meta(self): + return _get_meta(self) + + def _lower(self): + return DecomposableCudfGroupbyAgg( + self.frame, + self.arg, + self.observed, + self.dropna, + self.split_every, + self.split_out, + self.sort, + self.shuffle_method, + self._slice, + *self.by, + ) + + +def _maybe_get_custom_expr( + gb, + aggs, + split_every=None, + split_out=None, + shuffle_method=None, + **kwargs, +): + from dask_cudf.groupby import ( + OPTIMIZED_AGGS, + _aggs_optimized, + _redirect_aggs, + ) + + if kwargs: + # Unsupported key-word arguments + return None + + if not hasattr(gb.obj._meta, "to_pandas"): + # Not cuDF-backed data + return None + + _aggs = _redirect_aggs(aggs) + if not _aggs_optimized(_aggs, OPTIMIZED_AGGS): + # One or more aggregations are unsupported + return None + + return 
CudfGroupbyAgg( + gb.obj.expr, + _aggs, + gb.observed, + gb.dropna, + split_every, + split_out, + gb.sort, + shuffle_method, + gb._slice, + *gb.by, + ) + + class CudfFusedParquetIO(FusedParquetIO): @staticmethod def _load_multiple_files( @@ -89,16 +302,34 @@ def _dataset_info(self): return dataset_info @staticmethod - def _table_to_pandas( - table, - index_name, - *args, - ): + def _table_to_pandas(table, index_name): df = cudf.DataFrame.from_arrow(table) if index_name is not None: df = df.set_index(index_name) return df + def _filtered_task(self, index: int): + columns = self.columns.copy() + index_name = self.index.name + if self.index is not None: + index_name = self.index.name + schema = self._dataset_info["schema"].remove_metadata() + if index_name: + if columns is None: + columns = list(schema.names) + columns.append(index_name) + return ( + self._table_to_pandas, + ( + self._fragment_to_table, + FragmentWrapper(self.fragments[index], filesystem=self.fs), + self.filters, + columns, + schema, + ), + index_name, + ) + def _tune_up(self, parent): if self._fusion_compression_factor >= 1: return diff --git a/python/dask_cudf/dask_cudf/expr/_groupby.py b/python/dask_cudf/dask_cudf/expr/_groupby.py index 65688115b59..8a16fe7615d 100644 --- a/python/dask_cudf/dask_cudf/expr/_groupby.py +++ b/python/dask_cudf/dask_cudf/expr/_groupby.py @@ -1,5 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from dask_expr._collection import new_collection from dask_expr._groupby import ( GroupBy as DXGroupBy, SeriesGroupBy as DXSeriesGroupBy, @@ -11,6 +12,8 @@ from cudf.core.groupby.groupby import _deprecate_collect +from dask_cudf.expr._expr import _maybe_get_custom_expr + ## ## Custom groupby classes ## @@ -54,9 +57,16 @@ def _translate_arg(arg): return arg -# TODO: These classes are mostly a work-around for missing -# `observed=False` support. 
-# See: https://github.com/rapidsai/cudf/issues/15173 +# We define our own GroupBy classes in Dask cuDF for +# the following reasons: +# (1) We want to use a custom `aggregate` algorithm +# that performs multiple aggregations on the +# same dataframe partition at once. The upstream +# algorithm breaks distinct aggregations into +# separate tasks. +# (2) We need to work around missing `observed=False` +# support: +# https://github.com/rapidsai/cudf/issues/15173 class GroupBy(DXGroupBy): @@ -89,8 +99,15 @@ def collect(self, **kwargs): _deprecate_collect() return self._single_agg(ListAgg, **kwargs) - def aggregate(self, arg, **kwargs): - return super().aggregate(_translate_arg(arg), **kwargs) + def aggregate(self, arg, fused=True, **kwargs): + if ( + fused + and (expr := _maybe_get_custom_expr(self, arg, **kwargs)) + is not None + ): + return new_collection(expr) + else: + return super().aggregate(_translate_arg(arg), **kwargs) class SeriesGroupBy(DXSeriesGroupBy): diff --git a/python/dask_cudf/dask_cudf/io/__init__.py b/python/dask_cudf/dask_cudf/io/__init__.py index 76bb2ea99b4..0421bd755f4 100644 --- a/python/dask_cudf/dask_cudf/io/__init__.py +++ b/python/dask_cudf/dask_cudf/io/__init__.py @@ -1,11 +1,11 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. 
-from .csv import read_csv -from .json import read_json -from .orc import read_orc, to_orc -from .text import read_text +from .csv import read_csv # noqa: F401 +from .json import read_json # noqa: F401 +from .orc import read_orc, to_orc # noqa: F401 +from .text import read_text # noqa: F401 try: - from .parquet import read_parquet, to_parquet + from .parquet import read_parquet, to_parquet # noqa: F401 except ImportError: pass diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index a781b8242fe..39ac6474958 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -383,6 +383,12 @@ def write_metadata(parts, fmd, fs, path, append=False, **kwargs): metadata_path = fs.sep.join([path, "_metadata"]) _meta = [] if append and fmd is not None: + # Convert to bytes: + if isinstance(fmd, pq.FileMetaData): + with BytesIO() as myio: + fmd.write_metadata_file(myio) + myio.seek(0) + fmd = np.frombuffer(myio.read(), dtype="uint8") _meta = [fmd] _meta.extend([parts[i][0]["meta"] for i in range(len(parts))]) _meta = ( diff --git a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py index 896c4169f5b..a29cf9a342a 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py @@ -15,7 +15,11 @@ import cudf import dask_cudf -from dask_cudf.tests.utils import skip_dask_expr, xfail_dask_expr +from dask_cudf.tests.utils import ( + require_dask_expr, + skip_dask_expr, + xfail_dask_expr, +) # Check if create_metadata_file is supported by # the current dask.dataframe version @@ -615,3 +619,48 @@ def test_timezone_column(tmpdir): got = dask_cudf.read_parquet(path) expect = cudf.read_parquet(path) dd.assert_eq(got, expect) + + +@require_dask_expr() +@pytest.mark.skipif( + not dask_cudf.backends.PYARROW_GE_15, + reason="Requires pyarrow 15", +) +@pytest.mark.parametrize("min_part_size", 
["1B", "1GB"]) +def test_read_parquet_arrow_filesystem(tmpdir, min_part_size): + tmp_path = str(tmpdir) + with dask.config.set( + { + "dataframe.backend": "cudf", + "dataframe.parquet.minimum-partition-size": min_part_size, + } + ): + dd.from_dict( + {"x": range(1000), "y": ["a", "b", "c", "d"] * 250}, + npartitions=10, + ).to_parquet(tmp_path, write_index=False) + df = cudf.read_parquet(tmp_path) + ddf = dask_cudf.read_parquet(tmp_path, filesystem="arrow") + dd.assert_eq(df, ddf, check_index=False) + assert isinstance(ddf._meta, cudf.DataFrame) + assert isinstance(ddf.compute(), cudf.DataFrame) + + +@pytest.mark.parametrize("write_metadata_file", [True, False]) +def test_to_parquet_append(tmpdir, write_metadata_file): + df = cudf.DataFrame({"a": [1, 2, 3]}) + ddf = dask_cudf.from_cudf(df, npartitions=1) + ddf.to_parquet( + tmpdir, + append=True, + write_metadata_file=write_metadata_file, + write_index=False, + ) + ddf.to_parquet( + tmpdir, + append=True, + write_metadata_file=write_metadata_file, + write_index=False, + ) + ddf2 = dask_cudf.read_parquet(tmpdir) + dd.assert_eq(cudf.concat([df, df]), ddf2) diff --git a/python/dask_cudf/dask_cudf/io/tests/test_s3.py b/python/dask_cudf/dask_cudf/io/tests/test_s3.py index cf8af82e112..90907f6fb99 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_s3.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_s3.py @@ -11,6 +11,8 @@ from dask.dataframe import assert_eq +import cudf + import dask_cudf from dask_cudf.tests.utils import QUERY_PLANNING_ON @@ -168,6 +170,8 @@ def test_read_parquet_filesystem(s3_base, s3so, pdf, filesystem): filesystem=filesystem, ) assert df.b.sum().compute() == 9 + assert isinstance(df._meta, cudf.DataFrame) + assert isinstance(df.compute(), cudf.DataFrame) def test_read_parquet_filesystem_explicit(s3_base, s3so, pdf): diff --git a/python/dask_cudf/dask_cudf/tests/test_groupby.py b/python/dask_cudf/dask_cudf/tests/test_groupby.py index e30474f6b94..042e69d86f4 100644 --- 
a/python/dask_cudf/dask_cudf/tests/test_groupby.py +++ b/python/dask_cudf/dask_cudf/tests/test_groupby.py @@ -14,7 +14,11 @@ import dask_cudf from dask_cudf.groupby import OPTIMIZED_AGGS, _aggs_optimized -from dask_cudf.tests.utils import QUERY_PLANNING_ON, xfail_dask_expr +from dask_cudf.tests.utils import ( + QUERY_PLANNING_ON, + require_dask_expr, + xfail_dask_expr, +) def assert_cudf_groupby_layers(ddf): @@ -556,10 +560,22 @@ def test_groupby_categorical_key(): ), ], ) +@pytest.mark.parametrize( + "fused", + [ + True, + pytest.param( + False, + marks=require_dask_expr("Not supported by legacy API"), + ), + ], +) @pytest.mark.parametrize("split_out", ["use_dask_default", 1, 2]) @pytest.mark.parametrize("split_every", [False, 4]) @pytest.mark.parametrize("npartitions", [1, 10]) -def test_groupby_agg_params(npartitions, split_every, split_out, as_index): +def test_groupby_agg_params( + npartitions, split_every, split_out, fused, as_index +): df = cudf.datasets.randomdata( nrows=150, dtypes={"name": str, "a": int, "b": int, "c": float}, @@ -574,6 +590,7 @@ def test_groupby_agg_params(npartitions, split_every, split_out, as_index): "c": ["mean", "std", "var"], } + fused_kwarg = {"fused": fused} if QUERY_PLANNING_ON else {} split_kwargs = {"split_every": split_every, "split_out": split_out} if split_out == "use_dask_default": split_kwargs.pop("split_out") @@ -593,6 +610,7 @@ def test_groupby_agg_params(npartitions, split_every, split_out, as_index): ddf.groupby(["name", "a"], sort=True, **maybe_as_index) .aggregate( agg_dict, + **fused_kwarg, **split_kwargs, ) .compute() @@ -614,6 +632,7 @@ def test_groupby_agg_params(npartitions, split_every, split_out, as_index): # Full check (`sort=False`) gr = ddf.groupby(["name", "a"], sort=False, **maybe_as_index).aggregate( agg_dict, + **fused_kwarg, **split_kwargs, ) pr = pddf.groupby(["name", "a"], sort=False).agg( diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index fbcd7ae5dfb..862e8f36eaa 
100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -69,50 +69,17 @@ version = {file = "dask_cudf/VERSION"} [tool.setuptools.packages.find] exclude = ["*tests*"] -[tool.isort] -line_length = 79 -multi_line_output = 3 -include_trailing_comma = true -force_grid_wrap = 0 -combine_as_imports = true -order_by_type = true +[tool.ruff] +extend = "../../pyproject.toml" -known_dask = [ - "dask", - "distributed", - "dask_cuda", -] -known_rapids = [ - "rmm", - "cudf", -] -known_first_party = [ - "dask_cudf", -] +[tool.ruff.lint.isort] +combine-as-imports = true +known-first-party = ["dask_cudf"] +section-order = ["future", "standard-library", "third-party", "dask", "rapids", "first-party", "local-folder"] -default_section = "THIRDPARTY" -sections = [ - "FUTURE", - "STDLIB", - "THIRDPARTY", - "DASK", - "RAPIDS", - "FIRSTPARTY", - "LOCALFOLDER", -] -skip = [ - "thirdparty", - ".eggs", - ".git", - ".hg", - ".mypy_cache", - ".tox", - ".venv", - "_build", - "buck-out", - "build", - "dist", -] +[tool.ruff.lint.isort.sections] +dask = ["dask", "distributed", "dask_cuda"] +rapids = ["rmm", "cudf"] [tool.pytest.ini_options] addopts = "--tb=native --strict-config --strict-markers" @@ -126,5 +93,8 @@ filterwarnings = [ # https://github.com/dask/partd/blob/main/partd/pandas.py#L198 "ignore:Passing a BlockManager to DataFrame is deprecated and will raise in a future version. Use public APIs instead.:DeprecationWarning", "ignore:String support for `aggregate_files` is experimental. 
Behavior may change in the future.:FutureWarning:dask", + # Dask now loudly throws warnings: https://github.com/dask/dask/pull/11437 + # When the legacy implementation is removed we can remove this warning and stop running pytests with `DASK_DATAFRAME__QUERY_PLANNING=False` + "ignore:The legacy Dask DataFrame implementation is deprecated and will be removed in a future version.*:FutureWarning", ] xfail_strict = true diff --git a/python/libcudf/pyproject.toml b/python/libcudf/pyproject.toml index 84660cbc276..c6d9ae56467 100644 --- a/python/libcudf/pyproject.toml +++ b/python/libcudf/pyproject.toml @@ -38,7 +38,7 @@ classifiers = [ "Environment :: GPU :: NVIDIA CUDA", ] dependencies = [ - "nvidia-nvcomp==4.0.1", + "nvidia-nvcomp==4.1.0.6", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] diff --git a/python/pylibcudf/pylibcudf/CMakeLists.txt b/python/pylibcudf/pylibcudf/CMakeLists.txt index 15dd2b4c34f..b1d9656afc2 100644 --- a/python/pylibcudf/pylibcudf/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/CMakeLists.txt @@ -26,6 +26,7 @@ set(cython_sources filling.pyx gpumemoryview.pyx groupby.pyx + hashing.pyx interop.pyx join.pyx json.pyx diff --git a/python/pylibcudf/pylibcudf/__init__.pxd b/python/pylibcudf/pylibcudf/__init__.pxd index aa67b4b1149..aa2ce957173 100644 --- a/python/pylibcudf/pylibcudf/__init__.pxd +++ b/python/pylibcudf/pylibcudf/__init__.pxd @@ -13,6 +13,8 @@ from . 
cimport ( expressions, filling, groupby, + hashing, + interop, join, json, labeling, @@ -62,6 +64,8 @@ __all__ = [ "filling", "gpumemoryview", "groupby", + "hashing", + "interop", "join", "json", "lists", diff --git a/python/pylibcudf/pylibcudf/__init__.py b/python/pylibcudf/pylibcudf/__init__.py index 4033062b7e2..62a2170f83e 100644 --- a/python/pylibcudf/pylibcudf/__init__.py +++ b/python/pylibcudf/pylibcudf/__init__.py @@ -22,6 +22,7 @@ expressions, filling, groupby, + hashing, interop, io, join, @@ -73,6 +74,7 @@ "filling", "gpumemoryview", "groupby", + "hashing", "interop", "io", "join", diff --git a/python/pylibcudf/pylibcudf/binaryop.pyx b/python/pylibcudf/pylibcudf/binaryop.pyx index 51b2b4cfaa3..eef73bf4e9d 100644 --- a/python/pylibcudf/pylibcudf/binaryop.pyx +++ b/python/pylibcudf/pylibcudf/binaryop.pyx @@ -100,6 +100,7 @@ cpdef bool is_supported_operation( The right hand side data type. op : BinaryOperator The operation to check. + Returns ------- bool diff --git a/python/pylibcudf/pylibcudf/datetime.pxd b/python/pylibcudf/pylibcudf/datetime.pxd index 72ce680ba7a..335ef435f9b 100644 --- a/python/pylibcudf/pylibcudf/datetime.pxd +++ b/python/pylibcudf/pylibcudf/datetime.pxd @@ -1,15 +1,56 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
-from pylibcudf.libcudf.datetime cimport datetime_component +from pylibcudf.column cimport Column +from pylibcudf.libcudf.datetime cimport datetime_component, rounding_frequency +from pylibcudf.scalar cimport Scalar -from .column cimport Column +ctypedef fused ColumnOrScalar: + Column + Scalar +cpdef Column extract_millisecond_fraction( + Column input +) + +cpdef Column extract_microsecond_fraction( + Column input +) -cpdef Column extract_year( - Column col +cpdef Column extract_nanosecond_fraction( + Column input ) cpdef Column extract_datetime_component( - Column col, + Column input, datetime_component component ) + +cpdef Column ceil_datetimes( + Column input, + rounding_frequency freq +) + +cpdef Column floor_datetimes( + Column input, + rounding_frequency freq +) + +cpdef Column round_datetimes( + Column input, + rounding_frequency freq +) + +cpdef Column add_calendrical_months( + Column timestamps, + ColumnOrScalar months, +) + +cpdef Column day_of_year(Column input) + +cpdef Column is_leap_year(Column input) + +cpdef Column last_day_of_month(Column input) + +cpdef Column extract_quarter(Column input) + +cpdef Column days_in_month(Column input) diff --git a/python/pylibcudf/pylibcudf/datetime.pyx b/python/pylibcudf/pylibcudf/datetime.pyx index ac4335cca56..9e5e709d81d 100644 --- a/python/pylibcudf/pylibcudf/datetime.pyx +++ b/python/pylibcudf/pylibcudf/datetime.pyx @@ -3,41 +3,106 @@ from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.datetime cimport ( + add_calendrical_months as cpp_add_calendrical_months, + ceil_datetimes as cpp_ceil_datetimes, datetime_component, + day_of_year as cpp_day_of_year, + days_in_month as cpp_days_in_month, extract_datetime_component as cpp_extract_datetime_component, - extract_year as cpp_extract_year, + extract_microsecond_fraction as cpp_extract_microsecond_fraction, + extract_millisecond_fraction as cpp_extract_millisecond_fraction, 
+ extract_nanosecond_fraction as cpp_extract_nanosecond_fraction, + extract_quarter as cpp_extract_quarter, + floor_datetimes as cpp_floor_datetimes, + is_leap_year as cpp_is_leap_year, + last_day_of_month as cpp_last_day_of_month, + round_datetimes as cpp_round_datetimes, + rounding_frequency, ) from pylibcudf.libcudf.datetime import \ datetime_component as DatetimeComponent # no-cython-lint +from pylibcudf.libcudf.datetime import \ + rounding_frequency as RoundingFrequency # no-cython-lint + +from cython.operator cimport dereference from .column cimport Column +cpdef Column extract_millisecond_fraction( + Column input +): + """ + Extract the millisecond from a datetime column. + + For details, see :cpp:func:`extract_millisecond_fraction`. + + Parameters + ---------- + input : Column + The column to extract the millisecond from. + + Returns + ------- + Column + Column with the extracted milliseconds. + """ + cdef unique_ptr[column] result + + with nogil: + result = cpp_extract_millisecond_fraction(input.view()) + return Column.from_libcudf(move(result)) + +cpdef Column extract_microsecond_fraction( + Column input +): + """ + Extract the microsecond fraction from a datetime column. + + For details, see :cpp:func:`extract_microsecond_fraction`. + + Parameters + ---------- + input : Column + The column to extract the microsecond fraction from. + + Returns + ------- + Column + Column with the extracted microsecond fractions. + """ + cdef unique_ptr[column] result + + with nogil: + result = cpp_extract_microsecond_fraction(input.view()) + return Column.from_libcudf(move(result)) -cpdef Column extract_year( - Column values +cpdef Column extract_nanosecond_fraction( + Column input ): """ - Extract the year from a datetime column. + Extract the nanosecond fraction from a datetime column. + + For details, see :cpp:func:`extract_nanosecond_fraction`. Parameters ---------- - values : Column - The column to extract the year from. 
+ input : Column + The column to extract the nanosecond fraction from. Returns ------- Column - Column with the extracted years. + Column with the extracted nanosecond fractions. """ cdef unique_ptr[column] result with nogil: - result = cpp_extract_year(values.view()) + result = cpp_extract_nanosecond_fraction(input.view()) return Column.from_libcudf(move(result)) cpdef Column extract_datetime_component( - Column values, + Column input, datetime_component component ): """ @@ -47,7 +112,7 @@ cpdef Column extract_datetime_component( Parameters ---------- - values : Column + input : Column The column to extract the component from. component : DatetimeComponent The datetime component to extract. @@ -60,5 +125,237 @@ cpdef Column extract_datetime_component( cdef unique_ptr[column] result with nogil: - result = cpp_extract_datetime_component(values.view(), component) + result = cpp_extract_datetime_component(input.view(), component) + return Column.from_libcudf(move(result)) + +cpdef Column ceil_datetimes( + Column input, + rounding_frequency freq +): + """ + Round datetimes up to the nearest multiple of the given frequency. + + For details, see :cpp:func:`ceil_datetimes`. + + Parameters + ---------- + input : Column + The column of input datetime values. + freq : rounding_frequency + The frequency to round up to. + + Returns + ------- + Column + Column of the same datetime resolution as the input column. + """ + cdef unique_ptr[column] result + + with nogil: + result = cpp_ceil_datetimes(input.view(), freq) + return Column.from_libcudf(move(result)) + +cpdef Column floor_datetimes( + Column input, + rounding_frequency freq +): + """ + Round datetimes down to the nearest multiple of the given frequency. + + For details, see :cpp:func:`floor_datetimes`. + + Parameters + ---------- + input : Column + The column of input datetime values. + freq : rounding_frequency + The frequency to round down to. 
+ + Returns + ------- + Column + Column of the same datetime resolution as the input column. + """ + cdef unique_ptr[column] result + + with nogil: + result = cpp_floor_datetimes(input.view(), freq) + return Column.from_libcudf(move(result)) + +cpdef Column round_datetimes( + Column input, + rounding_frequency freq +): + """ + Round datetimes to the nearest multiple of the given frequency. + + For details, see :cpp:func:`round_datetimes`. + + Parameters + ---------- + input : Column + The column of input datetime values. + freq : rounding_frequency + The frequency to round to. + + Returns + ------- + Column + Column of the same datetime resolution as the input column. + """ + cdef unique_ptr[column] result + + with nogil: + result = cpp_round_datetimes(input.view(), freq) + return Column.from_libcudf(move(result)) + +cpdef Column add_calendrical_months( + Column input, + ColumnOrScalar months, +): + """ + Adds or subtracts a number of months from the datetime + type and returns a timestamp column that is of the same + type as the input timestamps column. + + For details, see :cpp:func:`add_calendrical_months`. + + Parameters + ---------- + input : Column + The column of input timestamp values. + months : ColumnOrScalar + The number of months to add. + + Returns + ------- + Column + Column of computed timestamps. + """ + if not isinstance(months, (Column, Scalar)): + raise TypeError("Must pass a Column or Scalar") + + cdef unique_ptr[column] result + + with nogil: + result = cpp_add_calendrical_months( + input.view(), + months.view() if ColumnOrScalar is Column else + dereference(months.get()) + ) + return Column.from_libcudf(move(result)) + +cpdef Column day_of_year(Column input): + """ + Computes the day number since the start of + the year from the datetime. The value is between + [1, {365-366}]. + + For details, see :cpp:func:`day_of_year`. + + Parameters + ---------- + input : Column + The column of input datetime values. 
+ + Returns + ------- + Column + Column of day numbers. + """ + cdef unique_ptr[column] result + + with nogil: + result = cpp_day_of_year(input.view()) + return Column.from_libcudf(move(result)) + +cpdef Column is_leap_year(Column input): + """ + Check if the year of the given date is a leap year. + + For details, see :cpp:func:`is_leap_year`. + + Parameters + ---------- + input : Column + The column of input datetime values. + + Returns + ------- + Column + Column of bools indicating whether the given year + is a leap year. + """ + cdef unique_ptr[column] result + + with nogil: + result = cpp_is_leap_year(input.view()) + return Column.from_libcudf(move(result)) + +cpdef Column last_day_of_month(Column input): + """ + Computes the last day of the month. + + For details, see :cpp:func:`last_day_of_month`. + + Parameters + ---------- + input : Column + The column of input datetime values. + + Returns + ------- + Column + Column of ``TIMESTAMP_DAYS`` representing the last day + of the month. + """ + cdef unique_ptr[column] result + + with nogil: + result = cpp_last_day_of_month(input.view()) + return Column.from_libcudf(move(result)) + +cpdef Column extract_quarter(Column input): + """ + Returns the quarter (ie. a value from {1, 2, 3, 4}) + that the date is in. + + For details, see :cpp:func:`extract_quarter`. + + Parameters + ---------- + input : Column + The column of input datetime values. + + Returns + ------- + Column + Column indicating which quarter the date is in. + """ + cdef unique_ptr[column] result + + with nogil: + result = cpp_extract_quarter(input.view()) + return Column.from_libcudf(move(result)) + +cpdef Column days_in_month(Column input): + """ + Extract the number of days in the month. + + For details, see :cpp:func:`days_in_month`. + + Parameters + ---------- + input : Column + The column of input datetime values. + + Returns + ------- + Column + Column of the number of days in the given month. 
+ """ + cdef unique_ptr[column] result + + with nogil: + result = cpp_days_in_month(input.view()) return Column.from_libcudf(move(result)) diff --git a/python/pylibcudf/pylibcudf/expressions.pyx b/python/pylibcudf/pylibcudf/expressions.pyx index a44c9e25987..1535f68366b 100644 --- a/python/pylibcudf/pylibcudf/expressions.pyx +++ b/python/pylibcudf/pylibcudf/expressions.pyx @@ -5,7 +5,17 @@ from pylibcudf.libcudf.expressions import \ table_reference as TableReference # no-cython-lint from cython.operator cimport dereference -from libc.stdint cimport int32_t, int64_t +from libc.stdint cimport ( + int8_t, + int16_t, + int32_t, + int64_t, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) +from libcpp cimport bool from libcpp.memory cimport make_unique, unique_ptr from libcpp.string cimport string from libcpp.utility cimport move @@ -18,12 +28,14 @@ from pylibcudf.libcudf.scalar.scalar cimport ( ) from pylibcudf.libcudf.types cimport size_type, type_id from pylibcudf.libcudf.wrappers.durations cimport ( + duration_D, duration_ms, duration_ns, duration_s, duration_us, ) from pylibcudf.libcudf.wrappers.timestamps cimport ( + timestamp_D, timestamp_ms, timestamp_ns, timestamp_s, @@ -78,6 +90,34 @@ cdef class Literal(Expression): self.c_obj = move(make_unique[libcudf_exp.literal]( dereference(self.scalar.c_obj) )) + elif tid == type_id.INT16: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.INT8: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.UINT64: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.UINT32: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.UINT16: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.UINT8: + self.c_obj = 
move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) + elif tid == type_id.BOOL8: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) elif tid == type_id.FLOAT64: self.c_obj = move(make_unique[libcudf_exp.literal]( dereference(self.scalar.c_obj) @@ -110,6 +150,10 @@ cdef class Literal(Expression): self.c_obj = move(make_unique[libcudf_exp.literal]( dereference(self.scalar.c_obj) )) + elif tid == type_id.TIMESTAMP_DAYS: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) elif tid == type_id.DURATION_NANOSECONDS: self.c_obj = move(make_unique[libcudf_exp.literal]( dereference(self.scalar.c_obj) @@ -130,6 +174,10 @@ cdef class Literal(Expression): self.c_obj = move(make_unique[libcudf_exp.literal]( dereference(self.scalar.c_obj) )) + elif tid == type_id.DURATION_DAYS: + self.c_obj = move(make_unique[libcudf_exp.literal]( + dereference(self.scalar.c_obj) + )) else: raise NotImplementedError( f"Don't know how to make literal with type id {tid}" diff --git a/python/pylibcudf/pylibcudf/filling.pyx b/python/pylibcudf/pylibcudf/filling.pyx index 0372e1132cc..a47004a1e42 100644 --- a/python/pylibcudf/pylibcudf/filling.pyx +++ b/python/pylibcudf/pylibcudf/filling.pyx @@ -77,6 +77,10 @@ cpdef void fill_in_place( The index at which to stop filling. value : Scalar The value to fill with. + + Returns + ------- + None """ with nogil: @@ -101,6 +105,7 @@ cpdef Column sequence(size_type size, Scalar init, Scalar step): The initial value of the sequence step : Scalar The step of the sequence + Returns ------- pylibcudf.Column diff --git a/python/pylibcudf/pylibcudf/hashing.pxd b/python/pylibcudf/pylibcudf/hashing.pxd new file mode 100644 index 00000000000..2d070ddda69 --- /dev/null +++ b/python/pylibcudf/pylibcudf/hashing.pxd @@ -0,0 +1,30 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from libc.stdint cimport uint32_t, uint64_t + +from .column cimport Column +from .table cimport Table + + +cpdef Column murmurhash3_x86_32( + Table input, + uint32_t seed=* +) + +cpdef Table murmurhash3_x64_128( + Table input, + uint64_t seed=* +) + + +cpdef Column xxhash_64( + Table input, + uint64_t seed=* +) + +cpdef Column md5(Table input) +cpdef Column sha1(Table input) +cpdef Column sha224(Table input) +cpdef Column sha256(Table input) +cpdef Column sha384(Table input) +cpdef Column sha512(Table input) diff --git a/python/pylibcudf/pylibcudf/hashing.pyx b/python/pylibcudf/pylibcudf/hashing.pyx new file mode 100644 index 00000000000..9ea3d4d1bda --- /dev/null +++ b/python/pylibcudf/pylibcudf/hashing.pyx @@ -0,0 +1,240 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +from libc.stdint cimport uint32_t, uint64_t +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.hash cimport ( + DEFAULT_HASH_SEED, + md5 as cpp_md5, + murmurhash3_x64_128 as cpp_murmurhash3_x64_128, + murmurhash3_x86_32 as cpp_murmurhash3_x86_32, + sha1 as cpp_sha1, + sha224 as cpp_sha224, + sha256 as cpp_sha256, + sha384 as cpp_sha384, + sha512 as cpp_sha512, + xxhash_64 as cpp_xxhash_64, +) +from pylibcudf.libcudf.table.table cimport table + +from .column cimport Column +from .table cimport Table + +LIBCUDF_DEFAULT_HASH_SEED = DEFAULT_HASH_SEED + +cpdef Column murmurhash3_x86_32( + Table input, + uint32_t seed=DEFAULT_HASH_SEED +): + """Computes the MurmurHash3 32-bit hash value of each row in the given table. + + For details, see :cpp:func:`murmurhash3_x86_32`. 
+ + Parameters + ---------- + input : Table + The table of columns to hash + seed : uint32_t + Optional seed value to use for the hash function + + Returns + ------- + pylibcudf.Column + A column where each row is the hash of a row from the input + """ + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_murmurhash3_x86_32( + input.view(), + seed + ) + + return Column.from_libcudf(move(c_result)) + + +cpdef Table murmurhash3_x64_128( + Table input, + uint64_t seed=DEFAULT_HASH_SEED +): + """Computes the MurmurHash3 64-bit hash value of each row in the given table. + + For details, see :cpp:func:`murmurhash3_x64_128`. + + Parameters + ---------- + input : Table + The table of columns to hash + seed : uint64_t + Optional seed value to use for the hash function + + Returns + ------- + pylibcudf.Table + A table of two UINT64 columns + """ + cdef unique_ptr[table] c_result + with nogil: + c_result = cpp_murmurhash3_x64_128( + input.view(), + seed + ) + + return Table.from_libcudf(move(c_result)) + + +cpdef Column xxhash_64( + Table input, + uint64_t seed=DEFAULT_HASH_SEED +): + """Computes the xxHash 64-bit hash value of each row in the given table. + + For details, see :cpp:func:`xxhash_64`. + + Parameters + ---------- + input : Table + The table of columns to hash + seed : uint64_t + Optional seed value to use for the hash function + + Returns + ------- + pylibcudf.Column + A column where each row is the hash of a row from the input + """ + + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_xxhash_64( + input.view(), + seed + ) + + return Column.from_libcudf(move(c_result)) + + +cpdef Column md5(Table input): + """Computes the MD5 hash value of each row in the given table. + + For details, see :cpp:func:`md5`. 
+ + Parameters + ---------- + input : Table + The table of columns to hash + + Returns + ------- + pylibcudf.Column + A column where each row is the md5 hash of a row from the input + + """ + + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_md5(input.view()) + return Column.from_libcudf(move(c_result)) + +cpdef Column sha1(Table input): + """Computes the SHA-1 hash value of each row in the given table. + + For details, see :cpp:func:`sha1`. + + Parameters + ---------- + input : Table + The table of columns to hash + + Returns + ------- + pylibcudf.Column + A column where each row is the hash of a row from the input + """ + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_sha1(input.view()) + return Column.from_libcudf(move(c_result)) + + +cpdef Column sha224(Table input): + """Computes the SHA-224 hash value of each row in the given table. + + For details, see :cpp:func:`sha224`. + + Parameters + ---------- + input : Table + The table of columns to hash + + Returns + ------- + pylibcudf.Column + A column where each row is the hash of a row from the input + """ + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_sha224(input.view()) + return Column.from_libcudf(move(c_result)) + + +cpdef Column sha256(Table input): + """Computes the SHA-256 hash value of each row in the given table. + + For details, see :cpp:func:`sha256`. + + Parameters + ---------- + input : Table + The table of columns to hash + + Returns + ------- + pylibcudf.Column + A column where each row is the hash of a row from the input + """ + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_sha256(input.view()) + return Column.from_libcudf(move(c_result)) + + +cpdef Column sha384(Table input): + """Computes the SHA-384 hash value of each row in the given table. + + For details, see :cpp:func:`sha384`. 
+ + Parameters + ---------- + input : Table + The table of columns to hash + + Returns + ------- + pylibcudf.Column + A column where each row is the hash of a row from the input + """ + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_sha384(input.view()) + return Column.from_libcudf(move(c_result)) + + +cpdef Column sha512(Table input): + """Computes the SHA-512 hash value of each row in the given table. + + For details, see :cpp:func:`sha512`. + + Parameters + ---------- + input : Table + The table of columns to hash + + Returns + ------- + pylibcudf.Column + A column where each row is the hash of a row from the input + """ + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_sha512(input.view()) + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/interop.pxd b/python/pylibcudf/pylibcudf/interop.pxd new file mode 100644 index 00000000000..2a0a8c15fdd --- /dev/null +++ b/python/pylibcudf/pylibcudf/interop.pxd @@ -0,0 +1,8 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.table cimport Table + + +cpdef Table from_dlpack(object managed_tensor) + +cpdef object to_dlpack(Table input) diff --git a/python/pylibcudf/pylibcudf/interop.pyx b/python/pylibcudf/pylibcudf/interop.pyx index 642516a1b90..61e812353b7 100644 --- a/python/pylibcudf/pylibcudf/interop.pyx +++ b/python/pylibcudf/pylibcudf/interop.pyx @@ -1,6 +1,11 @@ # Copyright (c) 2023-2024, NVIDIA CORPORATION. 
-from cpython.pycapsule cimport PyCapsule_GetPointer, PyCapsule_New +from cpython.pycapsule cimport ( + PyCapsule_GetPointer, + PyCapsule_IsValid, + PyCapsule_New, + PyCapsule_SetName, +) from libc.stdlib cimport free from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -16,11 +21,14 @@ from pylibcudf.libcudf.interop cimport ( ArrowArray, ArrowArrayStream, ArrowSchema, + DLManagedTensor, column_metadata, from_arrow_column as cpp_from_arrow_column, from_arrow_stream as cpp_from_arrow_stream, + from_dlpack as cpp_from_dlpack, to_arrow_host_raw, to_arrow_schema_raw, + to_dlpack as cpp_to_dlpack, ) from pylibcudf.libcudf.table.table cimport table @@ -315,3 +323,87 @@ def _to_arrow_scalar(cudf_object, metadata=None): # Note that metadata for scalars is primarily important for preserving # information on nested types since names are otherwise irrelevant. return to_arrow(Column.from_scalar(cudf_object, 1), metadata=metadata)[0] + + +cpdef Table from_dlpack(object managed_tensor): + """ + Convert a DLPack DLTensor into a cudf table. + + For details, see :cpp:func:`cudf::from_dlpack` + + Parameters + ---------- + managed_tensor : PyCapsule + A 1D or 2D column-major (Fortran order) tensor. + + Returns + ------- + Table + Table with a copy of the tensor data. + """ + if not PyCapsule_IsValid(managed_tensor, "dltensor"): + raise ValueError("Invalid PyCapsule object") + cdef unique_ptr[table] c_result + cdef DLManagedTensor* dlpack_tensor = PyCapsule_GetPointer( + managed_tensor, "dltensor" + ) + if dlpack_tensor is NULL: + raise ValueError("PyCapsule object contained a NULL pointer") + PyCapsule_SetName(managed_tensor, "used_dltensor") + + # Note: A copy is always performed when converting the dlpack + # data to a libcudf table. We also delete the dlpack_tensor pointer + # as the pointer is not deleted by libcudf's from_dlpack function. 
+ # TODO: https://github.com/rapidsai/cudf/issues/10874 + # TODO: https://github.com/rapidsai/cudf/issues/10849 + with nogil: + c_result = cpp_from_dlpack(dlpack_tensor) + + cdef Table result = Table.from_libcudf(move(c_result)) + dlpack_tensor.deleter(dlpack_tensor) + return result + + +cpdef object to_dlpack(Table input): + """ + Convert a cudf table into a DLPack DLTensor. + + For details, see :cpp:func:`cudf::to_dlpack` + + Parameters + ---------- + input : Table + A 1D or 2D column-major (Fortran order) tensor. + + Returns + ------- + PyCapsule + 1D or 2D DLPack tensor with a copy of the table data, or nullptr. + """ + for col in input._columns: + if col.null_count(): + raise ValueError( + "Cannot create a DLPack tensor with null values. " + "Input is required to have null count as zero." + ) + cdef DLManagedTensor *dlpack_tensor + + with nogil: + dlpack_tensor = cpp_to_dlpack(input.view()) + + return PyCapsule_New( + dlpack_tensor, + "dltensor", + dlmanaged_tensor_pycapsule_deleter + ) + + +cdef void dlmanaged_tensor_pycapsule_deleter(object pycap_obj) noexcept: + if PyCapsule_IsValid(pycap_obj, "used_dltensor"): + # we do not call a used capsule's deleter + return + cdef DLManagedTensor* dlpack_tensor = PyCapsule_GetPointer( + pycap_obj, "dltensor" + ) + if dlpack_tensor is not NULL: + dlpack_tensor.deleter(dlpack_tensor) diff --git a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd index 73cdfb96af5..8bbc120cff8 100644 --- a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
-from libc.stdint cimport uint8_t +from libc.stdint cimport int32_t, uint8_t from libcpp.memory cimport unique_ptr from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -41,14 +41,14 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: datetime_component component ) except + - ctypedef enum rounding_frequency "cudf::datetime::rounding_frequency": - DAY "cudf::datetime::rounding_frequency::DAY" - HOUR "cudf::datetime::rounding_frequency::HOUR" - MINUTE "cudf::datetime::rounding_frequency::MINUTE" - SECOND "cudf::datetime::rounding_frequency::SECOND" - MILLISECOND "cudf::datetime::rounding_frequency::MILLISECOND" - MICROSECOND "cudf::datetime::rounding_frequency::MICROSECOND" - NANOSECOND "cudf::datetime::rounding_frequency::NANOSECOND" + cpdef enum class rounding_frequency(int32_t): + DAY + HOUR + MINUTE + SECOND + MILLISECOND + MICROSECOND + NANOSECOND cdef unique_ptr[column] ceil_datetimes( const column_view& column, rounding_frequency freq @@ -64,6 +64,10 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: const column_view& timestamps, const column_view& months ) except + + cdef unique_ptr[column] add_calendrical_months( + const column_view& timestamps, + const scalar& months + ) except + cdef unique_ptr[column] day_of_year(const column_view& column) except + cdef unique_ptr[column] is_leap_year(const column_view& column) except + cdef unique_ptr[column] last_day_of_month( diff --git a/python/pylibcudf/pylibcudf/libcudf/hash.pxd b/python/pylibcudf/pylibcudf/libcudf/hash.pxd index 51678ba69d8..c4222bc9dc5 100644 --- a/python/pylibcudf/pylibcudf/libcudf/hash.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/hash.pxd @@ -3,6 +3,7 @@ from libc.stdint cimport uint32_t, uint64_t from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column 
from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view @@ -10,36 +11,44 @@ from pylibcudf.libcudf.table.table_view cimport table_view cdef extern from "cudf/hashing.hpp" namespace "cudf::hashing" nogil: - cdef unique_ptr[column] murmurhash3_x86_32 "cudf::hashing::murmurhash3_x86_32" ( + cdef unique_ptr[column] murmurhash3_x86_32( const table_view& input, const uint32_t seed - ) except + + ) except +libcudf_exception_handler - cdef unique_ptr[column] md5 "cudf::hashing::md5" ( + cdef unique_ptr[table] murmurhash3_x64_128( + const table_view& input, + const uint64_t seed + ) except +libcudf_exception_handler + + cdef unique_ptr[column] md5( const table_view& input - ) except + + ) except +libcudf_exception_handler - cdef unique_ptr[column] sha1 "cudf::hashing::sha1" ( + cdef unique_ptr[column] sha1( const table_view& input - ) except + + ) except +libcudf_exception_handler - cdef unique_ptr[column] sha224 "cudf::hashing::sha224" ( + cdef unique_ptr[column] sha224( const table_view& input - ) except + + ) except +libcudf_exception_handler - cdef unique_ptr[column] sha256 "cudf::hashing::sha256" ( + cdef unique_ptr[column] sha256( const table_view& input - ) except + + ) except +libcudf_exception_handler - cdef unique_ptr[column] sha384 "cudf::hashing::sha384" ( + cdef unique_ptr[column] sha384( const table_view& input - ) except + + ) except +libcudf_exception_handler - cdef unique_ptr[column] sha512 "cudf::hashing::sha512" ( + cdef unique_ptr[column] sha512( const table_view& input - ) except + + ) except +libcudf_exception_handler - cdef unique_ptr[column] xxhash_64 "cudf::hashing::xxhash_64" ( + cdef unique_ptr[column] xxhash_64( const table_view& input, const uint64_t seed - ) except + + ) except +libcudf_exception_handler + +cdef extern from "cudf/hashing.hpp" namespace "cudf" nogil: + cdef uint32_t DEFAULT_HASH_SEED diff --git a/python/pylibcudf/pylibcudf/libcudf/hash.pyx 
b/python/pylibcudf/pylibcudf/libcudf/hash.pyx new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/pylibcudf/pylibcudf/libcudf/interop.pxd b/python/pylibcudf/pylibcudf/libcudf/interop.pxd index 30b97fdec34..b75e9ca7001 100644 --- a/python/pylibcudf/pylibcudf/libcudf/interop.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/interop.pxd @@ -32,11 +32,13 @@ cdef extern from "cudf/interop.hpp" nogil: cdef extern from "cudf/interop.hpp" namespace "cudf" \ nogil: - cdef unique_ptr[table] from_dlpack(const DLManagedTensor* tensor - ) except + + cdef unique_ptr[table] from_dlpack( + const DLManagedTensor* managed_tensor + ) except + - DLManagedTensor* to_dlpack(table_view input_table - ) except + + DLManagedTensor* to_dlpack( + const table_view& input + ) except + cdef cppclass column_metadata: column_metadata() except + diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd index 673bffa28ae..be3a2d75718 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd @@ -1,6 +1,7 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
from libc.stdint cimport int32_t +from libcpp cimport bool from libcpp.memory cimport unique_ptr from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -8,9 +9,9 @@ from pylibcudf.libcudf.types cimport size_type cdef extern from "nvtext/stemmer.hpp" namespace "nvtext" nogil: - ctypedef enum letter_type: - CONSONANT 'nvtext::letter_type::CONSONANT' - VOWEL 'nvtext::letter_type::VOWEL' + cpdef enum class letter_type: + CONSONANT + VOWEL cdef unique_ptr[column] porter_stemmer_measure( const column_view & strings diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/subword_tokenize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/subword_tokenize.pxd index aabac0a617b..8dac86d688d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/subword_tokenize.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/subword_tokenize.pxd @@ -9,14 +9,14 @@ from pylibcudf.libcudf.column.column_view cimport column_view cdef extern from "nvtext/subword_tokenize.hpp" namespace "nvtext" nogil: - cdef cppclass tokenizer_result "nvtext::tokenizer_result": + cdef cppclass tokenizer_result: uint32_t nrows_tensor uint32_t sequence_length unique_ptr[column] tensor_token_ids unique_ptr[column] tensor_attention_mask unique_ptr[column] tensor_metadata - cdef struct hashed_vocabulary "nvtext::hashed_vocabulary": + cdef cppclass hashed_vocabulary: uint16_t first_token_id uint16_t separator_token_id uint16_t unknown_token_id @@ -26,6 +26,8 @@ cdef extern from "nvtext/subword_tokenize.hpp" namespace "nvtext" nogil: unique_ptr[column] table unique_ptr[column] bin_coefficients unique_ptr[column] bin_offsets + unique_ptr[column] cp_metadata + unique_ptr[column] aux_cp_table cdef unique_ptr[hashed_vocabulary] load_vocabulary_file( const string &filename_hashed_vocabulary @@ -33,7 +35,7 @@ cdef extern from "nvtext/subword_tokenize.hpp" namespace "nvtext" nogil: cdef tokenizer_result subword_tokenize( const column_view & strings, - 
hashed_vocabulary & hashed_vocablary_obj, + hashed_vocabulary & hashed_vocabulary_obj, uint32_t max_sequence_length, uint32_t stride, bool do_lower, diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd index 40f0e2fa50c..6b0c90d0acc 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd @@ -6,6 +6,7 @@ from libcpp.vector cimport vector from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.strings.regex_flags cimport regex_flags from pylibcudf.libcudf.strings.regex_program cimport regex_program from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.types cimport size_type @@ -14,17 +15,18 @@ from pylibcudf.libcudf.types cimport size_type cdef extern from "cudf/strings/replace_re.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] replace_re( - column_view source_strings, - regex_program, - string_scalar repl, - size_type maxrepl) except + - - cdef unique_ptr[column] replace_with_backrefs( - column_view source_strings, - regex_program, - string repl) except + + column_view input, + regex_program prog, + string_scalar replacement, + size_type max_replace_count) except + cdef unique_ptr[column] replace_re( - column_view source_strings, + column_view input, vector[string] patterns, - column_view repls) except + + column_view replacements, + regex_flags flags) except + + + cdef unique_ptr[column] replace_with_backrefs( + column_view input, + regex_program prog, + string replacement) except + diff --git a/python/pylibcudf/pylibcudf/libcudf/transform.pxd b/python/pylibcudf/pylibcudf/libcudf/transform.pxd index d21510bd731..47d79083b66 100644 --- a/python/pylibcudf/pylibcudf/libcudf/transform.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/transform.pxd 
@@ -27,6 +27,11 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: column_view input ) except + + cdef unique_ptr[column] compute_column( + table_view table, + expression expr + ) except + + cdef unique_ptr[column] transform( column_view input, string unary_udf, diff --git a/python/pylibcudf/pylibcudf/libcudf/types.pxd b/python/pylibcudf/pylibcudf/libcudf/types.pxd index eabae68bc90..60e293e5cdb 100644 --- a/python/pylibcudf/pylibcudf/libcudf/types.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/types.pxd @@ -70,18 +70,19 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil: TIMESTAMP_MILLISECONDS TIMESTAMP_MICROSECONDS TIMESTAMP_NANOSECONDS - DICTIONARY32 - STRING - LIST - STRUCT - NUM_TYPE_IDS + DURATION_DAYS DURATION_SECONDS DURATION_MILLISECONDS DURATION_MICROSECONDS DURATION_NANOSECONDS + DICTIONARY32 + STRING + LIST DECIMAL32 DECIMAL64 DECIMAL128 + STRUCT + NUM_TYPE_IDS cdef cppclass data_type: data_type() except + diff --git a/python/pylibcudf/pylibcudf/libcudf/wrappers/durations.pxd b/python/pylibcudf/pylibcudf/libcudf/wrappers/durations.pxd index 7c648425eb5..c9c960d0a79 100644 --- a/python/pylibcudf/pylibcudf/libcudf/wrappers/durations.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/wrappers/durations.pxd @@ -1,9 +1,10 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. -from libc.stdint cimport int64_t +from libc.stdint cimport int32_t, int64_t cdef extern from "cudf/wrappers/durations.hpp" namespace "cudf" nogil: + ctypedef int32_t duration_D ctypedef int64_t duration_s ctypedef int64_t duration_ms ctypedef int64_t duration_us diff --git a/python/pylibcudf/pylibcudf/libcudf/wrappers/timestamps.pxd b/python/pylibcudf/pylibcudf/libcudf/wrappers/timestamps.pxd index 50d37fd0a68..5dcd144529d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/wrappers/timestamps.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/wrappers/timestamps.pxd @@ -1,9 +1,10 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. 
+# Copyright (c) 2020-2024, NVIDIA CORPORATION. -from libc.stdint cimport int64_t +from libc.stdint cimport int32_t, int64_t cdef extern from "cudf/wrappers/timestamps.hpp" namespace "cudf" nogil: + ctypedef int32_t timestamp_D ctypedef int64_t timestamp_s ctypedef int64_t timestamp_ms ctypedef int64_t timestamp_us diff --git a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt index 94df9bbbebb..93e3fb15259 100644 --- a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt @@ -12,8 +12,9 @@ # the License. # ============================================================================= -set(cython_sources edit_distance.pyx generate_ngrams.pyx jaccard.pyx minhash.pyx - ngrams_tokenize.pyx +set(cython_sources + edit_distance.pyx generate_ngrams.pyx jaccard.pyx minhash.pyx ngrams_tokenize.pyx normalize.pyx + replace.pyx stemmer.pyx tokenize.pyx byte_pair_encode.pyx subword_tokenize.pyx ) set(linked_libraries cudf::cudf) diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd index b6659827688..ef837167eb9 100644 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd @@ -1,11 +1,17 @@ # Copyright (c) 2024, NVIDIA CORPORATION. from . 
cimport ( + byte_pair_encode, edit_distance, generate_ngrams, jaccard, minhash, ngrams_tokenize, + normalize, + replace, + stemmer, + subword_tokenize, + tokenize, ) __all__ = [ @@ -13,5 +19,11 @@ __all__ = [ "generate_ngrams", "jaccard", "minhash", - "ngrams_tokenize" + "byte_pair_encode", + "ngrams_tokenize", + "normalize", + "replace", + "stemmer", + "subword_tokenize", + "tokenize", ] diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.py b/python/pylibcudf/pylibcudf/nvtext/__init__.py index f74633a3521..4f125d3a733 100644 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.py +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.py @@ -1,11 +1,29 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from . import edit_distance, generate_ngrams, jaccard, minhash, ngrams_tokenize +from . import ( + byte_pair_encode, + edit_distance, + generate_ngrams, + jaccard, + minhash, + ngrams_tokenize, + normalize, + replace, + stemmer, + subword_tokenize, + tokenize, +) __all__ = [ "edit_distance", "generate_ngrams", "jaccard", "minhash", + "byte_pair_encode", "ngrams_tokenize", + "normalize", + "replace", + "stemmer", + "subword_tokenize", + "tokenize", ] diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pxd b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pxd new file mode 100644 index 00000000000..e4b93e96b9d --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pxd @@ -0,0 +1,16 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from libcpp.memory cimport unique_ptr +from pylibcudf.column cimport Column +from pylibcudf.libcudf.nvtext.byte_pair_encode cimport bpe_merge_pairs +from pylibcudf.scalar cimport Scalar + + +cdef class BPEMergePairs: + cdef unique_ptr[bpe_merge_pairs] c_obj + +cpdef Column byte_pair_encoding( + Column input, + BPEMergePairs merge_pairs, + Scalar separator=* +) diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx new file mode 100644 index 00000000000..76caad276d4 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx @@ -0,0 +1,70 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from cython.operator cimport dereference +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.nvtext.byte_pair_encode cimport ( + byte_pair_encoding as cpp_byte_pair_encoding, + load_merge_pairs as cpp_load_merge_pairs, +) +from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.scalar.scalar_factories cimport ( + make_string_scalar as cpp_make_string_scalar, +) +from pylibcudf.scalar cimport Scalar + + +cdef class BPEMergePairs: + """The table of merge pairs for the BPE encoder. + + For details, see :cpp:class:`cudf::nvtext::bpe_merge_pairs`. + """ + def __cinit__(self, Column merge_pairs): + cdef column_view c_pairs = merge_pairs.view() + with nogil: + self.c_obj = move(cpp_load_merge_pairs(c_pairs)) + +cpdef Column byte_pair_encoding( + Column input, + BPEMergePairs merge_pairs, + Scalar separator=None +): + """ + Byte pair encode the input strings. + + For details, see cpp:func:`cudf::nvtext::byte_pair_encoding` + + Parameters + ---------- + input : Column + Strings to encode. + merge_pairs : BPEMergePairs + Substrings to rebuild each string on. 
+ separator : Scalar + String used to build the output after encoding. Default is a space. + + Returns + ------- + Column + An encoded column of strings. + """ + cdef unique_ptr[column] c_result + + if separator is None: + separator = Scalar.from_libcudf( + cpp_make_string_scalar(" ".encode()) + ) + + with nogil: + c_result = move( + cpp_byte_pair_encoding( + input.view(), + dereference(merge_pairs.c_obj.get()), + dereference(separator.c_obj.get()), + ) + ) + + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/nvtext/normalize.pxd b/python/pylibcudf/pylibcudf/nvtext/normalize.pxd new file mode 100644 index 00000000000..90676145afa --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/normalize.pxd @@ -0,0 +1,9 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp cimport bool +from pylibcudf.column cimport Column + + +cpdef Column normalize_spaces(Column input) + +cpdef Column normalize_characters(Column input, bool do_lower_case) diff --git a/python/pylibcudf/pylibcudf/nvtext/normalize.pyx b/python/pylibcudf/pylibcudf/nvtext/normalize.pyx new file mode 100644 index 00000000000..637d900b659 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/normalize.pyx @@ -0,0 +1,64 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp cimport bool +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.nvtext.normalize cimport ( + normalize_characters as cpp_normalize_characters, + normalize_spaces as cpp_normalize_spaces, +) + + +cpdef Column normalize_spaces(Column input): + """ + Returns a new strings column by normalizing the whitespace in + each string in the input column. + + For details, see :cpp:func:`normalize_spaces` + + Parameters + ---------- + input : Column + Input strings + + Returns + ------- + Column + New strings columns of normalized strings. 
+ """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = cpp_normalize_spaces(input.view()) + + return Column.from_libcudf(move(c_result)) + + +cpdef Column normalize_characters(Column input, bool do_lower_case): + """ + Normalizes strings characters for tokenizing. + + For details, see :cpp:func:`normalize_characters` + + Parameters + ---------- + input : Column + Input strings + do_lower_case : bool + If true, upper-case characters are converted to lower-case + and accents are stripped from those characters. If false, + accented and upper-case characters are not transformed. + + Returns + ------- + Column + Normalized strings column + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = cpp_normalize_characters(input.view(), do_lower_case) + + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/nvtext/replace.pxd b/python/pylibcudf/pylibcudf/nvtext/replace.pxd new file mode 100644 index 00000000000..624f90e7486 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/replace.pxd @@ -0,0 +1,20 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column cimport Column +from pylibcudf.libcudf.types cimport size_type +from pylibcudf.scalar cimport Scalar + + +cpdef Column replace_tokens( + Column input, + Column targets, + Column replacements, + Scalar delimiter=*, +) + +cpdef Column filter_tokens( + Column input, + size_type min_token_length, + Scalar replacement=*, + Scalar delimiter=* +) diff --git a/python/pylibcudf/pylibcudf/nvtext/replace.pyx b/python/pylibcudf/pylibcudf/nvtext/replace.pyx new file mode 100644 index 00000000000..b65348ce14d --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/replace.pyx @@ -0,0 +1,109 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from cython.operator cimport dereference +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.nvtext.replace cimport ( + filter_tokens as cpp_filter_tokens, + replace_tokens as cpp_replace_tokens, +) +from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.scalar.scalar_factories cimport ( + make_string_scalar as cpp_make_string_scalar, +) +from pylibcudf.libcudf.types cimport size_type +from pylibcudf.scalar cimport Scalar + + +cpdef Column replace_tokens( + Column input, + Column targets, + Column replacements, + Scalar delimiter=None, +): + """ + Replaces specified tokens with corresponding replacement strings. + + For details, see :cpp:func:`replace_tokens` + + Parameters + ---------- + input : Column + Strings column to replace + targets : Column + Strings to compare against tokens found in ``input`` + replacements : Column + Replacement strings for each string in ``targets`` + delimiter : Scalar, optional + Characters used to separate each string into tokens. + The default of empty string will identify tokens using whitespace. + + Returns + ------- + Column + New strings column with replaced strings + """ + cdef unique_ptr[column] c_result + if delimiter is None: + delimiter = Scalar.from_libcudf( + cpp_make_string_scalar("".encode()) + ) + with nogil: + c_result = cpp_replace_tokens( + input.view(), + targets.view(), + replacements.view(), + dereference(delimiter.get()), + ) + return Column.from_libcudf(move(c_result)) + + +cpdef Column filter_tokens( + Column input, + size_type min_token_length, + Scalar replacement=None, + Scalar delimiter=None +): + """ + Removes tokens whose lengths are less than a specified number of characters. 
+ + For details, see :cpp:func:`filter_tokens` + + Parameters + ---------- + input : Column + Strings column to replace + min_token_length : size_type + The minimum number of characters to retain a + token in the output string + replacement : Scalar, optional + Optional replacement string to be used in place of removed tokens + delimiter : Scalar, optional + Characters used to separate each string into tokens. + The default of empty string will identify tokens using whitespace. + Returns + ------- + Column + New strings column of filtered strings + """ + cdef unique_ptr[column] c_result + if delimiter is None: + delimiter = Scalar.from_libcudf( + cpp_make_string_scalar("".encode()) + ) + if replacement is None: + replacement = Scalar.from_libcudf( + cpp_make_string_scalar("".encode()) + ) + + with nogil: + c_result = cpp_filter_tokens( + input.view(), + min_token_length, + dereference(replacement.get()), + dereference(delimiter.get()), + ) + + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/nvtext/stemmer.pxd b/python/pylibcudf/pylibcudf/nvtext/stemmer.pxd new file mode 100644 index 00000000000..48762efc01f --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/stemmer.pxd @@ -0,0 +1,14 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp cimport bool +from pylibcudf.column cimport Column +from pylibcudf.libcudf.nvtext.stemmer cimport letter_type +from pylibcudf.libcudf.types cimport size_type + +ctypedef fused ColumnOrSize: + Column + size_type + +cpdef Column is_letter(Column input, bool check_vowels, ColumnOrSize indices) + +cpdef Column porter_stemmer_measure(Column input) diff --git a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx new file mode 100644 index 00000000000..854d1053624 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx @@ -0,0 +1,76 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from libcpp cimport bool +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.nvtext.stemmer cimport ( + is_letter as cpp_is_letter, + letter_type, + porter_stemmer_measure as cpp_porter_stemmer_measure, +) +from pylibcudf.libcudf.types cimport size_type + + +cpdef Column is_letter( + Column input, + bool check_vowels, + ColumnOrSize indices +): + """ + Returns boolean column indicating if the character + or characters at the provided character index or + indices (respectively) are consonants or vowels + + For details, see :cpp:func:`is_letter` + + Parameters + ---------- + input : Column + Input strings + check_vowels : bool + If true, the check is for vowels. Otherwise the check is + for consonants. + indices : Union[Column, size_type] + The character position(s) to check in each string + + Returns + ------- + Column + New boolean column. + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = cpp_is_letter( + input.view(), + letter_type.VOWEL if check_vowels else letter_type.CONSONANT, + indices if ColumnOrSize is size_type else indices.view() + ) + + return Column.from_libcudf(move(c_result)) + + +cpdef Column porter_stemmer_measure(Column input): + """ + Returns the Porter Stemmer measurements of a strings column. 
+ + For details, see :cpp:func:`porter_stemmer_measure` + + Parameters + ---------- + input : Column + Strings column of words to measure + + Returns + ------- + Column + New column of measure values + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = cpp_porter_stemmer_measure(input.view()) + + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pxd b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pxd new file mode 100644 index 00000000000..091c7b897ac --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pxd @@ -0,0 +1,20 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libc.stdint cimport uint32_t +from libcpp cimport bool +from libcpp.memory cimport unique_ptr +from pylibcudf.column cimport Column +from pylibcudf.libcudf.nvtext.subword_tokenize cimport hashed_vocabulary + + +cdef class HashedVocabulary: + cdef unique_ptr[hashed_vocabulary] c_obj + +cpdef tuple[Column, Column, Column] subword_tokenize( + Column input, + HashedVocabulary vocabulary_table, + uint32_t max_sequence_length, + uint32_t stride, + bool do_lower_case, + bool do_truncate, +) diff --git a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx new file mode 100644 index 00000000000..04643d3bd84 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx @@ -0,0 +1,84 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
+ +from cython.operator cimport dereference +from libc.stdint cimport uint32_t +from libcpp cimport bool +from libcpp.string cimport string +from libcpp.utility cimport move +from pylibcudf.column cimport Column +from pylibcudf.libcudf.nvtext.subword_tokenize cimport ( + load_vocabulary_file as cpp_load_vocabulary_file, + move as tr_move, + subword_tokenize as cpp_subword_tokenize, + tokenizer_result as cpp_tokenizer_result, +) + + +cdef class HashedVocabulary: + """The vocabulary data for use with the subword_tokenize function. + + For details, see :cpp:class:`cudf::nvtext::hashed_vocabulary`. + """ + def __cinit__(self, hash_file): + cdef string c_hash_file = str(hash_file).encode() + with nogil: + self.c_obj = move(cpp_load_vocabulary_file(c_hash_file)) + +cpdef tuple[Column, Column, Column] subword_tokenize( + Column input, + HashedVocabulary vocabulary_table, + uint32_t max_sequence_length, + uint32_t stride, + bool do_lower_case, + bool do_truncate, +): + """ + Creates a tokenizer that cleans the text, splits it into + tokens and returns token-ids from an input vocabulary. + + For details, see cpp:func:`subword_tokenize` + + Parameters + ---------- + input : Column + The input strings to tokenize. + vocabulary_table : HashedVocabulary + The vocabulary table pre-loaded into this object. + max_sequence_length : uint32_t + Limit of the number of token-ids per row in final tensor for each string. + stride : uint32_t + Each row in the output token-ids will replicate + ``max_sequence_length`` - ``stride`` the token-ids + from the previous row, unless it is the first string. + do_lower_case : bool + If true, the tokenizer will convert uppercase characters in the + input stream to lower-case and strip accents from those characters. + If false, accented and uppercase characters are not transformed. + do_truncate : bool + If true, the tokenizer will discard all the token-ids after + ``max_sequence_length`` for each input string. 
If false, it + will use a new row in the output token-ids to continue + generating the output. + + Returns + ------- + tuple[Column, Column, Column] + A tuple of three columns containing the + tokens, masks, and metadata. + """ + cdef cpp_tokenizer_result c_result + with nogil: + c_result = tr_move( + cpp_subword_tokenize( + input.view(), + dereference(vocabulary_table.c_obj.get()), + max_sequence_length, + stride, + do_lower_case, + do_truncate, + ) + ) + cdef Column tokens = Column.from_libcudf(move(c_result.tensor_token_ids)) + cdef Column masks = Column.from_libcudf(move(c_result.tensor_attention_mask)) + cdef Column metadata = Column.from_libcudf(move(c_result.tensor_metadata)) + return tokens, masks, metadata diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd b/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd new file mode 100644 index 00000000000..0aed9702d61 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd @@ -0,0 +1,29 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from libcpp.memory cimport unique_ptr +from pylibcudf.column cimport Column +from pylibcudf.libcudf.nvtext.tokenize cimport tokenize_vocabulary +from pylibcudf.libcudf.types cimport size_type +from pylibcudf.scalar cimport Scalar + +cdef class TokenizeVocabulary: + cdef unique_ptr[tokenize_vocabulary] c_obj + +cpdef Column tokenize_scalar(Column input, Scalar delimiter=*) + +cpdef Column tokenize_column(Column input, Column delimiters) + +cpdef Column count_tokens_scalar(Column input, Scalar delimiter=*) + +cpdef Column count_tokens_column(Column input, Column delimiters) + +cpdef Column character_tokenize(Column input) + +cpdef Column detokenize(Column input, Column row_indices, Scalar separator=*) + +cpdef Column tokenize_with_vocabulary( + Column input, + TokenizeVocabulary vocabulary, + Scalar delimiter, + size_type default_id=* +) diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx new file mode 100644 index 00000000000..ec02e8ebf4e --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx @@ -0,0 +1,262 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from cython.operator cimport dereference +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.nvtext.tokenize cimport ( + character_tokenize as cpp_character_tokenize, + count_tokens as cpp_count_tokens, + detokenize as cpp_detokenize, + load_vocabulary as cpp_load_vocabulary, + tokenize as cpp_tokenize, + tokenize_with_vocabulary as cpp_tokenize_with_vocabulary, +) +from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.scalar.scalar_factories cimport ( + make_string_scalar as cpp_make_string_scalar, +) +from pylibcudf.libcudf.types cimport size_type + + +cdef class TokenizeVocabulary: + """The Vocabulary object to be used with ``tokenize_with_vocabulary``. + + For details, see :cpp:class:`cudf::nvtext::tokenize_vocabulary`. + """ + def __cinit__(self, Column vocab): + cdef column_view c_vocab = vocab.view() + with nogil: + self.c_obj = move(cpp_load_vocabulary(c_vocab)) + +cpdef Column tokenize_scalar(Column input, Scalar delimiter=None): + """ + Returns a single column of strings by tokenizing the input + strings column using the provided characters as delimiters. 
+ + For details, see cpp:func:`cudf::nvtext::tokenize` + + Parameters + ---------- + input : Column + Strings column to tokenize + delimiter : Scalar + String scalar used to separate individual strings into tokens + + Returns + ------- + Column + New strings columns of tokens + """ + cdef unique_ptr[column] c_result + + if delimiter is None: + delimiter = Scalar.from_libcudf( + cpp_make_string_scalar("".encode()) + ) + + with nogil: + c_result = cpp_tokenize( + input.view(), + dereference(delimiter.c_obj.get()), + ) + + return Column.from_libcudf(move(c_result)) + +cpdef Column tokenize_column(Column input, Column delimiters): + """ + Returns a single column of strings by tokenizing the input + strings column using multiple strings as delimiters. + + For details, see cpp:func:`cudf::nvtext::tokenize` + + Parameters + ---------- + input : Column + Strings column to tokenize + delimiters : Column + Strings column used to separate individual strings into tokens + + Returns + ------- + Column + New strings columns of tokens + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = cpp_tokenize( + input.view(), + delimiters.view(), + ) + + return Column.from_libcudf(move(c_result)) + +cpdef Column count_tokens_scalar(Column input, Scalar delimiter=None): + """ + Returns the number of tokens in each string of a strings column + using the provided characters as delimiters. 
+ + For details, see :cpp:func:`cudf::nvtext::count_tokens` + + Parameters + ---------- + input : Column + Strings column to count tokens + delimiter : Scalar + String scalar used to separate each string into tokens + + Returns + ------- + Column + New column of token counts + """ + cdef unique_ptr[column] c_result + + if delimiter is None: + delimiter = Scalar.from_libcudf( + cpp_make_string_scalar("".encode()) + ) + + with nogil: + c_result = cpp_count_tokens( + input.view(), + dereference(delimiter.c_obj.get()), + ) + + return Column.from_libcudf(move(c_result)) + +cpdef Column count_tokens_column(Column input, Column delimiters): + """ + Returns the number of tokens in each string of a strings column + using multiple strings as delimiters. + + For details, see :cpp:func:`cudf::nvtext::count_tokens` + + Parameters + ---------- + input : Column + Strings column to count tokens + delimiters : Column + Strings column used to separate each string into tokens + + Returns + ------- + Column + New column of token counts + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = cpp_count_tokens( + input.view(), + delimiters.view(), + ) + + return Column.from_libcudf(move(c_result)) + +cpdef Column character_tokenize(Column input): + """ + Returns a single column of strings by converting + each character to a string. + + For details, see :cpp:func:`cudf::nvtext::character_tokenize` + + Parameters + ---------- + input : Column + Strings column to tokenize + + Returns + ------- + Column + New strings column of tokens + """ + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_character_tokenize(input.view()) + + return Column.from_libcudf(move(c_result)) + +cpdef Column detokenize( + Column input, + Column row_indices, + Scalar separator=None +): + """ + Creates a strings column from a strings column of tokens + and an associated column of row ids. 
+ + For details, see cpp:func:`cudf::nvtext::detokenize` + + Parameters + ---------- + input : Column + Strings column to detokenize + row_indices : Column + The relative output row index assigned for each token in the input column + separator : Scalar + String to append after concatenating each token to the proper output row + + Returns + ------- + Column + New strings columns of tokens + """ + cdef unique_ptr[column] c_result + + if separator is None: + separator = Scalar.from_libcudf( + cpp_make_string_scalar(" ".encode()) + ) + + with nogil: + c_result = cpp_detokenize( + input.view(), + row_indices.view(), + dereference(separator.c_obj.get()) + ) + + return Column.from_libcudf(move(c_result)) + +cpdef Column tokenize_with_vocabulary( + Column input, + TokenizeVocabulary vocabulary, + Scalar delimiter, + size_type default_id=-1 +): + """ + Returns the token ids for the input string by looking + up each delimited token in the given vocabulary. + + For details, see cpp:func:`cudf::nvtext::tokenize_with_vocabulary` + + Parameters + ---------- + input : Column + Strings column to tokenize + vocabulary : TokenizeVocabulary + Used to lookup tokens within ``input`` + delimiter : Scalar + Used to identify tokens within ``input`` + default_id : size_type + The token id to be used for tokens not found in the vocabulary; Default is -1 + + Returns + ------- + Column + Lists column of token ids + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = cpp_tokenize_with_vocabulary( + input.view(), + dereference(vocabulary.c_obj.get()), + dereference(delimiter.c_obj.get()), + default_id + ) + + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/strings/CMakeLists.txt b/python/pylibcudf/pylibcudf/strings/CMakeLists.txt index 04dd131cd75..5d7fbd24b91 100644 --- a/python/pylibcudf/pylibcudf/strings/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/strings/CMakeLists.txt @@ -28,6 +28,7 @@ set(cython_sources regex_program.pyx repeat.pyx 
replace.pyx + replace_re.pyx side_type.pyx slice.pyx strip.pyx diff --git a/python/pylibcudf/pylibcudf/strings/__init__.pxd b/python/pylibcudf/pylibcudf/strings/__init__.pxd index 93c61f3f72c..da1c1c576c0 100644 --- a/python/pylibcudf/pylibcudf/strings/__init__.pxd +++ b/python/pylibcudf/pylibcudf/strings/__init__.pxd @@ -17,6 +17,7 @@ from . cimport ( regex_program, repeat, replace, + replace_re, side_type, slice, split, @@ -42,6 +43,7 @@ __all__ = [ "regex_program", "repeat", "replace", + "replace_re", "slice", "strip", "split", diff --git a/python/pylibcudf/pylibcudf/strings/__init__.py b/python/pylibcudf/pylibcudf/strings/__init__.py index d52b0405f1e..fa7294c7dbd 100644 --- a/python/pylibcudf/pylibcudf/strings/__init__.py +++ b/python/pylibcudf/pylibcudf/strings/__init__.py @@ -17,6 +17,7 @@ regex_program, repeat, replace, + replace_re, side_type, slice, split, @@ -31,6 +32,7 @@ "capitalize", "case", "char_types", + "combine", "contains", "convert", "extract", @@ -42,6 +44,7 @@ "regex_program", "repeat", "replace", + "replace_re", "slice", "strip", "split", diff --git a/python/pylibcudf/pylibcudf/strings/regex_program.pyx b/python/pylibcudf/pylibcudf/strings/regex_program.pyx index f426b6888ae..91f585cd637 100644 --- a/python/pylibcudf/pylibcudf/strings/regex_program.pyx +++ b/python/pylibcudf/pylibcudf/strings/regex_program.pyx @@ -37,6 +37,10 @@ cdef class RegexProgram: flags : Uniont[int, RegexFlags] Regex flags for interpreting special characters in the pattern + Returns + ------- + RegexProgram + A new RegexProgram """ cdef unique_ptr[regex_program] c_prog cdef regex_flags c_flags diff --git a/python/pylibcudf/pylibcudf/strings/replace.pyx b/python/pylibcudf/pylibcudf/strings/replace.pyx index 6db7f04fcbb..2b94f5e3fee 100644 --- a/python/pylibcudf/pylibcudf/strings/replace.pyx +++ b/python/pylibcudf/pylibcudf/strings/replace.pyx @@ -136,6 +136,7 @@ cpdef Column replace_slice( Start position where repl will be added. 
stop : size_type, default -1 End position (exclusive) to use for replacement. + Returns ------- pylibcudf.Column diff --git a/python/pylibcudf/pylibcudf/strings/replace_re.pxd b/python/pylibcudf/pylibcudf/strings/replace_re.pxd new file mode 100644 index 00000000000..e27ccd55f7d --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/replace_re.pxd @@ -0,0 +1,30 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column cimport Column +from pylibcudf.libcudf.types cimport size_type +from pylibcudf.scalar cimport Scalar +from pylibcudf.strings.regex_flags cimport regex_flags +from pylibcudf.strings.regex_program cimport RegexProgram + +ctypedef fused Replacement: + Column + Scalar + +ctypedef fused Patterns: + RegexProgram + list + + +cpdef Column replace_re( + Column input, + Patterns patterns, + Replacement replacement=*, + size_type max_replace_count=*, + regex_flags flags=* +) + +cpdef Column replace_with_backrefs( + Column input, + RegexProgram prog, + str replacement +) diff --git a/python/pylibcudf/pylibcudf/strings/replace_re.pyx b/python/pylibcudf/pylibcudf/strings/replace_re.pyx new file mode 100644 index 00000000000..ccc33fd4425 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/replace_re.pyx @@ -0,0 +1,134 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+from cython.operator cimport dereference +from libcpp.memory cimport unique_ptr +from libcpp.string cimport string +from libcpp.utility cimport move +from libcpp.vector cimport vector +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.scalar.scalar_factories cimport ( + make_string_scalar as cpp_make_string_scalar, +) +from pylibcudf.libcudf.strings cimport replace_re as cpp_replace_re +from pylibcudf.libcudf.types cimport size_type +from pylibcudf.scalar cimport Scalar +from pylibcudf.strings.regex_flags cimport regex_flags +from pylibcudf.strings.regex_program cimport RegexProgram + + +cpdef Column replace_re( + Column input, + Patterns patterns, + Replacement replacement=None, + size_type max_replace_count=-1, + regex_flags flags=regex_flags.DEFAULT, +): + """ + For each string, replaces any character sequence matching the given patterns + with the provided replacement. + + For details, see :cpp:func:`cudf::strings::replace_re` + + Parameters + ---------- + input : Column + Strings instance for this operation. + patterns: RegexProgram or list[str] + If RegexProgram, the regex to match to each string. + If list[str], a list of regex strings to search within each string. + replacement : Scalar or Column + If Scalar, the string used to replace the matched sequence in each string. + ``patterns`` must be a RegexProgram. + If Column, the strings used for replacement. + ``patterns`` must be a list[str]. + max_replace_count : int + The maximum number of times to replace the matched pattern + within each string. ``patterns`` must be a RegexProgram. + Default replaces every substring that is matched. + flags : RegexFlags + Regex flags for interpreting special characters in the patterns. 
+ ``patterns`` must be a list[str] + + Returns + ------- + Column + New strings column + """ + cdef unique_ptr[column] c_result + cdef vector[string] c_patterns + + if Patterns is RegexProgram and Replacement is Scalar: + if replacement is None: + replacement = Scalar.from_libcudf( + cpp_make_string_scalar("".encode()) + ) + with nogil: + c_result = move( + cpp_replace_re.replace_re( + input.view(), + patterns.c_obj.get()[0], + dereference(<string_scalar*>(replacement.get())), + max_replace_count + ) + ) + + return Column.from_libcudf(move(c_result)) + elif Patterns is list and Replacement is Column: + c_patterns.reserve(len(patterns)) + for pattern in patterns: + c_patterns.push_back(pattern.encode()) + + with nogil: + c_result = move( + cpp_replace_re.replace_re( + input.view(), + c_patterns, + replacement.view(), + flags, + ) + ) + + return Column.from_libcudf(move(c_result)) + else: + raise TypeError("Must pass either a RegexProgram and a Scalar or a list") + + +cpdef Column replace_with_backrefs( + Column input, + RegexProgram prog, + str replacement +): + """ + For each string, replaces any character sequence matching the given regex + using the replacement template for back-references. + + For details, see :cpp:func:`cudf::strings::replace_with_backrefs` + + Parameters + ---------- + input : Column + Strings instance for this operation. + + prog: RegexProgram + Regex program instance. + + replacement : str + The replacement template for creating the output string. + + Returns + ------- + Column + New strings column.
+ """ + cdef unique_ptr[column] c_result + cdef string c_replacement = replacement.encode() + + with nogil: + c_result = cpp_replace_re.replace_with_backrefs( + input.view(), + prog.c_obj.get()[0], + c_replacement, + ) + + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/tests/common/utils.py b/python/pylibcudf/pylibcudf/tests/common/utils.py index 9f389fa42c4..d95849ef371 100644 --- a/python/pylibcudf/pylibcudf/tests/common/utils.py +++ b/python/pylibcudf/pylibcudf/tests/common/utils.py @@ -7,10 +7,11 @@ import numpy as np import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from pyarrow.orc import write_table as orc_write_table from pyarrow.parquet import write_table as pq_write_table + +import pylibcudf as plc from pylibcudf.io.types import CompressionType diff --git a/python/pylibcudf/pylibcudf/tests/conftest.py b/python/pylibcudf/pylibcudf/tests/conftest.py index fdce6f353ca..5265e411c7f 100644 --- a/python/pylibcudf/pylibcudf/tests/conftest.py +++ b/python/pylibcudf/pylibcudf/tests/conftest.py @@ -8,8 +8,9 @@ import numpy as np import pyarrow as pa -import pylibcudf as plc import pytest + +import pylibcudf as plc from pylibcudf.io.types import CompressionType sys.path.insert(0, os.path.join(os.path.dirname(__file__), "common")) @@ -17,13 +18,23 @@ from utils import ALL_PA_TYPES, DEFAULT_PA_TYPES, NUMERIC_PA_TYPES -# This fixture defines the standard set of types that all tests should default to +def _type_to_str(typ): + if isinstance(typ, pa.ListType): + return f"list[{_type_to_str(typ.value_type)}]" + elif isinstance(typ, pa.StructType): + return f"struct[{', '.join(_type_to_str(typ.field(i).type) for i in range(typ.num_fields))}]" + else: + return str(typ) + + +# This fixture defines the standard set of types that all tests should default to # running on.
If there is a need for some tests to run on a different set of types, that # type list fixture should also be defined below here if it is likely to be reused # across modules. Otherwise it may be defined on a per-module basis. @pytest.fixture( scope="session", params=DEFAULT_PA_TYPES, + ids=_type_to_str, ) def pa_type(request): return request.param diff --git a/python/pylibcudf/pylibcudf/tests/io/test_avro.py b/python/pylibcudf/pylibcudf/tests/io/test_avro.py index 0cd5064a697..3d9d99ffa61 100644 --- a/python/pylibcudf/pylibcudf/tests/io/test_avro.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_avro.py @@ -5,10 +5,11 @@ import fastavro import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_table_and_meta_eq +import pylibcudf as plc + avro_dtype_pairs = [ ("boolean", pa.bool_()), ("int", pa.int32()), diff --git a/python/pylibcudf/pylibcudf/tests/io/test_csv.py b/python/pylibcudf/pylibcudf/tests/io/test_csv.py index ab26f23418d..22c83acc47c 100644 --- a/python/pylibcudf/pylibcudf/tests/io/test_csv.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_csv.py @@ -5,9 +5,7 @@ import pandas as pd import pyarrow as pa -import pylibcudf as plc import pytest -from pylibcudf.io.types import CompressionType from utils import ( _convert_types, assert_table_and_meta_eq, @@ -15,6 +13,9 @@ write_source_str, ) +import pylibcudf as plc +from pylibcudf.io.types import CompressionType + # Shared kwargs to pass to make_source _COMMON_CSV_SOURCE_KWARGS = { "format": "csv", diff --git a/python/pylibcudf/pylibcudf/tests/io/test_json.py b/python/pylibcudf/pylibcudf/tests/io/test_json.py index 9d976fedf00..453e5ce32a8 100644 --- a/python/pylibcudf/pylibcudf/tests/io/test_json.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_json.py @@ -3,9 +3,7 @@ import pandas as pd import pyarrow as pa -import pylibcudf as plc import pytest -from pylibcudf.io.types import CompressionType from utils import ( assert_table_and_meta_eq, make_source, @@ -13,6 +11,9 @@ 
write_source_str, ) +import pylibcudf as plc +from pylibcudf.io.types import CompressionType + # Shared kwargs to pass to make_source _COMMON_JSON_SOURCE_KWARGS = {"format": "json", "orient": "records"} diff --git a/python/pylibcudf/pylibcudf/tests/io/test_orc.py b/python/pylibcudf/pylibcudf/tests/io/test_orc.py index 42b14b1feff..5ed660ba6cf 100644 --- a/python/pylibcudf/pylibcudf/tests/io/test_orc.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_orc.py @@ -1,9 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import _convert_types, assert_table_and_meta_eq, make_source +import pylibcudf as plc + # Shared kwargs to pass to make_source _COMMON_ORC_SOURCE_KWARGS = {"format": "orc"} diff --git a/python/pylibcudf/pylibcudf/tests/io/test_parquet.py b/python/pylibcudf/pylibcudf/tests/io/test_parquet.py index f6e843ccf66..41298601539 100644 --- a/python/pylibcudf/pylibcudf/tests/io/test_parquet.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_parquet.py @@ -1,9 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from pyarrow.parquet import read_table +from utils import assert_table_and_meta_eq, make_source + +import pylibcudf as plc from pylibcudf.expressions import ( ASTOperator, ColumnNameReference, @@ -11,7 +13,6 @@ Literal, Operation, ) -from utils import assert_table_and_meta_eq, make_source # Shared kwargs to pass to make_source _COMMON_PARQUET_SOURCE_KWARGS = {"format": "parquet"} diff --git a/python/pylibcudf/pylibcudf/tests/io/test_source_sink_info.py b/python/pylibcudf/pylibcudf/tests/io/test_source_sink_info.py index 747f58ec8cf..0c43c363e55 100644 --- a/python/pylibcudf/pylibcudf/tests/io/test_source_sink_info.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_source_sink_info.py @@ -2,9 +2,10 @@ import io -import pylibcudf as plc import pytest +import pylibcudf as plc + @pytest.fixture(params=[plc.io.SourceInfo, plc.io.SinkInfo]) def io_class(request): diff --git a/python/pylibcudf/pylibcudf/tests/io/test_timezone.py b/python/pylibcudf/pylibcudf/tests/io/test_timezone.py index 76b0424b2af..b3555013927 100644 --- a/python/pylibcudf/pylibcudf/tests/io/test_timezone.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_timezone.py @@ -1,9 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
import zoneinfo -import pylibcudf as plc import pytest +import pylibcudf as plc + def test_make_timezone_transition_table(): if len(zoneinfo.TZPATH) == 0: diff --git a/python/pylibcudf/pylibcudf/tests/test_binaryops.py b/python/pylibcudf/pylibcudf/tests/test_binaryops.py index f784cb3c191..bbb08e8b95a 100644 --- a/python/pylibcudf/pylibcudf/tests/test_binaryops.py +++ b/python/pylibcudf/pylibcudf/tests/test_binaryops.py @@ -4,10 +4,11 @@ import numpy as np import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + def idfn(param): ltype, rtype, outtype, plc_op, _ = param diff --git a/python/pylibcudf/pylibcudf/tests/test_column_factories.py b/python/pylibcudf/pylibcudf/tests/test_column_factories.py index 8cedbc6d42f..e317362a76b 100644 --- a/python/pylibcudf/pylibcudf/tests/test_column_factories.py +++ b/python/pylibcudf/pylibcudf/tests/test_column_factories.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import DEFAULT_STRUCT_TESTING_TYPE, assert_column_eq +import pylibcudf as plc + EMPTY_COL_SIZE = 3 NUMERIC_TYPES = [ diff --git a/python/pylibcudf/pylibcudf/tests/test_column_from_device.py b/python/pylibcudf/pylibcudf/tests/test_column_from_device.py index 0e129fdf0ef..24cd6b9e35f 100644 --- a/python/pylibcudf/pylibcudf/tests/test_column_from_device.py +++ b/python/pylibcudf/pylibcudf/tests/test_column_from_device.py @@ -1,12 +1,13 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq import rmm +import pylibcudf as plc + VALID_TYPES = [ pa.int8(), pa.int16(), diff --git a/python/pylibcudf/pylibcudf/tests/test_contiguous_split.py b/python/pylibcudf/pylibcudf/tests/test_contiguous_split.py index 7a5c1664eed..6d8b5993964 100644 --- a/python/pylibcudf/pylibcudf/tests/test_contiguous_split.py +++ b/python/pylibcudf/pylibcudf/tests/test_contiguous_split.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_table_eq +import pylibcudf as plc + param_pyarrow_tables = [ pa.table([]), pa.table({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}), diff --git a/python/pylibcudf/pylibcudf/tests/test_copying.py b/python/pylibcudf/pylibcudf/tests/test_copying.py index 628682d0a66..c0a41b96b1a 100644 --- a/python/pylibcudf/pylibcudf/tests/test_copying.py +++ b/python/pylibcudf/pylibcudf/tests/test_copying.py @@ -2,7 +2,6 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import ( DEFAULT_STRUCT_TESTING_TYPE, @@ -16,6 +15,8 @@ metadata_from_arrow_type, ) +import pylibcudf as plc + # TODO: consider moving this to conftest and "pairing" # it with pa_type, so that they don't get out of sync diff --git a/python/pylibcudf/pylibcudf/tests/test_datetime.py b/python/pylibcudf/pylibcudf/tests/test_datetime.py index 75930d59058..f5f24ef28e2 100644 --- a/python/pylibcudf/pylibcudf/tests/test_datetime.py +++ b/python/pylibcudf/pylibcudf/tests/test_datetime.py @@ -1,13 +1,15 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
+import calendar import datetime import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module", params=["s", "ms", "us", "ns"]) def datetime_column(has_nulls, request): @@ -45,6 +47,21 @@ def component(request): return request.param +@pytest.fixture( + params=[ + ("day", plc.datetime.RoundingFrequency.DAY), + ("hour", plc.datetime.RoundingFrequency.HOUR), + ("minute", plc.datetime.RoundingFrequency.MINUTE), + ("second", plc.datetime.RoundingFrequency.SECOND), + ("millisecond", plc.datetime.RoundingFrequency.MILLISECOND), + ("microsecond", plc.datetime.RoundingFrequency.MICROSECOND), + ("nanosecond", plc.datetime.RoundingFrequency.NANOSECOND), + ] +) +def rounding_frequency(request): + return request.param + + def test_extract_datetime_component(datetime_column, component): attr, component = component kwargs = {} @@ -58,3 +75,139 @@ def test_extract_datetime_component(datetime_column, component): ).cast(pa.int16()) assert_column_eq(expect, got) + + +@pytest.mark.parametrize( + "datetime_func", + [ + "extract_millisecond_fraction", + "extract_microsecond_fraction", + "extract_nanosecond_fraction", + ], +) +def test_datetime_extracting_functions(datetime_column, datetime_func): + pa_col = plc.interop.to_arrow(datetime_column) + got = getattr(plc.datetime, datetime_func)(datetime_column) + kwargs = {} + attr = datetime_func.split("_")[1] + if attr == "weekday": + kwargs = {"count_from_zero": False} + attr = "day_of_week" + expect = getattr(pc, attr)(pa_col, **kwargs).cast(pa.int16()) + assert_column_eq(expect, got) + + +@pytest.mark.parametrize( + "op", + [ + ("ceil_temporal", "ceil_datetimes"), + ("floor_temporal", "floor_datetimes"), + ("round_temporal", "round_datetimes"), + ], +) +def test_rounding_operations(datetime_column, op, rounding_frequency): + got = getattr(plc.datetime, op[1])(datetime_column, rounding_frequency[1]) + pa_col = 
plc.interop.to_arrow(datetime_column) + pa_got = plc.interop.to_arrow(got) + expect = getattr(pc, op[0])( + pa_col, + unit=rounding_frequency[0], + ).cast(pa_got.type) + assert_column_eq(expect, got) + + +@pytest.mark.parametrize( + "months", + [ + pa.scalar(-3, pa.int32()), + pa.scalar(1, pa.int16()), + pa.array([1, -3, 2, 4, -1, 5], pa.int32()), + ], +) +def test_calendrical_months(datetime_column, months): + def add_calendrical_months(timestamps, months): + result = [] + if isinstance(months, pa.Array): + months_list = months.to_pylist() + else: + months_list = [months.as_py()] * len(timestamps) + for i, dt in enumerate(timestamps): + if dt.as_py() is not None: + year, month = dt.as_py().year, dt.as_py().month + new_month = month + months_list[i] + new_year = year + (new_month - 1) // 12 + result.append( + dt.as_py().replace( + year=new_year, month=(new_month - 1) % 12 + 1 + ) + ) + else: + result.append(None) + return pa.array(result) + + pa_col = plc.interop.to_arrow(datetime_column) + got = plc.datetime.add_calendrical_months( + datetime_column, plc.interop.from_arrow(months) + ) + pa_got = plc.interop.to_arrow(got) + expect = add_calendrical_months(pa_col, months).cast(pa_got.type) + assert_column_eq(expect, got) + + +def test_day_of_year(datetime_column): + got = plc.datetime.day_of_year(datetime_column) + pa_got = plc.interop.to_arrow(got) + pa_col = plc.interop.to_arrow(datetime_column) + expect = pa.array( + [ + d.as_py().timetuple().tm_yday if d.as_py() is not None else None + for d in pa_col + ], + type=pa_got.type, + ) + assert_column_eq(expect, got) + + +def test_is_leap_year(datetime_column): + got = plc.datetime.is_leap_year(datetime_column) + pa_col = plc.interop.to_arrow(datetime_column) + expect = pc.is_leap_year(pa_col) + assert_column_eq(expect, got) + + +def test_last_day_of_month(datetime_column): + def last_day_of_month(dates): + return [ + d.replace(day=calendar.monthrange(d.year, d.month)[1]) + if d is not None + else d + for d in 
dates.to_pylist() + ] + + got = plc.datetime.last_day_of_month(datetime_column) + pa_got = plc.interop.to_arrow(got) + pa_col = plc.interop.to_arrow(datetime_column) + expect = pa.array(last_day_of_month(pa_col), type=pa_got.type) + assert_column_eq(expect, got) + + +def test_extract_quarter(datetime_column): + got = plc.datetime.extract_quarter(datetime_column) + pa_col = plc.interop.to_arrow(datetime_column) + pa_got = plc.interop.to_arrow(got) + expect = pc.quarter(pa_col).cast(pa_got.type) + assert_column_eq(expect, got) + + +def test_days_in_month(datetime_column): + def days_in_month(dates): + return [ + calendar.monthrange(d.year, d.month)[1] if d is not None else None + for d in dates.to_pylist() + ] + + got = plc.datetime.days_in_month(datetime_column) + pa_col = plc.interop.to_arrow(datetime_column) + pa_got = plc.interop.to_arrow(got) + expect = pa.array(days_in_month(pa_col), type=pa_got.type) + assert_column_eq(expect, got) diff --git a/python/pylibcudf/pylibcudf/tests/test_expressions.py b/python/pylibcudf/pylibcudf/tests/test_expressions.py index 5894ef4624c..52c81c49b9d 100644 --- a/python/pylibcudf/pylibcudf/tests/test_expressions.py +++ b/python/pylibcudf/pylibcudf/tests/test_expressions.py @@ -1,10 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
import pyarrow as pa -import pylibcudf as plc +import pyarrow.compute as pc import pytest +from utils import assert_column_eq -# We can't really evaluate these expressions, so just make sure -# construction works properly +import pylibcudf as plc def test_literal_construction_invalid(): @@ -22,7 +22,7 @@ def test_literal_construction_invalid(): ], ) def test_columnref_construction(tableref): - plc.expressions.ColumnReference(1.0, tableref) + plc.expressions.ColumnReference(1, tableref) def test_columnnameref_construction(): @@ -47,3 +47,35 @@ def test_columnnameref_construction(): ) def test_astoperation_construction(kwargs): plc.expressions.Operation(**kwargs) + + +def test_evaluation(): + table_h = pa.table({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + lit = pa.scalar(42, type=pa.int64()) + table = plc.interop.from_arrow(table_h) + # expr = abs(b * c - (a + 42)) + expr = plc.expressions.Operation( + plc.expressions.ASTOperator.ABS, + plc.expressions.Operation( + plc.expressions.ASTOperator.SUB, + plc.expressions.Operation( + plc.expressions.ASTOperator.MUL, + plc.expressions.ColumnReference(1), + plc.expressions.ColumnReference(2), + ), + plc.expressions.Operation( + plc.expressions.ASTOperator.ADD, + plc.expressions.ColumnReference(0), + plc.expressions.Literal(plc.interop.from_arrow(lit)), + ), + ), + ) + + expect = pc.abs( + pc.subtract( + pc.multiply(table_h["b"], table_h["c"]), pc.add(table_h["a"], lit) + ) + ) + got = plc.transform.compute_column(table, expr) + + assert_column_eq(expect, got) diff --git a/python/pylibcudf/pylibcudf/tests/test_hashing.py b/python/pylibcudf/pylibcudf/tests/test_hashing.py new file mode 100644 index 00000000000..83fb50fa4ef --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_hashing.py @@ -0,0 +1,269 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +import hashlib +import struct + +import mmh3 +import numpy as np +import pyarrow as pa +import pytest +import xxhash +from utils import assert_column_eq, assert_table_eq + +import pylibcudf as plc + +SEED = 0 +METHODS = ["md5", "sha1", "sha224", "sha256", "sha384", "sha512"] + + +def scalar_to_binary(x): + if isinstance(x, str): + return x.encode() + elif isinstance(x, float): + return struct.pack("<d", x) + elif isinstance(x, bool): + return x.to_bytes(1, byteorder="little", signed=True) + elif isinstance(x, int): + return x.to_bytes(8, byteorder="little", signed=True) + else: + raise NotImplementedError + + +def hash_single_uint32(val, seed=0): + return mmh3.hash(np.uint32(val).tobytes(), seed=seed, signed=False) + + +def hash_combine_32(lhs, rhs): + return np.uint32(lhs ^ (rhs + 0x9E3779B9 + (lhs << 6) + (lhs >> 2))) + + +def uint_hash_combine_32(lhs, rhs): + return hash_combine_32(np.uint32(lhs), np.uint32(rhs)) + + +def libcudf_mmh3_x86_32(binary): + seed = plc.hashing.LIBCUDF_DEFAULT_HASH_SEED + hashval = mmh3.hash(binary, seed) + return hash_combine_32(seed, hashval) + + +@pytest.fixture(params=[pa.int64(), pa.float64(), pa.string(), pa.bool_()]) +def scalar_type(request): + return request.param + + +@pytest.fixture +def pa_scalar_input_column(scalar_type): + if pa.types.is_integer(scalar_type) or pa.types.is_floating(scalar_type): + return pa.array([1, 2, 3], type=scalar_type) + elif pa.types.is_string(scalar_type): + return pa.array(["a", "b", "c"], type=scalar_type) + elif pa.types.is_boolean(scalar_type): + return pa.array([True, True, False], type=scalar_type) + + +@pytest.fixture +def plc_scalar_input_tbl(pa_scalar_input_column): + return plc.interop.from_arrow( + pa.Table.from_arrays([pa_scalar_input_column], names=["data"]) + ) + + +@pytest.fixture(scope="module") +def list_struct_table(): + data = pa.Table.from_pydict( + { + "list": [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + "struct": [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 6}], + } + ) + return data + + +def python_hash_value(x, method): + if method == "murmurhash3_x86_32": + return libcudf_mmh3_x86_32(x) + elif method == "murmurhash3_x64_128": + hasher = mmh3.mmh3_x64_128(seed=plc.hashing.LIBCUDF_DEFAULT_HASH_SEED) + hasher.update(x) + # libcudf returns a tuple of two 64-bit integers + return hasher.utupledigest() + elif method == "xxhash_64": + return xxhash.xxh64( + x,
seed=plc.hashing.LIBCUDF_DEFAULT_HASH_SEED + ).intdigest() + else: + return getattr(hashlib, method)(x).hexdigest() + + +@pytest.mark.parametrize( + "method", ["sha1", "sha224", "sha256", "sha384", "sha512", "md5"] +) +def test_hash_column_sha_md5( + pa_scalar_input_column, plc_scalar_input_tbl, method +): + plc_hasher = getattr(plc.hashing, method) + + def py_hasher(val): + return getattr(hashlib, method)(scalar_to_binary(val)).hexdigest() + + expect = pa.array( + [py_hasher(val) for val in pa_scalar_input_column.to_pylist()], + type=pa.string(), + ) + got = plc_hasher(plc_scalar_input_tbl) + assert_column_eq(got, expect) + + +def test_hash_column_xxhash64(pa_scalar_input_column, plc_scalar_input_tbl): + def py_hasher(val): + return xxhash.xxh64( + scalar_to_binary(val), seed=plc.hashing.LIBCUDF_DEFAULT_HASH_SEED + ).intdigest() + + expect = pa.array( + [py_hasher(val) for val in pa_scalar_input_column.to_pylist()], + type=pa.uint64(), + ) + got = plc.hashing.xxhash_64(plc_scalar_input_tbl, 0) + + assert_column_eq(got, expect) + + +@pytest.mark.parametrize( + "method", ["sha1", "sha224", "sha256", "sha384", "sha512"] +) +@pytest.mark.parametrize("dtype", ["list", "struct"]) +def test_sha_list_struct_err(list_struct_table, dtype, method): + err_types = list_struct_table.select([dtype]) + plc_tbl = plc.interop.from_arrow(err_types) + plc_hasher = getattr(plc.hashing, method) + + with pytest.raises(TypeError): + plc_hasher(plc_tbl) + + +def test_md5_struct_err(list_struct_table): + err_types = list_struct_table.select(["struct"]) + plc_tbl = plc.interop.from_arrow(err_types) + + with pytest.raises(TypeError): + plc.hashing.md5(plc_tbl) + + +def test_murmurhash3_x86_32(pa_scalar_input_column, plc_scalar_input_tbl): + def py_hasher(val): + return libcudf_mmh3_x86_32(scalar_to_binary(val)) + + got = plc.hashing.murmurhash3_x86_32(plc_scalar_input_tbl, 0) + expect = pa.array( + [py_hasher(val) for val in pa_scalar_input_column.to_pylist()], + type=pa.uint32(), + ) + got 
= plc.hashing.murmurhash3_x86_32(plc_scalar_input_tbl, 0) + assert_column_eq(got, expect) + + +@pytest.mark.filterwarnings("ignore::RuntimeWarning") +def test_murmurhash3_x86_32_list(): + pa_tbl = pa.Table.from_pydict( + { + "list": pa.array( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], type=pa.list_(pa.uint32()) + ) + } + ) + plc_tbl = plc.interop.from_arrow(pa_tbl) + + def hash_list(list_): + hash_value = uint_hash_combine_32(0, hash_single_uint32(len(list_))) + + for element in list_: + hash_value = uint_hash_combine_32( + hash_value, + hash_single_uint32( + element, seed=plc.hashing.LIBCUDF_DEFAULT_HASH_SEED + ), + ) + + final = uint_hash_combine_32( + plc.hashing.LIBCUDF_DEFAULT_HASH_SEED, hash_value + ) + return final + + expect = pa.array( + [hash_list(val) for val in pa_tbl["list"].to_pylist()], + type=pa.uint32(), + ) + got = plc.hashing.murmurhash3_x86_32( + plc_tbl, plc.hashing.LIBCUDF_DEFAULT_HASH_SEED + ) + assert_column_eq(got, expect) + + +@pytest.mark.filterwarnings("ignore::RuntimeWarning") +def test_murmurhash3_x86_32_struct(): + pa_tbl = pa.table( + { + "struct": pa.array( + [ + {"a": 1, "b": 2, "c": 3}, + {"a": 4, "b": 5, "c": 6}, + {"a": 7, "b": 8, "c": 9}, + ], + type=pa.struct( + [ + pa.field("a", pa.uint32()), + pa.field("b", pa.uint32(), pa.field("c", pa.uint32())), + ] + ), + ) + } + ) + plc_tbl = plc.interop.from_arrow(pa_tbl) + + def hash_struct(s): + seed = plc.hashing.LIBCUDF_DEFAULT_HASH_SEED + keys = list(s.keys()) + + combined_hash = hash_single_uint32(s[keys[0]], seed=seed) + combined_hash = uint_hash_combine_32(0, combined_hash) + combined_hash = uint_hash_combine_32(seed, combined_hash) + + for key in keys[1:]: + current_hash = hash_single_uint32(s[key], seed=seed) + combined_hash = uint_hash_combine_32(combined_hash, current_hash) + + return combined_hash + + got = plc.hashing.murmurhash3_x86_32( + plc_tbl, plc.hashing.LIBCUDF_DEFAULT_HASH_SEED + ) + + expect = pa.array( + [hash_struct(val) for val in pa_tbl["struct"].to_pylist()], + 
type=pa.uint32(), + ) + assert_column_eq(got, expect) + + +def test_murmurhash3_x64_128(pa_scalar_input_column, plc_scalar_input_tbl): + def py_hasher(val): + hasher = mmh3.mmh3_x64_128(seed=plc.hashing.LIBCUDF_DEFAULT_HASH_SEED) + hasher.update(val) + return hasher.utupledigest() + + tuples = [ + py_hasher(scalar_to_binary(val)) + for val in pa_scalar_input_column.to_pylist() + ] + expect = pa.Table.from_arrays( + [ + pa.array([np.uint64(t[0]) for t in tuples]), + pa.array([np.uint64(t[1]) for t in tuples]), + ], + names=["0", "1"], + ) + got = plc.hashing.murmurhash3_x64_128(plc_scalar_input_tbl, 0) + + assert_table_eq(expect, got) diff --git a/python/pylibcudf/pylibcudf/tests/test_interop.py b/python/pylibcudf/pylibcudf/tests/test_interop.py index 01c998f16d4..af80b6e5978 100644 --- a/python/pylibcudf/pylibcudf/tests/test_interop.py +++ b/python/pylibcudf/pylibcudf/tests/test_interop.py @@ -1,8 +1,12 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +import cupy as cp +import numpy as np import pyarrow as pa -import pylibcudf as plc import pytest +from utils import assert_table_eq + +import pylibcudf as plc def test_list_dtype_roundtrip(): @@ -66,3 +70,31 @@ def test_decimal_other(data_type): arrow_type = plc.interop.to_arrow(data_type, precision=precision) assert arrow_type == pa.decimal128(precision, 0) + + +def test_round_trip_dlpack_plc_table(): + expected = pa.table({"a": [1, 2, 3], "b": [5, 6, 7]}) + plc_table = plc.interop.from_arrow(expected) + result = plc.interop.from_dlpack(plc.interop.to_dlpack(plc_table)) + assert_table_eq(expected, result) + + +@pytest.mark.parametrize("array", [np.array, cp.array]) +def test_round_trip_dlpack_array(array): + arr = array([1, 2, 3]) + result = plc.interop.from_dlpack(arr.__dlpack__()) + expected = pa.table({"a": [1, 2, 3]}) + assert_table_eq(expected, result) + + +def test_to_dlpack_error(): + plc_table = plc.interop.from_arrow( + pa.table({"a": [1, None, 3], "b": [5, 6, 7]}) + ) + with pytest.raises(ValueError, 
match="Cannot create a DLPack tensor"): + plc.interop.from_dlpack(plc.interop.to_dlpack(plc_table)) + + +def test_from_dlpack_error(): + with pytest.raises(ValueError, match="Invalid PyCapsule object"): + plc.interop.from_dlpack(1) diff --git a/python/pylibcudf/pylibcudf/tests/test_join.py b/python/pylibcudf/pylibcudf/tests/test_join.py index 61e02f4d28d..f43a56046a4 100644 --- a/python/pylibcudf/pylibcudf/tests/test_join.py +++ b/python/pylibcudf/pylibcudf/tests/test_join.py @@ -2,9 +2,10 @@ import numpy as np import pyarrow as pa -import pylibcudf as plc from utils import assert_table_eq +import pylibcudf as plc + def test_cross_join(): left = pa.Table.from_arrays([[0, 1, 2], [3, 4, 5]], names=["a", "b"]) diff --git a/python/pylibcudf/pylibcudf/tests/test_json.py b/python/pylibcudf/pylibcudf/tests/test_json.py index 3d2955211f8..486a9524e92 100644 --- a/python/pylibcudf/pylibcudf/tests/test_json.py +++ b/python/pylibcudf/pylibcudf/tests/test_json.py @@ -1,9 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def plc_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_labeling.py b/python/pylibcudf/pylibcudf/tests/test_labeling.py index f7fb7463b50..beacfc63ce5 100644 --- a/python/pylibcudf/pylibcudf/tests/test_labeling.py +++ b/python/pylibcudf/pylibcudf/tests/test_labeling.py @@ -1,9 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
import pyarrow as pa -import pylibcudf as plc import pytest +import pylibcudf as plc + @pytest.mark.parametrize("left_inclusive", [True, False]) @pytest.mark.parametrize("right_inclusive", [True, False]) diff --git a/python/pylibcudf/pylibcudf/tests/test_lists.py b/python/pylibcudf/pylibcudf/tests/test_lists.py index 2353a6ff8f9..f3ef555f11d 100644 --- a/python/pylibcudf/pylibcudf/tests/test_lists.py +++ b/python/pylibcudf/pylibcudf/tests/test_lists.py @@ -3,10 +3,11 @@ import numpy as np import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture def test_data(): diff --git a/python/pylibcudf/pylibcudf/tests/test_null_mask.py b/python/pylibcudf/pylibcudf/tests/test_null_mask.py index 3edcae59edc..cd3da856de2 100644 --- a/python/pylibcudf/pylibcudf/tests/test_null_mask.py +++ b/python/pylibcudf/pylibcudf/tests/test_null_mask.py @@ -1,12 +1,13 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest -from pylibcudf.null_mask import MaskState import rmm +import pylibcudf as plc +from pylibcudf.null_mask import MaskState + @pytest.fixture(params=[False, True]) def nullable(request): diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_byte_pair_encode.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_byte_pair_encode.py new file mode 100644 index 00000000000..7d6718a959b --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_byte_pair_encode.py @@ -0,0 +1,46 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +import pyarrow as pa +import pytest +from utils import assert_column_eq + +import pylibcudf as plc + + +@pytest.fixture(scope="module") +def input_col(): + return pa.array( + [ + "e n", + "i t", + "i s", + "e s", + "en t", + "c e", + "es t", + "en ce", + "t est", + "s ent", + ] + ) + + +@pytest.mark.parametrize( + "separator", [None, plc.interop.from_arrow(pa.scalar("e"))] +) +def test_byte_pair_encoding(input_col, separator): + plc_col = plc.interop.from_arrow( + pa.array(["test sentence", "thisis test"]) + ) + result = plc.nvtext.byte_pair_encode.byte_pair_encoding( + plc_col, + plc.nvtext.byte_pair_encode.BPEMergePairs( + plc.interop.from_arrow(input_col) + ), + separator, + ) + if separator is None: + expected = pa.array(["test sent ence", "t h is is test"]) + else: + expected = pa.array(["teste esenteence", "teheiseise etest"]) + assert_column_eq(result, expected) diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_edit_distance.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_edit_distance.py index 7d93c471cc4..8b14e0db576 100644 --- a/python/pylibcudf/pylibcudf/tests/test_nvtext_edit_distance.py +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_edit_distance.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def edit_distance_data(): diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_generate_ngrams.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_generate_ngrams.py index 5cf9874d595..fae4685f81b 100644 --- a/python/pylibcudf/pylibcudf/tests/test_nvtext_generate_ngrams.py +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_generate_ngrams.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def input_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_jaccard.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_jaccard.py index d5a168426b1..05fe7b53c16 100644 --- a/python/pylibcudf/pylibcudf/tests/test_nvtext_jaccard.py +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_jaccard.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def input_data(): diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_minhash.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_minhash.py index 4e389a63f90..ead9ee094af 100644 --- a/python/pylibcudf/pylibcudf/tests/test_nvtext_minhash.py +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_minhash.py @@ -1,9 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest +import pylibcudf as plc + @pytest.fixture(scope="module", params=[pa.uint32(), pa.uint64()]) def minhash_input_data(request): diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_ngrams_tokenize.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_ngrams_tokenize.py index 283a009288d..84748b5597e 100644 --- a/python/pylibcudf/pylibcudf/tests/test_nvtext_ngrams_tokenize.py +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_ngrams_tokenize.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def input_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_normalize.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_normalize.py new file mode 100644 index 00000000000..25b6d1389ec --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_normalize.py @@ -0,0 +1,43 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +import pyarrow as pa +import pytest +from utils import assert_column_eq + +import pylibcudf as plc + + +@pytest.fixture(scope="module") +def norm_spaces_input_data(): + arr = ["a b", " c d\n", "e \t f "] + return pa.array(arr) + + +@pytest.fixture(scope="module") +def norm_chars_input_data(): + arr = ["éâîô\teaio", "ĂĆĖÑÜ", "ACENU", "$24.08", "[a,bb]"] + return pa.array(arr) + + +def test_normalize_spaces(norm_spaces_input_data): + result = plc.nvtext.normalize.normalize_spaces( + plc.interop.from_arrow(norm_spaces_input_data) + ) + expected = pa.array(["a b", "c d", "e f"]) + assert_column_eq(result, expected) + + +@pytest.mark.parametrize("do_lower", [True, False]) +def test_normalize_characters(norm_chars_input_data, do_lower): + result = plc.nvtext.normalize.normalize_characters( + plc.interop.from_arrow(norm_chars_input_data), + do_lower, + ) + expected = pa.array( + ["eaio eaio", "acenu", "acenu", " $ 24 . 08", " [ a , bb ] "] + ) + if not do_lower: + expected = pa.array( + ["éâîô eaio", "ĂĆĖÑÜ", "ACENU", " $ 24 . 08", " [ a , bb ] "] + ) + assert_column_eq(result, expected) diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_replace.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_replace.py new file mode 100644 index 00000000000..65687f31c85 --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_replace.py @@ -0,0 +1,64 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +import pyarrow as pa +import pytest +from utils import assert_column_eq + +import pylibcudf as plc + + +@pytest.fixture(scope="module") +def input_col(): + arr = ["the quick", "brown fox", "jumps*over the", "lazy dog"] + return pa.array(arr) + + +@pytest.fixture(scope="module") +def targets(): + arr = ["the quick", "brown fox", "jumps*over the", "lazy dog"] + return pa.array(arr) + + +@pytest.mark.parametrize("delim", ["*", None]) +def test_replace_tokens(input_col, targets, delim): + replacements = pa.array(["slow", "cat", "looked", "rat"]) + result = plc.nvtext.replace.replace_tokens( + plc.interop.from_arrow(input_col), + plc.interop.from_arrow(targets), + plc.interop.from_arrow(replacements), + plc.interop.from_arrow(pa.scalar(delim)) if delim else None, + ) + expected = pa.array(["slow", "cat", "jumps*over the", "rat"]) + if not delim: + expected = pa.array( + ["the quick", "brown fox", "jumps*over the", "lazy dog"] + ) + assert_column_eq(result, expected) + + +@pytest.mark.parametrize("min_token_length", [4, 5]) +@pytest.mark.parametrize("replace", ["---", None]) +@pytest.mark.parametrize("delim", ["*", None]) +def test_filter_tokens(input_col, min_token_length, replace, delim): + result = plc.nvtext.replace.filter_tokens( + plc.interop.from_arrow(input_col), + min_token_length, + plc.interop.from_arrow(pa.scalar(replace)) if replace else None, + plc.interop.from_arrow(pa.scalar(delim)) if delim else None, + ) + expected = pa.array( + ["the quick", "brown fox", "jumps*over the", "lazy dog"] + ) + if not delim and not replace and min_token_length == 4: + expected = pa.array([" quick", "brown ", "jumps*over ", "lazy "]) + if not delim and not replace and min_token_length == 5: + expected = pa.array([" quick", "brown ", "jumps*over ", " "]) + if not delim and replace == "---" and min_token_length == 4: + expected = pa.array( + ["--- quick", "brown ---", "jumps*over ---", "lazy ---"] + ) + if not delim and replace == "---" and min_token_length == 5: + expected 
= pa.array( + ["--- quick", "brown ---", "jumps*over ---", "--- ---"] + ) + assert_column_eq(result, expected) diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_stemmer.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_stemmer.py new file mode 100644 index 00000000000..e7f4a971f08 --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_stemmer.py @@ -0,0 +1,48 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +import pyarrow as pa +import pytest +from utils import assert_column_eq + +import pylibcudf as plc + + +@pytest.fixture(scope="module") +def input_col(): + arr = ["trouble", "toy", "syzygy"] + return pa.array(arr) + + +@pytest.mark.parametrize("check_vowels", [True, False]) +@pytest.mark.parametrize("indices", [[3, 1, 4], 1]) +def test_is_letter(input_col, check_vowels, indices): + def is_letter(s, i, check): + vowels = "aeiouy" + return (s[i] in vowels) == check + + result = plc.nvtext.stemmer.is_letter( + plc.interop.from_arrow(input_col), + check_vowels, + plc.interop.from_arrow(pa.array(indices)) + if isinstance(indices, list) + else indices, + ) + expected = pa.array( + [ + is_letter( + s, + indices[i] if isinstance(indices, list) else indices, + check_vowels, + ) + for i, s in enumerate(input_col.to_pylist()) + ] + ) + assert_column_eq(result, expected) + + +def test_porter_stemmer_measure(input_col): + result = plc.nvtext.stemmer.porter_stemmer_measure( + plc.interop.from_arrow(input_col), + ) + expected = pa.array([1, 1, 2], type=pa.int32()) + assert_column_eq(result, expected) diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_subword_tokenize.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_subword_tokenize.py new file mode 100644 index 00000000000..516d0f7f78d --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_subword_tokenize.py @@ -0,0 +1,63 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +import pyarrow as pa +import pytest +from utils import assert_column_eq + +import pylibcudf as plc + + +@pytest.fixture +def vocab_file(tmpdir): + hash_file = tmpdir.mkdir("nvtext").join("tmp_hashed_vocab.txt") + content = "1\n0\n10\n" + coefficients = [65559] * 10 + for c in coefficients: + content = content + str(c) + " 0\n" + table = [0] * 10 + table[0] = 3015668 + content = content + "10\n" + for v in table: + content = content + str(v) + "\n" + content = content + "100\n101\n102\n\n" + hash_file.write(content) + return str(hash_file) + + +@pytest.fixture +def column_input(): + return pa.array(["This is a test"]) + + +@pytest.mark.parametrize("max_sequence_length", [64, 128]) +@pytest.mark.parametrize("stride", [32, 64]) +@pytest.mark.parametrize("do_lower_case", [True, False]) +@pytest.mark.parametrize("do_truncate", [True, False]) +def test_subword_tokenize( + vocab_file, + column_input, + max_sequence_length, + stride, + do_lower_case, + do_truncate, +): + vocab = plc.nvtext.subword_tokenize.HashedVocabulary(vocab_file) + tokens, masks, metadata = plc.nvtext.subword_tokenize.subword_tokenize( + plc.interop.from_arrow(column_input), + vocab, + max_sequence_length, + stride, + do_lower_case, + do_truncate, + ) + expected_tokens = pa.array( + [100] * 4 + [0] * (max_sequence_length - 4), type=pa.uint32() + ) + expected_masks = pa.array( + [1] * 4 + [0] * (max_sequence_length - 4), type=pa.uint32() + ) + expected_metadata = pa.array([0, 0, 3], type=pa.uint32()) + + assert_column_eq(tokens, expected_tokens) + assert_column_eq(masks, expected_masks) + assert_column_eq(metadata, expected_metadata) diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_tokenize.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_tokenize.py new file mode 100644 index 00000000000..f1b4a5637e1 --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_tokenize.py @@ -0,0 +1,96 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +import pyarrow as pa +import pytest +from utils import assert_column_eq + +import pylibcudf as plc + + +@pytest.fixture(scope="module") +def input_col(): + return pa.array(["a", "b c", "d.e:f;"]) + + +@pytest.mark.parametrize( + "delimiter", [None, plc.interop.from_arrow(pa.scalar("."))] +) +def test_tokenize_scalar(input_col, delimiter): + result = plc.nvtext.tokenize.tokenize_scalar( + plc.interop.from_arrow(input_col), delimiter + ) + if delimiter is None: + expected = pa.array(["a", "b", "c", "d.e:f;"]) + else: + expected = pa.array(["a", "b c", "d", "e:f;"]) + assert_column_eq(result, expected) + + +def test_tokenize_column(input_col): + delimiters = pa.array([" ", ".", ":", ";"]) + result = plc.nvtext.tokenize.tokenize_column( + plc.interop.from_arrow(input_col), plc.interop.from_arrow(delimiters) + ) + expected = pa.array(["a", "b", "c", "d", "e", "f"]) + assert_column_eq(result, expected) + + +@pytest.mark.parametrize( + "delimiter", [None, plc.interop.from_arrow(pa.scalar("."))] +) +def test_count_tokens_scalar(input_col, delimiter): + result = plc.nvtext.tokenize.count_tokens_scalar( + plc.interop.from_arrow(input_col), delimiter + ) + if delimiter is None: + expected = pa.array([1, 2, 1], type=pa.int32()) + else: + expected = pa.array([1, 1, 2], type=pa.int32()) + assert_column_eq(result, expected) + + +def test_count_tokens_column(input_col): + delimiters = pa.array([" ", ".", ":", ";"]) + result = plc.nvtext.tokenize.count_tokens_column( + plc.interop.from_arrow(input_col), plc.interop.from_arrow(delimiters) + ) + expected = pa.array([1, 2, 3], type=pa.int32()) + assert_column_eq(result, expected) + + +def test_character_tokenize(input_col): + result = plc.nvtext.tokenize.character_tokenize( + plc.interop.from_arrow(input_col) + ) + expected = pa.array(["a", "b", " ", "c", "d", ".", "e", ":", "f", ";"]) + assert_column_eq(result, expected) + + +@pytest.mark.parametrize( + "delimiter", [None, plc.interop.from_arrow(pa.scalar("."))] +) +def 
test_detokenize(input_col, delimiter): + row_indices = pa.array([0, 0, 1]) + result = plc.nvtext.tokenize.detokenize( + plc.interop.from_arrow(input_col), plc.interop.from_arrow(row_indices) + ) + expected = pa.array(["a b c", "d.e:f;"]) + assert_column_eq(result, expected) + + +@pytest.mark.parametrize("default_id", [-1, 0]) +def test_tokenize_with_vocabulary(input_col, default_id): + result = plc.nvtext.tokenize.tokenize_with_vocabulary( + plc.interop.from_arrow(input_col), + plc.nvtext.tokenize.TokenizeVocabulary( + plc.interop.from_arrow(input_col) + ), + plc.interop.from_arrow(pa.scalar(" ")), + default_id, + ) + pa_result = plc.interop.to_arrow(result) + if default_id == -1: + expected = pa.array([[0], [-1, -1], [2]], type=pa_result.type) + else: + expected = pa.array([[0], [0, 0], [2]], type=pa_result.type) + assert_column_eq(result, expected) diff --git a/python/pylibcudf/pylibcudf/tests/test_partitioning.py b/python/pylibcudf/pylibcudf/tests/test_partitioning.py index 444d0089d2c..c55e54cebc6 100644 --- a/python/pylibcudf/pylibcudf/tests/test_partitioning.py +++ b/python/pylibcudf/pylibcudf/tests/test_partitioning.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_table_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def partitioning_data(): diff --git a/python/pylibcudf/pylibcudf/tests/test_quantiles.py b/python/pylibcudf/pylibcudf/tests/test_quantiles.py index bac56691306..e4a24fb1c98 100644 --- a/python/pylibcudf/pylibcudf/tests/test_quantiles.py +++ b/python/pylibcudf/pylibcudf/tests/test_quantiles.py @@ -3,10 +3,11 @@ import numpy as np import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq, assert_table_eq +import pylibcudf as plc + # Map pylibcudf interpolation options to pyarrow options interp_mapping = { plc.types.Interpolation.LINEAR: "linear", diff --git a/python/pylibcudf/pylibcudf/tests/test_regex_program.py b/python/pylibcudf/pylibcudf/tests/test_regex_program.py index 777315df538..52598f2c462 100644 --- a/python/pylibcudf/pylibcudf/tests/test_regex_program.py +++ b/python/pylibcudf/pylibcudf/tests/test_regex_program.py @@ -1,8 +1,9 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -import pylibcudf as plc import pytest +import pylibcudf as plc + @pytest.mark.parametrize("pat", ["(", "*", "\\"]) def test_regex_program_invalid(pat): diff --git a/python/pylibcudf/pylibcudf/tests/test_reshape.py b/python/pylibcudf/pylibcudf/tests/test_reshape.py index 01115bc363a..ef23e23766a 100644 --- a/python/pylibcudf/pylibcudf/tests/test_reshape.py +++ b/python/pylibcudf/pylibcudf/tests/test_reshape.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq, assert_table_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def reshape_data(): diff --git a/python/pylibcudf/pylibcudf/tests/test_round.py b/python/pylibcudf/pylibcudf/tests/test_round.py index 0b30316b9a0..2526580bc13 100644 --- a/python/pylibcudf/pylibcudf/tests/test_round.py +++ b/python/pylibcudf/pylibcudf/tests/test_round.py @@ -2,10 +2,11 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(params=["float32", "float64"]) def column(request, has_nulls): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_attributes.py b/python/pylibcudf/pylibcudf/tests/test_string_attributes.py index a1820def0b1..f461657281a 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_attributes.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_attributes.py @@ -2,10 +2,11 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture() def str_data(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_capitalize.py b/python/pylibcudf/pylibcudf/tests/test_string_capitalize.py index 176ccc55b96..3e31c75c38a 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_capitalize.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_capitalize.py @@ -2,10 +2,11 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def str_data(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_case.py b/python/pylibcudf/pylibcudf/tests/test_string_case.py index 233cc253b14..08ac371fd96 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_case.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_case.py @@ -2,10 
+2,11 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def string_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_char_types.py b/python/pylibcudf/pylibcudf/tests/test_string_char_types.py index bcd030c019e..06b44210d74 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_char_types.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_char_types.py @@ -2,9 +2,10 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_all_characters_of_type(): pa_array = pa.array(["1", "A"]) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_combine.py b/python/pylibcudf/pylibcudf/tests/test_string_combine.py index 4a7007a0d6b..eea3ac68e84 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_combine.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_combine.py @@ -2,10 +2,11 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + def test_concatenate_scalar_seperator(): plc_table = plc.interop.from_arrow( diff --git a/python/pylibcudf/pylibcudf/tests/test_string_contains.py b/python/pylibcudf/pylibcudf/tests/test_string_contains.py index 4e4dd7cbb00..ba9a4a7d3b8 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_contains.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_contains.py @@ -2,10 +2,11 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def target_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert.py b/python/pylibcudf/pylibcudf/tests/test_string_convert.py index 69f7a0fdd33..3f3f452c4f6 100644 --- 
a/python/pylibcudf/pylibcudf/tests/test_string_convert.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture( scope="module", diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_booleans.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_booleans.py index 117c59ff1b8..b391d2b290e 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_convert_booleans.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_booleans.py @@ -1,9 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_to_booleans(): pa_array = pa.array(["true", None, "True"]) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_datetime.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_datetime.py index f3e84286a36..c9368d858a4 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_convert_datetime.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_datetime.py @@ -3,10 +3,11 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture def fmt(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_durations.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_durations.py index 6d704309bfd..2d3578e4e71 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_convert_durations.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_durations.py @@ -3,10 +3,11 @@ from datetime import datetime, timedelta import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture( params=[ diff --git 
a/python/pylibcudf/pylibcudf/tests/test_string_convert_fixed_point.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_fixed_point.py index b1c4d729604..012e722038e 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_convert_fixed_point.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_fixed_point.py @@ -2,9 +2,10 @@ import decimal import pyarrow as pa -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_to_fixed_point(): typ = pa.decimal128(38, 2) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_floats.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_floats.py index e9918fab559..8ee2b5075af 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_convert_floats.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_floats.py @@ -1,9 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_to_floats(): typ = pa.float32() diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_integers.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_integers.py index 6d1d565af30..01192c2d1f8 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_convert_integers.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_integers.py @@ -1,8 +1,9 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_to_integers(): typ = pa.int8() diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_ipv4.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_ipv4.py index 4dc3e512624..b533809f106 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_convert_ipv4.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_ipv4.py @@ -1,8 +1,9 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
import pyarrow as pa -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_ipv4_to_integers(): arr = pa.array(["123.45.67.890", None]) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_lists.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_lists.py index 8591732b39e..737036a4f0f 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_convert_lists.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_lists.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.mark.parametrize("na_rep", [None, pa.scalar("")]) @pytest.mark.parametrize("separators", [None, pa.array([",", "[", "]"])]) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_urls.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_urls.py index fee8c3fb8f6..528736798c7 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_convert_urls.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_urls.py @@ -2,9 +2,10 @@ import urllib import pyarrow as pa -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_url_encode(): data = ["/home/nfs", None] diff --git a/python/pylibcudf/pylibcudf/tests/test_string_extract.py b/python/pylibcudf/pylibcudf/tests/test_string_extract.py index 788b86423c4..e70edf4fb33 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_extract.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_extract.py @@ -2,6 +2,7 @@ import pyarrow as pa import pyarrow.compute as pc + import pylibcudf as plc diff --git a/python/pylibcudf/pylibcudf/tests/test_string_find.py b/python/pylibcudf/pylibcudf/tests/test_string_find.py index db3b13a5aae..82ec18832a9 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_find.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_find.py @@ -2,10 +2,11 @@ import pyarrow as pa 
import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def data_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_find_multiple.py b/python/pylibcudf/pylibcudf/tests/test_string_find_multiple.py index d6b37a388f0..fa9eee3594b 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_find_multiple.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_find_multiple.py @@ -1,9 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_find_multiple(): arr = pa.array(["abc", "def"]) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_findall.py b/python/pylibcudf/pylibcudf/tests/test_string_findall.py index debfad92d00..b73d812c898 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_findall.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_findall.py @@ -2,9 +2,10 @@ import re import pyarrow as pa -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_findall(): arr = pa.array(["bunny", "rabbit", "hare", "dog"]) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_padding.py b/python/pylibcudf/pylibcudf/tests/test_string_padding.py index 2ba775d17ae..79498132097 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_padding.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_padding.py @@ -2,6 +2,7 @@ import pyarrow as pa import pyarrow.compute as pc + import pylibcudf as plc diff --git a/python/pylibcudf/pylibcudf/tests/test_string_repeat.py b/python/pylibcudf/pylibcudf/tests/test_string_repeat.py index 18b5d8bf4d0..c06c06be7c6 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_repeat.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_repeat.py @@ -2,9 +2,10 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest +import pylibcudf 
as plc + @pytest.mark.parametrize("repeats", [pa.array([2, 2]), 2]) def test_repeat_strings(repeats): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_replace.py b/python/pylibcudf/pylibcudf/tests/test_string_replace.py index 5a9c2007b73..2c7d25133de 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_replace.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_replace.py @@ -2,10 +2,11 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def data_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_replace_re.py b/python/pylibcudf/pylibcudf/tests/test_string_replace_re.py new file mode 100644 index 00000000000..511f826441a --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_string_replace_re.py @@ -0,0 +1,72 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +import pyarrow as pa +import pyarrow.compute as pc +import pytest +from utils import assert_column_eq + +import pylibcudf as plc + + +@pytest.mark.parametrize("max_replace_count", [-1, 1]) +def test_replace_re_regex_program_scalar(max_replace_count): + arr = pa.array(["foo", "fuz", None]) + pat = "f." 
+ repl = "ba" + result = plc.strings.replace_re.replace_re( + plc.interop.from_arrow(arr), + plc.strings.regex_program.RegexProgram.create( + pat, plc.strings.regex_flags.RegexFlags.DEFAULT + ), + plc.interop.from_arrow(pa.scalar(repl)), + max_replace_count=max_replace_count, + ) + expected = pc.replace_substring_regex( + arr, + pat, + repl, + max_replacements=max_replace_count + if max_replace_count != -1 + else None, + ) + assert_column_eq(result, expected) + + +@pytest.mark.parametrize( + "flags", + [ + plc.strings.regex_flags.RegexFlags.DEFAULT, + plc.strings.regex_flags.RegexFlags.DOTALL, + ], +) +def test_replace_re_list_str_columns(flags): + arr = pa.array(["foo", "fuz", None]) + pats = ["oo", "uz"] + repls = ["a", "b"] + result = plc.strings.replace_re.replace_re( + plc.interop.from_arrow(arr), + pats, + plc.interop.from_arrow(pa.array(repls)), + flags=flags, + ) + expected = arr + for pat, repl in zip(pats, repls): + expected = pc.replace_substring_regex( + expected, + pat, + repl, + ) + assert_column_eq(result, expected) + + +def test_replace_with_backrefs(): + arr = pa.array(["Z756", None]) + result = plc.strings.replace_re.replace_with_backrefs( + plc.interop.from_arrow(arr), + plc.strings.regex_program.RegexProgram.create( + "(\\d)(\\d)", plc.strings.regex_flags.RegexFlags.DEFAULT + ), + "V\\2\\1", + ) + expected = pa.array(["ZV576", None]) + assert_column_eq(result, expected) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_slice.py b/python/pylibcudf/pylibcudf/tests/test_string_slice.py index d9ce5591b98..1759f739e31 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_slice.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_slice.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture(scope="module") def pa_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_split_partition.py b/python/pylibcudf/pylibcudf/tests/test_string_split_partition.py index 80cae8d1c6b..4e80f19b814 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_split_partition.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_split_partition.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_table_eq +import pylibcudf as plc + @pytest.fixture def data_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_split_split.py b/python/pylibcudf/pylibcudf/tests/test_string_split_split.py index 2aeffac8209..450b336ce65 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_split_split.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_split_split.py @@ -2,10 +2,11 @@ import pyarrow as pa import pyarrow.compute as pc -import pylibcudf as plc import pytest from utils import assert_column_eq, assert_table_eq +import pylibcudf as plc + @pytest.fixture def data_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_strip.py b/python/pylibcudf/pylibcudf/tests/test_string_strip.py index 005e5e4a405..5869e5f4920 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_strip.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_strip.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + data_strings = [ "AbC", "123abc", diff --git a/python/pylibcudf/pylibcudf/tests/test_string_translate.py b/python/pylibcudf/pylibcudf/tests/test_string_translate.py index 2ae893e69fb..84fd3354ac6 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_translate.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_translate.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from utils import assert_column_eq +import pylibcudf as plc + @pytest.fixture def data_col(): diff --git a/python/pylibcudf/pylibcudf/tests/test_string_wrap.py b/python/pylibcudf/pylibcudf/tests/test_string_wrap.py index a1c820cd586..00442d866e9 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_wrap.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_wrap.py @@ -2,9 +2,10 @@ import textwrap import pyarrow as pa -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_wrap(): width = 12 diff --git a/python/pylibcudf/pylibcudf/tests/test_table.py b/python/pylibcudf/pylibcudf/tests/test_table.py index e822d6a97a8..ac39ef4c5c9 100644 --- a/python/pylibcudf/pylibcudf/tests/test_table.py +++ b/python/pylibcudf/pylibcudf/tests/test_table.py @@ -1,9 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
import pyarrow as pa -import pylibcudf as plc import pytest +import pylibcudf as plc + @pytest.mark.parametrize( "arrow_tbl", diff --git a/python/pylibcudf/pylibcudf/tests/test_transform.py b/python/pylibcudf/pylibcudf/tests/test_transform.py index d5c618f07e4..49802fe64ac 100644 --- a/python/pylibcudf/pylibcudf/tests/test_transform.py +++ b/python/pylibcudf/pylibcudf/tests/test_transform.py @@ -3,9 +3,10 @@ import math import pyarrow as pa -import pylibcudf as plc from utils import assert_column_eq +import pylibcudf as plc + def test_nans_to_nulls(has_nans): if has_nans: diff --git a/python/pylibcudf/pylibcudf/tests/test_transpose.py b/python/pylibcudf/pylibcudf/tests/test_transpose.py index ac11123f680..b0c0bc72ead 100644 --- a/python/pylibcudf/pylibcudf/tests/test_transpose.py +++ b/python/pylibcudf/pylibcudf/tests/test_transpose.py @@ -1,10 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import pyarrow as pa -import pylibcudf as plc import pytest from packaging.version import parse +import pylibcudf as plc + @pytest.mark.skipif( parse(pa.__version__) < parse("16.0.0"), diff --git a/python/pylibcudf/pylibcudf/transform.pxd b/python/pylibcudf/pylibcudf/transform.pxd index b530f433c97..4fb623158f0 100644 --- a/python/pylibcudf/pylibcudf/transform.pxd +++ b/python/pylibcudf/pylibcudf/transform.pxd @@ -3,6 +3,7 @@ from libcpp cimport bool from pylibcudf.libcudf.types cimport bitmask_type, data_type from .column cimport Column +from .expressions cimport Expression from .gpumemoryview cimport gpumemoryview from .table cimport Table from .types cimport DataType @@ -10,6 +11,8 @@ from .types cimport DataType cpdef tuple[gpumemoryview, int] nans_to_nulls(Column input) +cpdef Column compute_column(Table input, Expression expr) + cpdef tuple[gpumemoryview, int] bools_to_mask(Column input) cpdef Column mask_to_bools(Py_ssize_t bitmask, int begin_bit, int end_bit) diff --git a/python/pylibcudf/pylibcudf/transform.pyx b/python/pylibcudf/pylibcudf/transform.pyx index 
bce9702752a..e8d95cadb0c 100644 --- a/python/pylibcudf/pylibcudf/transform.pyx +++ b/python/pylibcudf/pylibcudf/transform.pyx @@ -1,5 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from cython.operator cimport dereference from libcpp.memory cimport unique_ptr from libcpp.string cimport string from libcpp.utility cimport move, pair @@ -43,6 +44,32 @@ cpdef tuple[gpumemoryview, int] nans_to_nulls(Column input): ) +cpdef Column compute_column(Table input, Expression expr): + """Create a column by evaluating an expression on a table. + + For details see :cpp:func:`compute_column`. + + Parameters + ---------- + input : Table + Table used for expression evaluation + expr : Expression + Expression to evaluate + + Returns + ------- + Column of the evaluated expression + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = cpp_transform.compute_column( + input.view(), dereference(expr.c_obj.get()) + ) + + return Column.from_libcudf(move(c_result)) + + cpdef tuple[gpumemoryview, int] bools_to_mask(Column input): """Create a bitmask from a column of boolean elements diff --git a/python/pylibcudf/pylibcudf/types.pyx b/python/pylibcudf/pylibcudf/types.pyx index 58c7d97e9bc..a0c31f994a3 100644 --- a/python/pylibcudf/pylibcudf/types.pyx +++ b/python/pylibcudf/pylibcudf/types.pyx @@ -79,6 +79,16 @@ cpdef size_type size_of(DataType t): Only fixed-width types are supported. For details, see :cpp:func:`size_of`. + + Parameters + ---------- + t : DataType + The DataType to get the size of. + + Returns + ------- + int + Size in bytes of an element of the specified type. 
""" with nogil: return cpp_size_of(t.c_obj) diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index ea5b3065896..a80c85a1fa8 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -53,48 +53,20 @@ test = [ Homepage = "https://github.com/rapidsai/cudf" Documentation = "https://docs.rapids.ai/api/cudf/stable/" -[tool.isort] -line_length = 79 -multi_line_output = 3 -include_trailing_comma = true -force_grid_wrap = 0 -combine_as_imports = true -order_by_type = true -known_dask = [ - "dask", - "distributed", - "dask_cuda", -] -known_rapids = [ - "rmm", -] -known_first_party = [ - "cudf", -] -default_section = "THIRDPARTY" -sections = [ - "FUTURE", - "STDLIB", - "THIRDPARTY", - "DASK", - "RAPIDS", - "FIRSTPARTY", - "LOCALFOLDER", -] -skip = [ - "thirdparty", - ".eggs", - ".git", - ".hg", - ".mypy_cache", - ".tox", - ".venv", - "_build", - "buck-out", - "build", - "dist", - "__init__.py", -] +[tool.ruff] +extend = "../../pyproject.toml" + +[tool.ruff.lint.isort] +combine-as-imports = true +known-first-party = ["cudf"] +section-order = ["future", "standard-library", "third-party", "dask", "rapids", "first-party", "local-folder"] + +[tool.ruff.lint.isort.sections] +dask = ["dask", "distributed", "dask_cuda"] +rapids = ["rmm"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] [tool.pytest.ini_options] # --import-mode=importlib because two test_json.py exists and tests directory is not a structured module