diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index d86fc0e550a..e793dda3823 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda11.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index 66a3b22df11..ba959f2bc27 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda11.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git 
a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.5-conda/devcontainer.json index a0e193ff0bf..ecc88038136 100644 --- a/.devcontainer/cuda12.5-conda/devcontainer.json +++ b/.devcontainer/cuda12.5-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.12-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/cuda:24.12": { + "ghcr.io/rapidsai/devcontainers/features/cuda:25.2": { "version": "12.5", "installCompilers": false, "installProfilers": true, @@ -36,7 +36,7 @@ "installnvJPEG": false, "pruneStaticLibs": true }, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/cuda", diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.5-pip/devcontainer.json index 125c85cefa9..b4828038f7d 100644 --- a/.devcontainer/cuda12.5-pip/devcontainer.json +++ b/.devcontainer/cuda12.5-pip/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.12-cpp-cuda12.5-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + 
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index c034752d373..fb7182f4133 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-libcudf: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: # build for every combination of arch and CUDA version, but only for the 
latest Python matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) @@ -81,7 +81,7 @@ jobs: wheel-publish-libcudf: needs: wheel-build-libcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -92,7 +92,7 @@ jobs: wheel-build-pylibcudf: needs: [wheel-publish-libcudf] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -102,7 +102,7 @@ jobs: wheel-publish-pylibcudf: needs: wheel-build-pylibcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -113,7 +113,7 @@ jobs: wheel-build-cudf: needs: wheel-publish-pylibcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -123,7 +123,7 @@ jobs: wheel-publish-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -134,7 +134,7 @@ jobs: wheel-build-dask-cudf: needs: wheel-publish-cudf secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -146,7 +146,7 @@ jobs: wheel-publish-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -157,7 +157,7 @@ jobs: wheel-build-cudf-polars: needs: wheel-publish-pylibcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -169,7 +169,7 @@ jobs: wheel-publish-cudf-polars: needs: wheel-build-cudf-polars secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pandas-tests.yaml b/.github/workflows/pandas-tests.yaml index c676032779f..a29babb218f 100644 --- a/.github/workflows/pandas-tests.yaml +++ b/.github/workflows/pandas-tests.yaml @@ -17,7 +17,7 @@ jobs: pandas-tests: # run the Pandas unit tests secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index a8f5023ef76..a8c4e481683 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -40,7 +40,7 @@ jobs: - pandas-tests-diff - telemetry-setup secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02 if: always() with: needs: ${{ toJSON(needs) }} @@ -55,7 +55,7 @@ jobs: changed-files: secrets: inherit needs: telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.02 with: files_yaml: | test_cpp: @@ -103,40 +103,40 @@ jobs: checks: secrets: inherit needs: telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.02 with: enable_check_generated_files: false ignored_pr_jobs: "telemetry-summarize" conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02 with: build_type: pull-request enable_check_symbols: true conda-cpp-tests: needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 + uses: 
rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 with: build_type: pull-request conda-python-cudf-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request @@ -145,7 +145,7 @@ jobs: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request @@ -153,7 +153,7 @@ jobs: conda-java-tests: needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_java with: build_type: pull-request @@ -164,7 +164,7 @@ jobs: static-configure: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: pull-request # Use the wheel container so we can skip conda solves and since our @@ -174,7 +174,7 @@ jobs: 
conda-notebook-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_notebooks with: build_type: pull-request @@ -185,7 +185,7 @@ jobs: docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -195,7 +195,7 @@ jobs: wheel-build-libcudf: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: # build for every combination of arch and CUDA version, but only for the latest Python matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) @@ -204,21 +204,21 @@ jobs: wheel-build-pylibcudf: needs: [checks, wheel-build-libcudf] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: pull-request script: "ci/build_wheel_pylibcudf.sh" wheel-build-cudf: needs: wheel-build-pylibcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: pull-request script: "ci/build_wheel_cudf.sh" wheel-tests-cudf: needs: [wheel-build-cudf, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 if: 
fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request @@ -226,7 +226,7 @@ jobs: wheel-build-cudf-polars: needs: wheel-build-pylibcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -235,7 +235,7 @@ jobs: wheel-tests-cudf-polars: needs: [wheel-build-cudf-polars, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". @@ -245,7 +245,7 @@ jobs: cudf-polars-polars-tests: needs: wheel-build-cudf-polars secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -254,7 +254,7 @@ jobs: wheel-build-dask-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -263,7 +263,7 @@ jobs: wheel-tests-dask-cudf: needs: [wheel-build-dask-cudf, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". @@ -272,8 +272,8 @@ jobs: script: ci/test_wheel_dask_cudf.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.12 needs: telemetry-setup + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02 with: arch: '["amd64"]' cuda: '["12.5"]' @@ -284,7 +284,7 @@ jobs: unit-tests-cudf-pandas: needs: [wheel-build-cudf, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python || fromJSON(needs.changed-files.outputs.changed_file_groups).test_cudf_pandas with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". @@ -295,7 +295,7 @@ jobs: # run the Pandas unit tests using PR branch needs: [wheel-build-cudf, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python || fromJSON(needs.changed-files.outputs.changed_file_groups).test_cudf_pandas with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
@@ -307,7 +307,7 @@ jobs: pandas-tests-diff: # diff the results of running the Pandas unit tests and publish a job summary needs: pandas-tests - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: node_type: cpu4 build_type: pull-request diff --git a/.github/workflows/pr_issue_status_automation.yml b/.github/workflows/pr_issue_status_automation.yml index 6f0e88fb245..20db9623e1b 100644 --- a/.github/workflows/pr_issue_status_automation.yml +++ b/.github/workflows/pr_issue_status_automation.yml @@ -23,7 +23,7 @@ on: jobs: get-project-id: - uses: rapidsai/shared-workflows/.github/workflows/project-get-item-id.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/project-get-item-id.yaml@branch-25.02 if: github.event.pull_request.state == 'open' secrets: inherit permissions: @@ -34,7 +34,7 @@ jobs: update-status: # This job sets the PR and its linked issues to "In Progress" status - uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@branch-25.02 if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }} needs: get-project-id with: @@ -50,7 +50,7 @@ jobs: update-sprint: # This job sets the PR and its linked issues to the current "Weekly Sprint" - uses: rapidsai/shared-workflows/.github/workflows/project-get-set-iteration-field.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/project-get-set-iteration-field.yaml@branch-25.02 if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }} needs: get-project-id with: @@ -79,7 +79,7 @@ jobs: update-release: # This job sets the PR and its linked issues to the release they are targeting - uses: 
rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@branch-25.02 if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }} needs: [get-project-id, process-branch-name] with: diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 3be07480b15..858352f515d 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -25,7 +25,7 @@ jobs: enable_check_symbols: true conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -33,7 +33,7 @@ jobs: sha: ${{ inputs.sha }} conda-cpp-memcheck-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -45,7 +45,7 @@ jobs: run_script: "ci/test_cpp_memcheck.sh" static-configure: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: run_script: "ci/configure_cpp_static.sh" cpp-linters: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: 
rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -67,7 +67,7 @@ jobs: file_to_upload: iwyu_results.txt conda-python-cudf-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -77,7 +77,7 @@ jobs: conda-python-other-tests: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -86,7 +86,7 @@ jobs: script: "ci/test_python_other.sh" conda-java-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -98,7 +98,7 @@ jobs: run_script: "ci/test_java.sh" conda-notebook-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -110,7 +110,7 @@ jobs: run_script: "ci/test_notebooks.sh" wheel-tests-cudf: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -119,7 +119,7 @@ jobs: script: ci/test_wheel_cudf.sh wheel-tests-dask-cudf: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: 
rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -128,7 +128,7 @@ jobs: script: ci/test_wheel_dask_cudf.sh unit-tests-cudf-pandas: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -137,7 +137,7 @@ jobs: script: ci/cudf_pandas_scripts/run_tests.sh third-party-integration-tests-cudf-pandas: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -149,7 +149,7 @@ jobs: ci/cudf_pandas_scripts/third-party-integration/test.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml wheel-tests-cudf-polars: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -158,7 +158,7 @@ jobs: script: "ci/test_wheel_cudf_polars.sh" cudf-polars-polars-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/README.md b/README.md index 169d2e4eded..b83d2140a33 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ cuDF can be installed with conda (via [miniforge](https://github.com/conda-forge ```bash conda install -c rapidsai -c conda-forge -c nvidia \ - cudf=24.12 python=3.12 cuda-version=12.5 + cudf=25.02 python=3.12 cuda-version=12.5 ``` We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD 
diff --git a/VERSION b/VERSION index af28c42b528..72eefaf7c79 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.12.00 +25.02.00 diff --git a/ci/run_cudf_polars_polars_tests.sh b/ci/run_cudf_polars_polars_tests.sh index 49437510c7e..b1bfac2a1dd 100755 --- a/ci/run_cudf_polars_polars_tests.sh +++ b/ci/run_cudf_polars_polars_tests.sh @@ -13,6 +13,8 @@ DESELECTED_TESTS=( "tests/unit/test_cpu_check.py::test_check_cpu_flags_skipped_no_flags" # Mock library error "tests/docs/test_user_guide.py" # No dot binary in CI image "tests/unit/test_polars_import.py::test_fork_safety" # test started to hang in polars-1.14 + "tests/unit/operations/test_join.py::test_join_4_columns_with_validity" # fails in some systems, see https://github.com/pola-rs/polars/issues/19870 + "tests/unit/io/test_csv.py::test_read_web_file" # fails in rockylinux8 due to SSL CA issues ) if [[ $(arch) == "aarch64" ]]; then diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index d21497c4def..ecc490b378b 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -26,7 +26,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.3 -- dask-cuda==24.12.*,>=0.0.0a0 +- dask-cuda==25.2.*,>=0.0.0a0 - dlpack>=0.8,<1.0 - doxygen=1.9.1 - fastavro>=0.22.9 @@ -42,9 +42,9 @@ dependencies: - libcufile=1.4.0.31 - libcurand-dev=10.3.0.86 - libcurand=10.3.0.86 -- libkvikio==24.12.*,>=0.0.0a0 +- libkvikio==25.2.*,>=0.0.0a0 - librdkafka>=2.5.0,<2.6.0a0 -- librmm==24.12.*,>=0.0.0a0 +- librmm==25.2.*,>=0.0.0a0 - make - mmh3 - moto>=4.0.8 @@ -81,9 +81,9 @@ dependencies: - python>=3.10,<3.13 - pytorch>=2.1.0 - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==24.12.*,>=0.0.0a0 +- rapids-dask-dependency==25.2.*,>=0.0.0a0 - rich -- rmm==24.12.*,>=0.0.0a0 +- rmm==25.2.*,>=0.0.0a0 - s3fs>=2022.3.0 - scikit-build-core>=0.10.0 - scipy diff --git 
a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 400c1195e00..4559829ac3a 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -27,7 +27,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.3 -- dask-cuda==24.12.*,>=0.0.0a0 +- dask-cuda==25.2.*,>=0.0.0a0 - dlpack>=0.8,<1.0 - doxygen=1.9.1 - fastavro>=0.22.9 @@ -41,9 +41,9 @@ dependencies: - jupyter_client - libcufile-dev - libcurand-dev -- libkvikio==24.12.*,>=0.0.0a0 +- libkvikio==25.2.*,>=0.0.0a0 - librdkafka>=2.5.0,<2.6.0a0 -- librmm==24.12.*,>=0.0.0a0 +- librmm==25.2.*,>=0.0.0a0 - make - mmh3 - moto>=4.0.8 @@ -79,9 +79,9 @@ dependencies: - python>=3.10,<3.13 - pytorch>=2.1.0 - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==24.12.*,>=0.0.0a0 +- rapids-dask-dependency==25.2.*,>=0.0.0a0 - rich -- rmm==24.12.*,>=0.0.0a0 +- rmm==25.2.*,>=0.0.0a0 - s3fs>=2022.3.0 - scikit-build-core>=0.10.0 - scipy diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 7fdaff35525..3e52c502113 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -286,6 +286,12 @@ ConfigureNVBench( ConfigureBench(HASHING_BENCH hashing/partition.cpp) ConfigureNVBench(HASHING_NVBENCH hashing/hash.cpp) +# ################################################################################################## +# * interop benchmark ------------------------------------------------------------------------------ +ConfigureNVBench(INTEROP_NVBENCH interop/interop.cpp) +target_link_libraries(INTEROP_NVBENCH PRIVATE nanoarrow) +target_include_directories(INTEROP_NVBENCH PRIVATE ${CMAKE_SOURCE_DIR}/tests/interop) + # ################################################################################################## # * merge benchmark ------------------------------------------------------------------------------- ConfigureBench(MERGE_BENCH merge/merge.cpp) @@ 
-354,7 +360,7 @@ ConfigureNVBench( # ################################################################################################## # * strings benchmark ------------------------------------------------------------------- -ConfigureBench(STRINGS_BENCH string/factory.cu string/repeat_strings.cpp string/url_decode.cu) +ConfigureBench(STRINGS_BENCH string/factory.cu string/repeat_strings.cpp) ConfigureNVBench( STRINGS_NVBENCH @@ -385,6 +391,7 @@ ConfigureNVBench( string/split.cpp string/split_re.cpp string/translate.cpp + string/url_decode.cu ) # ################################################################################################## diff --git a/cpp/benchmarks/interop/interop.cpp b/cpp/benchmarks/interop/interop.cpp new file mode 100644 index 00000000000..dad7e6f429e --- /dev/null +++ b/cpp/benchmarks/interop/interop.cpp @@ -0,0 +1,244 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include + +template +void BM_to_arrow_device(nvbench::state& state, nvbench::type_list>) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const num_columns = static_cast(state.get_int64("num_columns")); + auto const num_elements = static_cast(num_rows) * num_columns; + + std::vector types(num_columns, data_type); + + auto const table = create_random_table(types, row_count{num_rows}); + int64_t const size_bytes = estimate_size(table->view()); + + state.add_element_count(num_elements, "num_elements"); + state.add_global_memory_reads(size_bytes); + state.add_global_memory_writes(size_bytes); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::to_arrow_device(table->view(), rmm::cuda_stream_view{launch.get_stream()}); + }); +} + +template +void BM_to_arrow_host(nvbench::state& state, nvbench::type_list>) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const num_columns = static_cast(state.get_int64("num_columns")); + auto const num_elements = static_cast(num_rows) * num_columns; + + std::vector types(num_columns, data_type); + + auto const table = create_random_table(types, row_count{num_rows}); + int64_t const size_bytes = estimate_size(table->view()); + + state.add_element_count(num_elements, "num_elements"); + state.add_global_memory_reads(size_bytes); + state.add_global_memory_writes(size_bytes); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::to_arrow_host(table->view(), rmm::cuda_stream_view{launch.get_stream()}); + }); +} + +template +void BM_from_arrow_device(nvbench::state& state, nvbench::type_list>) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const num_columns = static_cast(state.get_int64("num_columns")); + auto const num_elements = static_cast(num_rows) * num_columns; + + std::vector 
types(num_columns, data_type); + + data_profile profile; + profile.set_struct_depth(1); + profile.set_list_depth(1); + + auto const table = create_random_table(types, row_count{num_rows}, profile); + cudf::table_view table_view = table->view(); + int64_t const size_bytes = estimate_size(table_view); + + std::vector table_metadata; + + std::transform(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_columns), + std::back_inserter(table_metadata), + [&](auto const column) { + cudf::column_metadata column_metadata{""}; + column_metadata.children_meta = std::vector( + table->get_column(column).num_children(), cudf::column_metadata{""}); + return column_metadata; + }); + + cudf::unique_schema_t schema = cudf::to_arrow_schema(table_view, table_metadata); + cudf::unique_device_array_t input = cudf::to_arrow_device(table_view); + + state.add_element_count(num_elements, "num_elements"); + state.add_global_memory_reads(size_bytes); + state.add_global_memory_writes(size_bytes); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::from_arrow_device_column( + schema.get(), input.get(), rmm::cuda_stream_view{launch.get_stream()}); + }); +} + +template +void BM_from_arrow_host(nvbench::state& state, nvbench::type_list>) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const num_columns = static_cast(state.get_int64("num_columns")); + auto const num_elements = static_cast(num_rows) * num_columns; + + std::vector types(num_columns, data_type); + + data_profile profile; + profile.set_struct_depth(1); + profile.set_list_depth(1); + + auto const table = create_random_table(types, row_count{num_rows}, profile); + cudf::table_view table_view = table->view(); + int64_t const size_bytes = estimate_size(table_view); + + std::vector table_metadata; + + std::transform(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_columns), + std::back_inserter(table_metadata), + [&](auto const column) { + 
cudf::column_metadata column_metadata{""}; + column_metadata.children_meta = std::vector( + table->get_column(column).num_children(), cudf::column_metadata{""}); + return column_metadata; + }); + + cudf::unique_schema_t schema = cudf::to_arrow_schema(table_view, table_metadata); + cudf::unique_device_array_t input = cudf::to_arrow_host(table_view); + + state.add_element_count(num_elements, "num_elements"); + state.add_global_memory_reads(size_bytes); + state.add_global_memory_writes(size_bytes); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::from_arrow_host_column( + schema.get(), input.get(), rmm::cuda_stream_view{launch.get_stream()}); + }); +} + +using data_types = nvbench::enum_type_list; + +static char const* stringify_type(cudf::type_id value) +{ + switch (value) { + case cudf::type_id::INT8: return "INT8"; + case cudf::type_id::INT16: return "INT16"; + case cudf::type_id::INT32: return "INT32"; + case cudf::type_id::INT64: return "INT64"; + case cudf::type_id::UINT8: return "UINT8"; + case cudf::type_id::UINT16: return "UINT16"; + case cudf::type_id::UINT32: return "UINT32"; + case cudf::type_id::UINT64: return "UINT64"; + case cudf::type_id::FLOAT32: return "FLOAT32"; + case cudf::type_id::FLOAT64: return "FLOAT64"; + case cudf::type_id::BOOL8: return "BOOL8"; + case cudf::type_id::TIMESTAMP_DAYS: return "TIMESTAMP_DAYS"; + case cudf::type_id::TIMESTAMP_SECONDS: return "TIMESTAMP_SECONDS"; + case cudf::type_id::TIMESTAMP_MILLISECONDS: return "TIMESTAMP_MILLISECONDS"; + case cudf::type_id::TIMESTAMP_MICROSECONDS: return "TIMESTAMP_MICROSECONDS"; + case cudf::type_id::TIMESTAMP_NANOSECONDS: return "TIMESTAMP_NANOSECONDS"; + case cudf::type_id::DURATION_DAYS: return "DURATION_DAYS"; + case cudf::type_id::DURATION_SECONDS: return "DURATION_SECONDS"; + case cudf::type_id::DURATION_MILLISECONDS: return "DURATION_MILLISECONDS"; + case cudf::type_id::DURATION_MICROSECONDS: return "DURATION_MICROSECONDS"; + case 
cudf::type_id::DURATION_NANOSECONDS: return "DURATION_NANOSECONDS"; + case cudf::type_id::DICTIONARY32: return "DICTIONARY32"; + case cudf::type_id::STRING: return "STRING"; + case cudf::type_id::LIST: return "LIST"; + case cudf::type_id::DECIMAL32: return "DECIMAL32"; + case cudf::type_id::DECIMAL64: return "DECIMAL64"; + case cudf::type_id::DECIMAL128: return "DECIMAL128"; + case cudf::type_id::STRUCT: return "STRUCT"; + default: return "unknown"; + } +} + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS(cudf::type_id, stringify_type, stringify_type) + +NVBENCH_BENCH_TYPES(BM_to_arrow_host, NVBENCH_TYPE_AXES(data_types)) + .set_type_axes_names({"data_type"}) + .set_name("to_arrow_host") + .add_int64_axis("num_rows", {10'000, 100'000, 1'000'000, 10'000'000}) + .add_int64_axis("num_columns", {1}); + +NVBENCH_BENCH_TYPES(BM_to_arrow_device, NVBENCH_TYPE_AXES(data_types)) + .set_type_axes_names({"data_type"}) + .set_name("to_arrow_device") + .add_int64_axis("num_rows", {10'000, 100'000, 1'000'000, 10'000'000}) + .add_int64_axis("num_columns", {1}); + +NVBENCH_BENCH_TYPES(BM_from_arrow_host, NVBENCH_TYPE_AXES(data_types)) + .set_type_axes_names({"data_type"}) + .set_name("from_arrow_host") + .add_int64_axis("num_rows", {10'000, 100'000, 1'000'000, 10'000'000}) + .add_int64_axis("num_columns", {1}); + +NVBENCH_BENCH_TYPES(BM_from_arrow_device, NVBENCH_TYPE_AXES(data_types)) + .set_type_axes_names({"data_type"}) + .set_name("from_arrow_device") + .add_int64_axis("num_rows", {10'000, 100'000, 1'000'000, 10'000'000}) + .add_int64_axis("num_columns", {1}); diff --git a/cpp/benchmarks/io/json/json_reader_input.cpp b/cpp/benchmarks/io/json/json_reader_input.cpp index 4366790f208..678f2f1a600 100644 --- a/cpp/benchmarks/io/json/json_reader_input.cpp +++ b/cpp/benchmarks/io/json/json_reader_input.cpp @@ -24,17 +24,19 @@ #include -// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to -// run on most GPUs, but large enough to allow highest throughput 
-constexpr size_t data_size = 512 << 20; +// Default size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks +// to run on most GPUs, but large enough to allow highest throughput +constexpr size_t default_data_size = 512 << 20; constexpr cudf::size_type num_cols = 64; void json_read_common(cuio_source_sink_pair& source_sink, cudf::size_type num_rows_to_read, - nvbench::state& state) + nvbench::state& state, + cudf::io::compression_type comptype = cudf::io::compression_type::NONE, + size_t data_size = default_data_size) { cudf::io::json_reader_options read_opts = - cudf::io::json_reader_options::builder(source_sink.make_source_info()); + cudf::io::json_reader_options::builder(source_sink.make_source_info()).compression(comptype); auto mem_stats_logger = cudf::memory_stats_logger(); state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); @@ -57,15 +59,21 @@ void json_read_common(cuio_source_sink_pair& source_sink, state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); } -cudf::size_type json_write_bm_data(cudf::io::sink_info sink, - std::vector const& dtypes) +cudf::size_type json_write_bm_data( + cudf::io::sink_info sink, + std::vector const& dtypes, + cudf::io::compression_type comptype = cudf::io::compression_type::NONE, + size_t data_size = default_data_size) { auto const tbl = create_random_table( cycle_dtypes(dtypes, num_cols), table_size_bytes{data_size}, data_profile_builder()); auto const view = tbl->view(); cudf::io::json_writer_options const write_opts = - cudf::io::json_writer_options::builder(sink, view).na_rep("null").rows_per_chunk(100'000); + cudf::io::json_writer_options::builder(sink, view) + .na_rep("null") + .rows_per_chunk(100'000) + .compression(comptype); cudf::io::write_json(write_opts); return view.num_rows(); } @@ -87,6 +95,26 @@ void BM_json_read_io(nvbench::state& state, nvbench::type_list +void BM_json_read_compressed_io( + nvbench::state& 
state, nvbench::type_list, nvbench::enum_type>) +{ + size_t const data_size = state.get_int64("data_size"); + cuio_source_sink_pair source_sink(IO); + auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::DURATION), + static_cast(data_type::STRING), + static_cast(data_type::LIST), + static_cast(data_type::STRUCT)}); + auto const num_rows = + json_write_bm_data(source_sink.make_sink_info(), d_type, comptype, data_size); + + json_read_common(source_sink, num_rows, state, comptype, data_size); +} + template void BM_json_read_data_type( nvbench::state& state, nvbench::type_list, nvbench::enum_type>) @@ -110,8 +138,9 @@ using d_type_list = nvbench::enum_type_list; -using compression_list = - nvbench::enum_type_list; +using compression_list = nvbench::enum_type_list; NVBENCH_BENCH_TYPES(BM_json_read_data_type, NVBENCH_TYPE_AXES(d_type_list, nvbench::enum_type_list)) @@ -123,3 +152,10 @@ NVBENCH_BENCH_TYPES(BM_json_read_io, NVBENCH_TYPE_AXES(io_list)) .set_name("json_read_io") .set_type_axes_names({"io"}) .set_min_samples(4); + +NVBENCH_BENCH_TYPES(BM_json_read_compressed_io, + NVBENCH_TYPE_AXES(compression_list, nvbench::enum_type_list)) + .set_name("json_read_compressed_io") + .set_type_axes_names({"compression_type", "io"}) + .add_int64_power_of_two_axis("data_size", nvbench::range(20, 29, 1)) + .set_min_samples(4); diff --git a/cpp/benchmarks/io/nvbench_helpers.hpp b/cpp/benchmarks/io/nvbench_helpers.hpp index cc548ccd3de..011b2590c6f 100644 --- a/cpp/benchmarks/io/nvbench_helpers.hpp +++ b/cpp/benchmarks/io/nvbench_helpers.hpp @@ -76,6 +76,7 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS( [](auto value) { switch (value) { case cudf::io::compression_type::SNAPPY: return "SNAPPY"; + case cudf::io::compression_type::GZIP: return "GZIP"; case cudf::io::compression_type::NONE: return "NONE"; default: return "Unknown"; } diff --git 
a/cpp/benchmarks/string/url_decode.cu b/cpp/benchmarks/string/url_decode.cu index 7720e585023..cee2a246838 100644 --- a/cpp/benchmarks/string/url_decode.cu +++ b/cpp/benchmarks/string/url_decode.cu @@ -15,48 +15,40 @@ */ #include -#include - -#include +#include +#include #include -#include #include +#include #include #include -#include #include #include #include #include -#include #include -#include + +#include struct url_string_generator { - char* chars; + cudf::column_device_view d_strings; double esc_seq_chance; thrust::minstd_rand engine; - thrust::uniform_real_distribution esc_seq_dist; - url_string_generator(char* c, double esc_seq_chance, thrust::minstd_rand& engine) - : chars(c), esc_seq_chance(esc_seq_chance), engine(engine), esc_seq_dist(0, 1) - { - } + thrust::uniform_real_distribution esc_seq_dist{0, 1}; - __device__ void operator()(thrust::tuple str_begin_end) + __device__ void operator()(cudf::size_type idx) { - auto begin = thrust::get<0>(str_begin_end); - auto end = thrust::get<1>(str_begin_end); - engine.discard(begin); - for (auto i = begin; i < end; ++i) { - if (esc_seq_dist(engine) < esc_seq_chance and i < end - 3) { + engine.discard(idx); + auto d_str = d_strings.element(idx); + auto chars = const_cast(d_str.data()); + for (auto i = 0; i < d_str.size_bytes() - 3; ++i) { + if (esc_seq_dist(engine) < esc_seq_chance) { chars[i] = '%'; chars[i + 1] = '2'; chars[i + 2] = '0'; i += 2; - } else { - chars[i] = 'a'; } } } @@ -64,50 +56,44 @@ struct url_string_generator { auto generate_column(cudf::size_type num_rows, cudf::size_type chars_per_row, double esc_seq_chance) { - std::vector strings{std::string(chars_per_row, 'a')}; - auto col_1a = cudf::test::strings_column_wrapper(strings.begin(), strings.end()); - auto table_a = cudf::repeat(cudf::table_view{{col_1a}}, num_rows); - auto result_col = std::move(table_a->release()[0]); // string column with num_rows aaa... 
- auto chars_data = static_cast(result_col->mutable_view().head()); - auto offset_col = result_col->child(cudf::strings_column_view::offsets_column_index).view(); - auto offset_itr = cudf::detail::offsetalator_factory::make_input_iterator(offset_col); + auto str_row = std::string(chars_per_row, 'a'); + auto result_col = cudf::make_column_from_scalar(cudf::string_scalar(str_row), num_rows); + auto d_strings = cudf::column_device_view::create(result_col->view()); auto engine = thrust::default_random_engine{}; thrust::for_each_n(thrust::device, - thrust::make_zip_iterator(offset_itr, offset_itr + 1), + thrust::counting_iterator(0), num_rows, - url_string_generator{chars_data, esc_seq_chance, engine}); + url_string_generator{*d_strings, esc_seq_chance, engine}); return result_col; } -class UrlDecode : public cudf::benchmark {}; - -void BM_url_decode(benchmark::State& state, int esc_seq_pct) +static void bench_url_decode(nvbench::state& state) { - cudf::size_type const num_rows = state.range(0); - cudf::size_type const chars_per_row = state.range(1); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const esc_seq_pct = static_cast(state.get_int64("esc_seq_pct")); + + auto column = generate_column(num_rows, row_width, esc_seq_pct / 100.0); + auto input = cudf::strings_column_view(column->view()); - auto column = generate_column(num_rows, chars_per_row, esc_seq_pct / 100.0); - auto strings_view = cudf::strings_column_view(column->view()); + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + auto chars_size = input.chars_size(stream); + state.add_global_memory_reads(chars_size); - for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::get_default_stream()); - auto result = cudf::strings::url_decode(strings_view); + { + auto result = cudf::strings::url_decode(input); + auto sv = 
cudf::strings_column_view(result->view()); + state.add_global_memory_writes(sv.chars_size(stream)); } - state.SetBytesProcessed(state.iterations() * num_rows * - (chars_per_row + sizeof(cudf::size_type))); + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::strings::url_decode(input); }); } -#define URLD_BENCHMARK_DEFINE(esc_seq_pct) \ - BENCHMARK_DEFINE_F(UrlDecode, esc_seq_pct) \ - (::benchmark::State & st) { BM_url_decode(st, esc_seq_pct); } \ - BENCHMARK_REGISTER_F(UrlDecode, esc_seq_pct) \ - ->Args({100000000, 10}) \ - ->Args({10000000, 100}) \ - ->Args({1000000, 1000}) \ - ->Unit(benchmark::kMillisecond) \ - ->UseManualTime(); - -URLD_BENCHMARK_DEFINE(10) -URLD_BENCHMARK_DEFINE(50) +NVBENCH_BENCH(bench_url_decode) + .set_name("url_decode") + .add_int64_axis("row_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) + .add_int64_axis("esc_seq_pct", {10, 50}); diff --git a/cpp/examples/versions.cmake b/cpp/examples/versions.cmake index 51613090534..13e0cf81625 100644 --- a/cpp/examples/versions.cmake +++ b/cpp/examples/versions.cmake @@ -12,4 +12,4 @@ # the License. 
# ============================================================================= -set(CUDF_TAG branch-24.12) +set(CUDF_TAG branch-25.02) diff --git a/cpp/src/io/utilities/config_utils.cpp b/cpp/src/io/utilities/config_utils.cpp index 3307b4fa539..cea0ebad8f5 100644 --- a/cpp/src/io/utilities/config_utils.cpp +++ b/cpp/src/io/utilities/config_utils.cpp @@ -56,7 +56,8 @@ void set_up_kvikio() { static std::once_flag flag{}; std::call_once(flag, [] { - auto const compat_mode = kvikio::detail::getenv_or("KVIKIO_COMPAT_MODE", true); + auto const compat_mode = + kvikio::detail::getenv_or("KVIKIO_COMPAT_MODE", kvikio::CompatMode::ON); kvikio::defaults::compat_mode_reset(compat_mode); auto const nthreads = getenv_or("KVIKIO_NTHREADS", 4u); diff --git a/cpp/src/io/utilities/data_sink.cpp b/cpp/src/io/utilities/data_sink.cpp index 68377ad6d5f..b37a5ac900a 100644 --- a/cpp/src/io/utilities/data_sink.cpp +++ b/cpp/src/io/utilities/data_sink.cpp @@ -45,7 +45,7 @@ class file_sink : public data_sink { cufile_integration::set_up_kvikio(); _kvikio_file = kvikio::FileHandle(filepath, "w"); CUDF_LOG_INFO("Writing a file using kvikIO, with compatibility mode {}.", - _kvikio_file.is_compat_mode_on() ? "on" : "off"); + _kvikio_file.is_compat_mode_preferred() ? "on" : "off"); } else { _cufile_out = detail::make_cufile_output(filepath); } diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index 0870e4a84a7..10814eea458 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -56,7 +56,7 @@ class file_source : public datasource { cufile_integration::set_up_kvikio(); _kvikio_file = kvikio::FileHandle(filepath); CUDF_LOG_INFO("Reading a file using kvikIO, with compatibility mode {}.", - _kvikio_file.is_compat_mode_on() ? "on" : "off"); + _kvikio_file.is_compat_mode_preferred() ? 
"on" : "off"); } else { _cufile_in = detail::make_cufile_input(filepath); } diff --git a/dependencies.yaml b/dependencies.yaml index 682aaa612b4..631ce12f0b0 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -612,7 +612,7 @@ dependencies: - output_types: [conda] packages: - breathe>=4.35.0 - - dask-cuda==24.12.*,>=0.0.0a0 + - dask-cuda==25.2.*,>=0.0.0a0 - *doxygen - make - myst-nb @@ -753,12 +753,12 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - rapids-dask-dependency==24.12.*,>=0.0.0a0 + - rapids-dask-dependency==25.2.*,>=0.0.0a0 run_dask_cudf: common: - output_types: [conda, requirements, pyproject] packages: - - rapids-dask-dependency==24.12.*,>=0.0.0a0 + - rapids-dask-dependency==25.2.*,>=0.0.0a0 run_custreamz: common: - output_types: conda @@ -898,7 +898,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - dask-cuda==24.12.*,>=0.0.0a0 + - dask-cuda==25.2.*,>=0.0.0a0 - *numba-cuda-dep specific: - output_types: [conda, requirements] @@ -918,7 +918,7 @@ dependencies: common: - output_types: conda packages: - - &libcudf_unsuffixed libcudf==24.12.*,>=0.0.0a0 + - &libcudf_unsuffixed libcudf==25.2.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -932,18 +932,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - libcudf-cu12==24.12.*,>=0.0.0a0 + - libcudf-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - libcudf-cu11==24.12.*,>=0.0.0a0 + - libcudf-cu11==25.2.*,>=0.0.0a0 - {matrix: null, packages: [*libcudf_unsuffixed]} depends_on_pylibcudf: common: - output_types: conda packages: - - &pylibcudf_unsuffixed pylibcudf==24.12.*,>=0.0.0a0 + - &pylibcudf_unsuffixed pylibcudf==25.2.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -957,18 +957,18 @@ dependencies: cuda: "12.*" 
cuda_suffixed: "true" packages: - - pylibcudf-cu12==24.12.*,>=0.0.0a0 + - pylibcudf-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - pylibcudf-cu11==24.12.*,>=0.0.0a0 + - pylibcudf-cu11==25.2.*,>=0.0.0a0 - {matrix: null, packages: [*pylibcudf_unsuffixed]} depends_on_cudf: common: - output_types: conda packages: - - &cudf_unsuffixed cudf==24.12.*,>=0.0.0a0 + - &cudf_unsuffixed cudf==25.2.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -982,18 +982,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cudf-cu12==24.12.*,>=0.0.0a0 + - cudf-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - cudf-cu11==24.12.*,>=0.0.0a0 + - cudf-cu11==25.2.*,>=0.0.0a0 - {matrix: null, packages: [*cudf_unsuffixed]} depends_on_cudf_kafka: common: - output_types: conda packages: - - &cudf_kafka_unsuffixed cudf_kafka==24.12.*,>=0.0.0a0 + - &cudf_kafka_unsuffixed cudf_kafka==25.2.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -1007,12 +1007,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cudf_kafka-cu12==24.12.*,>=0.0.0a0 + - cudf_kafka-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - cudf_kafka-cu11==24.12.*,>=0.0.0a0 + - cudf_kafka-cu11==25.2.*,>=0.0.0a0 - {matrix: null, packages: [*cudf_kafka_unsuffixed]} depends_on_cupy: common: @@ -1033,7 +1033,7 @@ dependencies: common: - output_types: conda packages: - - &libkvikio_unsuffixed libkvikio==24.12.*,>=0.0.0a0 + - &libkvikio_unsuffixed libkvikio==25.2.*,>=0.0.0a0 - output_types: requirements packages: - --extra-index-url=https://pypi.nvidia.com @@ -1045,12 +1045,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - libkvikio-cu12==24.12.*,>=0.0.0a0 + - libkvikio-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" 
packages: - - libkvikio-cu11==24.12.*,>=0.0.0a0 + - libkvikio-cu11==25.2.*,>=0.0.0a0 - matrix: packages: - *libkvikio_unsuffixed @@ -1058,7 +1058,7 @@ dependencies: common: - output_types: conda packages: - - &librmm_unsuffixed librmm==24.12.*,>=0.0.0a0 + - &librmm_unsuffixed librmm==25.2.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -1072,12 +1072,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - librmm-cu12==24.12.*,>=0.0.0a0 + - librmm-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - librmm-cu11==24.12.*,>=0.0.0a0 + - librmm-cu11==25.2.*,>=0.0.0a0 - matrix: packages: - *librmm_unsuffixed @@ -1085,7 +1085,7 @@ dependencies: common: - output_types: conda packages: - - &rmm_unsuffixed rmm==24.12.*,>=0.0.0a0 + - &rmm_unsuffixed rmm==25.2.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -1099,12 +1099,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - rmm-cu12==24.12.*,>=0.0.0a0 + - rmm-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - rmm-cu11==24.12.*,>=0.0.0a0 + - rmm-cu11==25.2.*,>=0.0.0a0 - matrix: packages: - *rmm_unsuffixed diff --git a/java/ci/README.md b/java/ci/README.md index 95b93698cae..bfb35bc1d23 100644 --- a/java/ci/README.md +++ b/java/ci/README.md @@ -34,7 +34,7 @@ nvidia-docker run -it cudf-build:11.8.0-devel-rocky8 bash You can download the cuDF repo in the docker container or you can mount it into the container. Here I choose to download again in the container. 
```bash -git clone --recursive https://github.com/rapidsai/cudf.git -b branch-24.12 +git clone --recursive https://github.com/rapidsai/cudf.git -b branch-25.02 ``` ### Build cuDF jar with devtoolset @@ -47,4 +47,4 @@ scl enable gcc-toolset-11 "java/ci/build-in-docker.sh" ### The output -You can find the cuDF jar in java/target/ like cudf-24.12.0-SNAPSHOT-cuda11.jar. +You can find the cuDF jar in java/target/ like cudf-25.02.0-SNAPSHOT-cuda11.jar. diff --git a/java/pom.xml b/java/pom.xml index 450cfbdbc84..892c9cfddb1 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -21,7 +21,7 @@ ai.rapids cudf - 24.12.0-SNAPSHOT + 25.02.0-SNAPSHOT cudfjni diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index 1589e23f716..a163bb07888 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -3,12 +3,10 @@ from numba.np import numpy_support import cudf -from cudf.core._internals.expressions import parse_expression from cudf.core.buffer import acquire_spill_lock, as_buffer from cudf.utils import cudautils from pylibcudf cimport transform as plc_transform -from pylibcudf.expressions cimport Expression from pylibcudf.libcudf.types cimport size_type from cudf._lib.column cimport Column @@ -93,7 +91,7 @@ def one_hot_encode(Column input_column, Column categories): @acquire_spill_lock() -def compute_column(list columns, tuple column_names, expr: str): +def compute_column(list columns, tuple column_names, str expr): """Compute a new column by evaluating an expression on a set of columns. Parameters @@ -108,12 +106,8 @@ def compute_column(list columns, tuple column_names, expr: str): expr : str The expression to evaluate. """ - visitor = parse_expression(expr, column_names) - - # At the end, all the stack contains is the expression to evaluate. 
- cdef Expression cudf_expr = visitor.expression result = plc_transform.compute_column( plc.Table([col.to_pylibcudf(mode="read") for col in columns]), - cudf_expr, + plc.expressions.to_expression(expr, column_names), ) return Column.from_pylibcudf(result) diff --git a/python/cudf/cudf/core/_internals/expressions.py b/python/cudf/cudf/core/_internals/expressions.py deleted file mode 100644 index 90d9118027a..00000000000 --- a/python/cudf/cudf/core/_internals/expressions.py +++ /dev/null @@ -1,229 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -from __future__ import annotations - -import ast -import functools - -import pyarrow as pa - -import pylibcudf as plc -from pylibcudf.expressions import ( - ASTOperator, - ColumnReference, - Expression, - Literal, - Operation, -) - -# This dictionary encodes the mapping from Python AST operators to their cudf -# counterparts. -python_cudf_operator_map = { - # Binary operators - ast.Add: ASTOperator.ADD, - ast.Sub: ASTOperator.SUB, - ast.Mult: ASTOperator.MUL, - ast.Div: ASTOperator.DIV, - ast.FloorDiv: ASTOperator.FLOOR_DIV, - ast.Mod: ASTOperator.PYMOD, - ast.Pow: ASTOperator.POW, - ast.Eq: ASTOperator.EQUAL, - ast.NotEq: ASTOperator.NOT_EQUAL, - ast.Lt: ASTOperator.LESS, - ast.Gt: ASTOperator.GREATER, - ast.LtE: ASTOperator.LESS_EQUAL, - ast.GtE: ASTOperator.GREATER_EQUAL, - ast.BitXor: ASTOperator.BITWISE_XOR, - # TODO: The mapping of logical/bitwise operators here is inconsistent with - # pandas. In pandas, Both `BitAnd` and `And` map to - # `ASTOperator.LOGICAL_AND` for booleans, while they map to - # `ASTOperator.BITWISE_AND` for integers. However, there is no good way to - # encode this at present because expressions can be arbitrarily nested so - # we won't know the dtype of the input without inserting a much more - # complex traversal of the expression tree to determine the output types at - # each node. For now, we'll rely on users to use the appropriate operator. 
- ast.BitAnd: ASTOperator.BITWISE_AND, - ast.BitOr: ASTOperator.BITWISE_OR, - ast.And: ASTOperator.LOGICAL_AND, - ast.Or: ASTOperator.LOGICAL_OR, - # Unary operators - ast.Invert: ASTOperator.BIT_INVERT, - ast.Not: ASTOperator.NOT, - # TODO: Missing USub, possibility other unary ops? -} - - -# Mapping between Python function names encode in an ast.Call node and the -# corresponding libcudf C++ AST operators. -python_cudf_function_map = { - # TODO: Operators listed on - # https://pandas.pydata.org/pandas-docs/stable/user_guide/enhancingperf.html#expression-evaluation-via-eval # noqa: E501 - # that we don't support yet: - # expm1, log1p, arctan2 and log10. - "isnull": ASTOperator.IS_NULL, - "isna": ASTOperator.IS_NULL, - "sin": ASTOperator.SIN, - "cos": ASTOperator.COS, - "tan": ASTOperator.TAN, - "arcsin": ASTOperator.ARCSIN, - "arccos": ASTOperator.ARCCOS, - "arctan": ASTOperator.ARCTAN, - "sinh": ASTOperator.SINH, - "cosh": ASTOperator.COSH, - "tanh": ASTOperator.TANH, - "arcsinh": ASTOperator.ARCSINH, - "arccosh": ASTOperator.ARCCOSH, - "arctanh": ASTOperator.ARCTANH, - "exp": ASTOperator.EXP, - "log": ASTOperator.LOG, - "sqrt": ASTOperator.SQRT, - "abs": ASTOperator.ABS, - "ceil": ASTOperator.CEIL, - "floor": ASTOperator.FLOOR, - # TODO: Operators supported by libcudf with no Python function analog. - # ast.rint: ASTOperator.RINT, - # ast.cbrt: ASTOperator.CBRT, -} - - -class libcudfASTVisitor(ast.NodeVisitor): - """A NodeVisitor specialized for constructing a libcudf expression tree. - - This visitor is designed to handle AST nodes that have libcudf equivalents. - It constructs column references from names and literals from constants, - then builds up operations. The final result can be accessed using the - `expression` property. The visitor must be kept in scope for as long as the - expression is needed because all of the underlying libcudf expressions will - be destroyed when the libcudfASTVisitor is. 
- - Parameters - ---------- - col_names : Tuple[str] - The column names used to map the names in an expression. - """ - - def __init__(self, col_names: tuple[str]): - self.stack: list[Expression] = [] - self.nodes: list[Expression] = [] - self.col_names = col_names - - @property - def expression(self): - """Expression: The result of parsing an AST.""" - assert len(self.stack) == 1 - return self.stack[-1] - - def visit_Name(self, node): - try: - col_id = self.col_names.index(node.id) - except ValueError: - raise ValueError(f"Unknown column name {node.id}") - self.stack.append(ColumnReference(col_id)) - - def visit_Constant(self, node): - if not isinstance(node.value, (float, int, str, complex)): - raise ValueError( - f"Unsupported literal {repr(node.value)} of type " - "{type(node.value).__name__}" - ) - self.stack.append( - Literal(plc.interop.from_arrow(pa.scalar(node.value))) - ) - - def visit_UnaryOp(self, node): - self.visit(node.operand) - self.nodes.append(self.stack.pop()) - if isinstance(node.op, ast.USub): - # TODO: Except for leaf nodes, we won't know the type of the - # operand, so there's no way to know whether this should be a float - # or an int. We should maybe see what Spark does, and this will - # probably require casting. 
- self.nodes.append(Literal(plc.interop.from_arrow(pa.scalar(-1)))) - op = ASTOperator.MUL - self.stack.append(Operation(op, self.nodes[-1], self.nodes[-2])) - elif isinstance(node.op, ast.UAdd): - self.stack.append(self.nodes[-1]) - else: - op = python_cudf_operator_map[type(node.op)] - self.stack.append(Operation(op, self.nodes[-1])) - - def visit_BinOp(self, node): - self.visit(node.left) - self.visit(node.right) - self.nodes.append(self.stack.pop()) - self.nodes.append(self.stack.pop()) - - op = python_cudf_operator_map[type(node.op)] - self.stack.append(Operation(op, self.nodes[-1], self.nodes[-2])) - - def _visit_BoolOp_Compare(self, operators, operands, has_multiple_ops): - # Helper function handling the common components of parsing BoolOp and - # Compare AST nodes. These two types of nodes both support chaining - # (e.g. `a > b > c` is equivalent to `a > b and b > c`, so this - # function helps standardize that. - - # TODO: Whether And/Or and BitAnd/BitOr actually correspond to - # logical or bitwise operators depends on the data types that they - # are applied to. We'll need to add logic to map to that. - inner_ops = [] - for op, (left, right) in zip(operators, operands): - # Note that this will lead to duplicate nodes, e.g. if - # the comparison is `a < b < c` that will be encoded as - # `a < b and b < c`. We could potentially optimize by caching - # expressions by name so that we only construct them once. - self.visit(left) - self.visit(right) - - self.nodes.append(self.stack.pop()) - self.nodes.append(self.stack.pop()) - - op = python_cudf_operator_map[type(op)] - inner_ops.append(Operation(op, self.nodes[-1], self.nodes[-2])) - - self.nodes.extend(inner_ops) - - # If we have more than one comparator, we need to link them - # together with LOGICAL_AND operators. 
- if has_multiple_ops: - op = ASTOperator.LOGICAL_AND - - def _combine_compare_ops(left, right): - self.nodes.append(Operation(op, left, right)) - return self.nodes[-1] - - functools.reduce(_combine_compare_ops, inner_ops) - - self.stack.append(self.nodes[-1]) - - def visit_BoolOp(self, node): - operators = [node.op] * (len(node.values) - 1) - operands = zip(node.values[:-1], node.values[1:]) - self._visit_BoolOp_Compare(operators, operands, len(node.values) > 2) - - def visit_Compare(self, node): - operands = (node.left, *node.comparators) - has_multiple_ops = len(operands) > 2 - operands = zip(operands[:-1], operands[1:]) - self._visit_BoolOp_Compare(node.ops, operands, has_multiple_ops) - - def visit_Call(self, node): - try: - op = python_cudf_function_map[node.func.id] - except KeyError: - raise ValueError(f"Unsupported function {node.func}.") - # Assuming only unary functions are supported, which is checked above. - if len(node.args) != 1 or node.keywords: - raise ValueError( - f"Function {node.func} only accepts one positional " - "argument." - ) - self.visit(node.args[0]) - - self.nodes.append(self.stack.pop()) - self.stack.append(Operation(op, self.nodes[-1])) - - -@functools.lru_cache(256) -def parse_expression(expr: str, col_names: tuple[str]): - visitor = libcudfASTVisitor(col_names) - visitor.visit(ast.parse(expr)) - return visitor diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py index 3d132c92d54..016bd1225cd 100644 --- a/python/cudf/cudf/core/reshape.py +++ b/python/cudf/cudf/core/reshape.py @@ -961,7 +961,11 @@ def _merge_sorted( ) -def _pivot(col_accessor: ColumnAccessor, index, columns) -> cudf.DataFrame: +def _pivot( + col_accessor: ColumnAccessor, + index: cudf.Index | cudf.MultiIndex, + columns: cudf.Index | cudf.MultiIndex, +) -> cudf.DataFrame: """ Reorganize the values of the DataFrame according to the given index and columns. 
@@ -1012,12 +1016,12 @@ def as_tuple(x): level_names=(None,) + columns._column_names, verify=False, ) - return cudf.DataFrame._from_data( - ca, index=cudf.Index(index_labels, name=index.name) - ) + return cudf.DataFrame._from_data(ca, index=index_labels) -def pivot(data, columns=None, index=no_default, values=no_default): +def pivot( + data: cudf.DataFrame, columns=None, index=no_default, values=no_default +) -> cudf.DataFrame: """ Return reshaped DataFrame organized by the given index and column values. @@ -1027,10 +1031,10 @@ def pivot(data, columns=None, index=no_default, values=no_default): Parameters ---------- - columns : column name, optional - Column used to construct the columns of the result. - index : column name, optional - Column used to construct the index of the result. + columns : scalar or list of scalars, optional + Column label(s) used to construct the columns of the result. + index : scalar or list of scalars, optional + Column label(s) used to construct the index of the result. values : column name or list of column names, optional Column(s) whose values are rearranged to produce the result. 
If not specified, all remaining columns of the DataFrame @@ -1069,24 +1073,46 @@ def pivot(data, columns=None, index=no_default, values=no_default): """ values_is_list = True if values is no_default: + already_selected = set( + itertools.chain( + [index] if is_scalar(index) else index, + [columns] if is_scalar(columns) else columns, + ) + ) cols_to_select = [ - col for col in data._column_names if col not in (index, columns) + col for col in data._column_names if col not in already_selected ] elif not isinstance(values, (list, tuple)): cols_to_select = [values] values_is_list = False else: - cols_to_select = values + cols_to_select = values # type: ignore[assignment] if index is no_default: - index = data.index + index_data = data.index else: - index = cudf.Index(data.loc[:, index]) - columns = cudf.Index(data.loc[:, columns]) + index_data = data.loc[:, index] + if index_data.ndim == 2: + index_data = cudf.MultiIndex.from_frame(index_data) + if not is_scalar(index) and len(index) == 1: + # pandas converts single level MultiIndex to Index + index_data = index_data.get_level_values(0) + else: + index_data = cudf.Index(index_data) + + column_data = data.loc[:, columns] + if column_data.ndim == 2: + column_data = cudf.MultiIndex.from_frame(column_data) + else: + column_data = cudf.Index(column_data) # Create a DataFrame composed of columns from both # columns and index ca = ColumnAccessor( - dict(enumerate(itertools.chain(index._columns, columns._columns))), + dict( + enumerate( + itertools.chain(index_data._columns, column_data._columns) + ) + ), verify=False, ) columns_index = cudf.DataFrame._from_data(ca) @@ -1095,7 +1121,9 @@ def pivot(data, columns=None, index=no_default, values=no_default): if len(columns_index) != len(columns_index.drop_duplicates()): raise ValueError("Duplicate index-column pairs found. 
Cannot reshape.") - result = _pivot(data._data.select_by_label(cols_to_select), index, columns) + result = _pivot( + data._data.select_by_label(cols_to_select), index_data, column_data + ) # MultiIndex to Index if not values_is_list: diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index ce99f98b559..750c6cec180 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -368,6 +368,14 @@ def _process_dataset( file_list = paths if len(paths) == 1 and ioutils.is_directory(paths[0]): paths = ioutils.stringify_pathlike(paths[0]) + elif ( + filters is None + and isinstance(dataset_kwargs, dict) + and dataset_kwargs.get("partitioning") is None + ): + # Skip dataset processing if we have no filters + # or hive/directory partitioning to deal with. + return paths, row_groups, [], {} # Convert filters to ds.Expression if filters is not None: diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py index 26386abb05d..53fe5f7f30d 100644 --- a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -835,3 +835,20 @@ def test_crosstab_simple(): expected = pd.crosstab(a, [b, c], rownames=["a"], colnames=["b", "c"]) actual = cudf.crosstab(a, [b, c], rownames=["a"], colnames=["b", "c"]) assert_eq(expected, actual, check_dtype=False) + + +@pytest.mark.parametrize("index", [["ix"], ["ix", "foo"]]) +@pytest.mark.parametrize("columns", [["col"], ["col", "baz"]]) +def test_pivot_list_like_index_columns(index, columns): + data = { + "bar": ["x", "y", "z", "w"], + "col": ["a", "b", "a", "b"], + "foo": [1, 2, 3, 4], + "ix": [1, 1, 2, 2], + "baz": [0, 0, 0, 0], + } + pd_df = pd.DataFrame(data) + cudf_df = cudf.DataFrame(data) + result = cudf_df.pivot(columns=columns, index=index) + expected = pd_df.pivot(columns=columns, index=index) + assert_eq(result, expected) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml 
b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml index 2c7330d5ee6..6b317cc13fb 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml @@ -189,7 +189,7 @@ dependencies: common: - output_types: conda packages: - - cudf==24.12.*,>=0.0.0a0 + - cudf==25.2.*,>=0.0.0a0 - pandas - pytest - pytest-xdist @@ -263,13 +263,13 @@ dependencies: common: - output_types: conda packages: - - cuml==24.12.*,>=0.0.0a0 + - cuml==25.2.*,>=0.0.0a0 - scikit-learn test_cugraph: common: - output_types: conda packages: - - cugraph==24.12.*,>=0.0.0a0 + - cugraph==25.2.*,>=0.0.0a0 - networkx test_ibis: common: diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 280dd52bb22..df3e6b87991 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -23,7 +23,7 @@ dependencies = [ "cuda-python>=11.7.1,<12.0a0,<=11.8.3", "cupy-cuda11x>=12.0.0", "fsspec>=0.6.0", - "libcudf==24.12.*,>=0.0.0a0", + "libcudf==25.2.*,>=0.0.0a0", "numba-cuda>=0.0.13,<0.0.18", "numpy>=1.23,<3.0a0", "nvtx>=0.2.1", @@ -32,9 +32,9 @@ dependencies = [ "ptxcompiler", "pyarrow>=14.0.0,<19.0.0a0,!=17.0.0; platform_machine=='aarch64'", "pyarrow>=14.0.0,<19.0.0a0; platform_machine=='x86_64'", - "pylibcudf==24.12.*,>=0.0.0a0", + "pylibcudf==25.2.*,>=0.0.0a0", "rich", - "rmm==24.12.*,>=0.0.0a0", + "rmm==25.2.*,>=0.0.0a0", "typing_extensions>=4.0.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
classifiers = [ @@ -121,11 +121,11 @@ matrix-entry = "cuda_suffixed=true" requires = [ "cmake>=3.26.4,!=3.30.0", "cython>=3.0.3", - "libcudf==24.12.*,>=0.0.0a0", - "librmm==24.12.*,>=0.0.0a0", + "libcudf==25.2.*,>=0.0.0a0", + "librmm==25.2.*,>=0.0.0a0", "ninja", - "pylibcudf==24.12.*,>=0.0.0a0", - "rmm==24.12.*,>=0.0.0a0", + "pylibcudf==25.2.*,>=0.0.0a0", + "rmm==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [tool.scikit-build] diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index b2ea3f06e48..29fcd161444 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -18,7 +18,7 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ - "cudf==24.12.*,>=0.0.0a0", + "cudf==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
[project.optional-dependencies] diff --git a/python/cudf_polars/cudf_polars/callback.py b/python/cudf_polars/cudf_polars/callback.py index c446ce0384e..7915c9e6b18 100644 --- a/python/cudf_polars/cudf_polars/callback.py +++ b/python/cudf_polars/cudf_polars/callback.py @@ -129,7 +129,6 @@ def set_device(device: int | None) -> Generator[int, None, None]: def _callback( ir: IR, - config: GPUEngine, with_columns: list[str] | None, pyarrow_predicate: str | None, n_rows: int | None, @@ -146,7 +145,7 @@ def _callback( set_device(device), set_memory_resource(memory_resource), ): - return ir.evaluate(cache={}, config=config).to_polars() + return ir.evaluate(cache={}).to_polars() def validate_config_options(config: dict) -> None: @@ -201,7 +200,7 @@ def execute_with_cudf(nt: NodeTraverser, *, config: GPUEngine) -> None: validate_config_options(config.config) with nvtx.annotate(message="ConvertIR", domain="cudf_polars"): - translator = Translator(nt) + translator = Translator(nt, config) ir = translator.translate_ir() ir_translation_errors = translator.errors if len(ir_translation_errors): @@ -225,7 +224,6 @@ def execute_with_cudf(nt: NodeTraverser, *, config: GPUEngine) -> None: partial( _callback, ir, - config, device=device, memory_resource=memory_resource, ) diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index e44a0e0857a..62a2da9dcea 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -37,8 +37,6 @@ from collections.abc import Callable, Hashable, MutableMapping, Sequence from typing import Literal - from polars import GPUEngine - from cudf_polars.typing import Schema @@ -182,9 +180,7 @@ def get_hashable(self) -> Hashable: translation phase should fail earlier. 
""" - def evaluate( - self, *, cache: MutableMapping[int, DataFrame], config: GPUEngine - ) -> DataFrame: + def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """ Evaluate the node (recursively) and return a dataframe. @@ -193,8 +189,6 @@ def evaluate( cache Mapping from cached node ids to constructed DataFrames. Used to implement evaluation of the `Cache` node. - config - GPU engine configuration. Notes ----- @@ -214,9 +208,8 @@ def evaluate( translation phase should fail earlier. """ return self.do_evaluate( - config, *self._non_child_args, - *(child.evaluate(cache=cache, config=config) for child in self.children), + *(child.evaluate(cache=cache) for child in self.children), ) @@ -263,6 +256,7 @@ class Scan(IR): "typ", "reader_options", "cloud_options", + "config_options", "paths", "with_columns", "skip_rows", @@ -275,6 +269,7 @@ class Scan(IR): "typ", "reader_options", "cloud_options", + "config_options", "paths", "with_columns", "skip_rows", @@ -288,6 +283,8 @@ class Scan(IR): """Reader-specific options, as dictionary.""" cloud_options: dict[str, Any] | None """Cloud-related authentication options, currently ignored.""" + config_options: dict[str, Any] + """GPU-specific configuration options""" paths: list[str] """List of paths to read from.""" with_columns: list[str] | None @@ -310,6 +307,7 @@ def __init__( typ: str, reader_options: dict[str, Any], cloud_options: dict[str, Any] | None, + config_options: dict[str, Any], paths: list[str], with_columns: list[str] | None, skip_rows: int, @@ -321,6 +319,7 @@ def __init__( self.typ = typ self.reader_options = reader_options self.cloud_options = cloud_options + self.config_options = config_options self.paths = paths self.with_columns = with_columns self.skip_rows = skip_rows @@ -331,6 +330,7 @@ def __init__( schema, typ, reader_options, + config_options, paths, with_columns, skip_rows, @@ -412,6 +412,7 @@ def get_hashable(self) -> Hashable: self.typ, json.dumps(self.reader_options), 
json.dumps(self.cloud_options), + json.dumps(self.config_options), tuple(self.paths), tuple(self.with_columns) if self.with_columns is not None else None, self.skip_rows, @@ -423,10 +424,10 @@ def get_hashable(self) -> Hashable: @classmethod def do_evaluate( cls, - config: GPUEngine, schema: Schema, typ: str, reader_options: dict[str, Any], + config_options: dict[str, Any], paths: list[str], with_columns: list[str] | None, skip_rows: int, @@ -509,7 +510,7 @@ def do_evaluate( colnames[0], ) elif typ == "parquet": - parquet_options = config.config.get("parquet_options", {}) + parquet_options = config_options.get("parquet_options", {}) if parquet_options.get("chunked", True): reader = plc.io.parquet.ChunkedParquetReader( plc.io.SourceInfo(paths), @@ -657,16 +658,14 @@ def __init__(self, schema: Schema, key: int, value: IR): @classmethod def do_evaluate( - cls, config: GPUEngine, key: int, df: DataFrame + cls, key: int, df: DataFrame ) -> DataFrame: # pragma: no cover; basic evaluation never calls this """Evaluate and return a dataframe.""" # Our value has already been computed for us, so let's just # return it. return df - def evaluate( - self, *, cache: MutableMapping[int, DataFrame], config: GPUEngine - ) -> DataFrame: + def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" # We must override the recursion scheme because we don't want # to recurse if we're in the cache. @@ -674,9 +673,7 @@ def evaluate( return cache[self.key] except KeyError: (value,) = self.children - return cache.setdefault( - self.key, value.evaluate(cache=cache, config=config) - ) + return cache.setdefault(self.key, value.evaluate(cache=cache)) class DataFrameScan(IR): @@ -722,7 +719,6 @@ def get_hashable(self) -> Hashable: @classmethod def do_evaluate( cls, - config: GPUEngine, schema: Schema, df: Any, projection: tuple[str, ...] 
| None, @@ -770,7 +766,6 @@ def __init__( @classmethod def do_evaluate( cls, - config: GPUEngine, exprs: tuple[expr.NamedExpr, ...], should_broadcast: bool, # noqa: FBT001 df: DataFrame, @@ -806,7 +801,6 @@ def __init__( @classmethod def do_evaluate( cls, - config: GPUEngine, exprs: tuple[expr.NamedExpr, ...], df: DataFrame, ) -> DataFrame: # pragma: no cover; not exposed by polars yet @@ -899,7 +893,6 @@ def check_agg(agg: expr.Expr) -> int: @classmethod def do_evaluate( cls, - config: GPUEngine, keys_in: Sequence[expr.NamedExpr], agg_requests: Sequence[expr.NamedExpr], maintain_order: bool, # noqa: FBT001 @@ -1021,7 +1014,6 @@ def __init__( @classmethod def do_evaluate( cls, - config: GPUEngine, predicate: plc.expressions.Expression, zlice: tuple[int, int] | None, suffix: str, @@ -1194,7 +1186,6 @@ def _reorder_maps( @classmethod def do_evaluate( cls, - config: GPUEngine, left_on_exprs: Sequence[expr.NamedExpr], right_on_exprs: Sequence[expr.NamedExpr], options: tuple[ @@ -1318,7 +1309,6 @@ def __init__( @classmethod def do_evaluate( cls, - config: GPUEngine, exprs: Sequence[expr.NamedExpr], should_broadcast: bool, # noqa: FBT001 df: DataFrame, @@ -1381,7 +1371,6 @@ def __init__( @classmethod def do_evaluate( cls, - config: GPUEngine, keep: plc.stream_compaction.DuplicateKeepOption, subset: frozenset[str] | None, zlice: tuple[int, int] | None, @@ -1471,7 +1460,6 @@ def __init__( @classmethod def do_evaluate( cls, - config: GPUEngine, by: Sequence[expr.NamedExpr], order: Sequence[plc.types.Order], null_order: Sequence[plc.types.NullOrder], @@ -1527,9 +1515,7 @@ def __init__(self, schema: Schema, offset: int, length: int, df: IR): self.children = (df,) @classmethod - def do_evaluate( - cls, config: GPUEngine, offset: int, length: int, df: DataFrame - ) -> DataFrame: + def do_evaluate(cls, offset: int, length: int, df: DataFrame) -> DataFrame: """Evaluate and return a dataframe.""" return df.slice((offset, length)) @@ -1549,9 +1535,7 @@ def __init__(self, schema: 
Schema, mask: expr.NamedExpr, df: IR): self.children = (df,) @classmethod - def do_evaluate( - cls, config: GPUEngine, mask_expr: expr.NamedExpr, df: DataFrame - ) -> DataFrame: + def do_evaluate(cls, mask_expr: expr.NamedExpr, df: DataFrame) -> DataFrame: """Evaluate and return a dataframe.""" (mask,) = broadcast(mask_expr.evaluate(df), target_length=df.num_rows) return df.filter(mask) @@ -1569,7 +1553,7 @@ def __init__(self, schema: Schema, df: IR): self.children = (df,) @classmethod - def do_evaluate(cls, config: GPUEngine, schema: Schema, df: DataFrame) -> DataFrame: + def do_evaluate(cls, schema: Schema, df: DataFrame) -> DataFrame: """Evaluate and return a dataframe.""" # This can reorder things. columns = broadcast( @@ -1645,9 +1629,7 @@ def __init__(self, schema: Schema, name: str, options: Any, df: IR): self._non_child_args = (name, self.options) @classmethod - def do_evaluate( - cls, config: GPUEngine, name: str, options: Any, df: DataFrame - ) -> DataFrame: + def do_evaluate(cls, name: str, options: Any, df: DataFrame) -> DataFrame: """Evaluate and return a dataframe.""" if name == "rechunk": # No-op in our data model @@ -1726,9 +1708,7 @@ def __init__(self, schema: Schema, zlice: tuple[int, int] | None, *children: IR) raise NotImplementedError("Schema mismatch") @classmethod - def do_evaluate( - cls, config: GPUEngine, zlice: tuple[int, int] | None, *dfs: DataFrame - ) -> DataFrame: + def do_evaluate(cls, zlice: tuple[int, int] | None, *dfs: DataFrame) -> DataFrame: """Evaluate and return a dataframe.""" # TODO: only evaluate what we need if we have a slice? 
return DataFrame.from_table( @@ -1777,7 +1757,7 @@ def _extend_with_nulls(table: plc.Table, *, nrows: int) -> plc.Table: ) @classmethod - def do_evaluate(cls, config: GPUEngine, *dfs: DataFrame) -> DataFrame: + def do_evaluate(cls, *dfs: DataFrame) -> DataFrame: """Evaluate and return a dataframe.""" max_rows = max(df.num_rows for df in dfs) # Horizontal concatenation extends shorter tables with nulls diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py index e8ed009cdf2..12fc2a196cd 100644 --- a/python/cudf_polars/cudf_polars/dsl/translate.py +++ b/python/cudf_polars/cudf_polars/dsl/translate.py @@ -26,6 +26,8 @@ from cudf_polars.utils import dtypes, sorting if TYPE_CHECKING: + from polars import GPUEngine + from cudf_polars.typing import NodeTraverser __all__ = ["Translator", "translate_named_expr"] @@ -39,10 +41,13 @@ class Translator: ---------- visitor Polars NodeTraverser object + config + GPU engine configuration. """ - def __init__(self, visitor: NodeTraverser): + def __init__(self, visitor: NodeTraverser, config: GPUEngine): self.visitor = visitor + self.config = config self.errors: list[Exception] = [] def translate_ir(self, *, n: int | None = None) -> ir.IR: @@ -228,6 +233,7 @@ def _( typ, reader_options, cloud_options, + translator.config.config.copy(), node.paths, with_columns, skip_rows, diff --git a/python/cudf_polars/cudf_polars/testing/asserts.py b/python/cudf_polars/cudf_polars/testing/asserts.py index 1821cfedfb8..ba0bb12a0fb 100644 --- a/python/cudf_polars/cudf_polars/testing/asserts.py +++ b/python/cudf_polars/cudf_polars/testing/asserts.py @@ -122,7 +122,7 @@ def assert_ir_translation_raises(q: pl.LazyFrame, *exceptions: type[Exception]) AssertionError If the specified exceptions were not raised. 
""" - translator = Translator(q._ldf.visit()) + translator = Translator(q._ldf.visit(), GPUEngine()) translator.translate_ir() if errors := translator.errors: for err in errors: diff --git a/python/cudf_polars/docs/overview.md b/python/cudf_polars/docs/overview.md index 2f2361223d2..a8cad5622fb 100644 --- a/python/cudf_polars/docs/overview.md +++ b/python/cudf_polars/docs/overview.md @@ -8,7 +8,7 @@ You will need: preferred configuration. Or else, use [rustup](https://www.rust-lang.org/tools/install) 2. A [cudf development - environment](https://github.com/rapidsai/cudf/blob/branch-24.12/CONTRIBUTING.md#setting-up-your-build-environment). + environment](https://github.com/rapidsai/cudf/blob/branch-25.02/CONTRIBUTING.md#setting-up-your-build-environment). The combined devcontainer works, or whatever your favourite approach is. :::{note} @@ -459,11 +459,12 @@ and convert back to polars: ```python from cudf_polars.dsl.translate import Translator +import polars as pl q = ... # Convert to our IR -ir = Translator(q._ldf.visit()).translate_ir() +ir = Translator(q._ldf.visit(), pl.GPUEngine()).translate_ir() # DataFrame living on the device result = ir.evaluate(cache={}) diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index 1ce4d7b6867..f050a7c568a 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -20,7 +20,7 @@ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ "polars>=1.11,<1.15", - "pylibcudf==24.12.*,>=0.0.0a0", + "pylibcudf==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", @@ -41,7 +41,7 @@ test = [ "pytest<8", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
experimental = [ - "rapids-dask-dependency==24.12.*,>=0.0.0a0", + "rapids-dask-dependency==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] diff --git a/python/cudf_polars/tests/dsl/test_to_ast.py b/python/cudf_polars/tests/dsl/test_to_ast.py index 795ba991c62..60ff7a655e6 100644 --- a/python/cudf_polars/tests/dsl/test_to_ast.py +++ b/python/cudf_polars/tests/dsl/test_to_ast.py @@ -60,10 +60,10 @@ def df(): ) def test_compute_column(expr, df): q = df.select(expr) - ir = Translator(q._ldf.visit()).translate_ir() + ir = Translator(q._ldf.visit(), pl.GPUEngine()).translate_ir() assert isinstance(ir, ir_nodes.Select) - table = ir.children[0].evaluate(cache={}, config=pl.GPUEngine()) + table = ir.children[0].evaluate(cache={}) name_to_index = {c.name: i for i, c in enumerate(table.columns)} def compute_column(e): diff --git a/python/cudf_polars/tests/dsl/test_traversal.py b/python/cudf_polars/tests/dsl/test_traversal.py index 8849629e0fd..2f4df9289f8 100644 --- a/python/cudf_polars/tests/dsl/test_traversal.py +++ b/python/cudf_polars/tests/dsl/test_traversal.py @@ -109,7 +109,7 @@ def test_rewrite_ir_node(): df = pl.LazyFrame({"a": [1, 2, 1], "b": [1, 3, 4]}) q = df.group_by("a").agg(pl.col("b").sum()).sort("b") - orig = Translator(q._ldf.visit()).translate_ir() + orig = Translator(q._ldf.visit(), pl.GPUEngine()).translate_ir() new_df = pl.DataFrame({"a": [1, 1, 2], "b": [-1, -2, -4]}) @@ -124,7 +124,7 @@ def replace_df(node, rec): new = mapper(orig) - result = new.evaluate(cache={}, config=pl.GPUEngine()).to_polars() + result = new.evaluate(cache={}).to_polars() expect = pl.DataFrame({"a": [2, 1], "b": [-4, -3]}) @@ -150,10 +150,10 @@ def replace_scan(node, rec): mapper = CachingVisitor(replace_scan) - orig = Translator(q._ldf.visit()).translate_ir() + orig = Translator(q._ldf.visit(), pl.GPUEngine()).translate_ir() new = 
mapper(orig) - result = new.evaluate(cache={}, config=pl.GPUEngine()).to_polars() + result = new.evaluate(cache={}).to_polars() expect = q.collect() @@ -174,7 +174,7 @@ def test_rewrite_names_and_ops(): .collect() ) - qir = Translator(q._ldf.visit()).translate_ir() + qir = Translator(q._ldf.visit(), pl.GPUEngine()).translate_ir() @singledispatch def _transform(e: expr.Expr, fn: ExprTransformer) -> expr.Expr: @@ -224,6 +224,6 @@ def _(node: ir.Select, fn: IRTransformer): new_ir = rewriter(qir) - got = new_ir.evaluate(cache={}, config=pl.GPUEngine()).to_polars() + got = new_ir.evaluate(cache={}).to_polars() assert_frame_equal(expect, got) diff --git a/python/cudf_polars/tests/expressions/test_sort.py b/python/cudf_polars/tests/expressions/test_sort.py index 49e075e0338..dd080f41483 100644 --- a/python/cudf_polars/tests/expressions/test_sort.py +++ b/python/cudf_polars/tests/expressions/test_sort.py @@ -68,11 +68,7 @@ def test_setsorted(descending, nulls_last, with_nulls): assert_gpu_result_equal(q) - df = ( - Translator(q._ldf.visit()) - .translate_ir() - .evaluate(cache={}, config=pl.GPUEngine()) - ) + df = Translator(q._ldf.visit(), pl.GPUEngine()).translate_ir().evaluate(cache={}) a = df.column_map["a"] diff --git a/python/custreamz/custreamz/kafka.py b/python/custreamz/custreamz/kafka.py index 4cbd7244751..166b7d98592 100644 --- a/python/custreamz/custreamz/kafka.py +++ b/python/custreamz/custreamz/kafka.py @@ -151,9 +151,14 @@ def read_gdf( "parquet": cudf.io.read_parquet, } - result = cudf_readers[message_format]( - kafka_datasource, engine="cudf", lines=True - ) + if message_format == "json": + result = cudf_readers[message_format]( + kafka_datasource, engine="cudf", lines=True + ) + else: + result = cudf_readers[message_format]( + kafka_datasource, engine="cudf" + ) # Close up the cudf datasource instance # TODO: Ideally the C++ destructor should handle the diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml index 
dd67a019c77..ed43ab83d53 100644 --- a/python/custreamz/pyproject.toml +++ b/python/custreamz/pyproject.toml @@ -20,8 +20,8 @@ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ "confluent-kafka>=2.5.0,<2.6.0a0", - "cudf==24.12.*,>=0.0.0a0", - "cudf_kafka==24.12.*,>=0.0.0a0", + "cudf==25.2.*,>=0.0.0a0", + "cudf_kafka==25.2.*,>=0.0.0a0", "streamz", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ diff --git a/python/dask_cudf/dask_cudf/_legacy/io/parquet.py b/python/dask_cudf/dask_cudf/_legacy/io/parquet.py index 39ac6474958..c0638e4a1c3 100644 --- a/python/dask_cudf/dask_cudf/_legacy/io/parquet.py +++ b/python/dask_cudf/dask_cudf/_legacy/io/parquet.py @@ -86,7 +86,8 @@ def _read_paths( ) dataset_kwargs = dataset_kwargs or {} - dataset_kwargs["partitioning"] = partitioning or "hive" + if partitions: + dataset_kwargs["partitioning"] = partitioning or "hive" # Use cudf to read in data try: diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index fb02e0ac772..9c5d5523019 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -700,140 +700,10 @@ def from_dict( ) @staticmethod - def read_parquet(path, *args, filesystem="fsspec", engine=None, **kwargs): - import dask_expr as dx - import fsspec - - if ( - isinstance(filesystem, fsspec.AbstractFileSystem) - or isinstance(filesystem, str) - and filesystem.lower() == "fsspec" - ): - # Default "fsspec" filesystem - from dask_cudf._legacy.io.parquet import CudfEngine + def read_parquet(*args, **kwargs): + from dask_cudf.io.parquet import read_parquet as read_parquet_expr - _raise_unsupported_parquet_kwargs(**kwargs) - return _default_backend( - dx.read_parquet, - path, - *args, - filesystem=filesystem, - engine=CudfEngine, - **kwargs, - ) - - else: - # EXPERIMENTAL filesystem="arrow" support. 
- # This code path uses PyArrow for IO, which is only - # beneficial for remote storage (e.g. S3) - - from fsspec.utils import stringify_path - from pyarrow import fs as pa_fs - - # CudfReadParquetPyarrowFS requires import of distributed beforehand - # (See: https://github.com/dask/dask/issues/11352) - import distributed # noqa: F401 - from dask.core import flatten - from dask.dataframe.utils import pyarrow_strings_enabled - - from dask_cudf.io.parquet import CudfReadParquetPyarrowFS - - if args: - raise ValueError(f"Unexpected positional arguments: {args}") - - if not ( - isinstance(filesystem, pa_fs.FileSystem) - or isinstance(filesystem, str) - and filesystem.lower() in ("arrow", "pyarrow") - ): - raise ValueError(f"Unexpected filesystem value: {filesystem}.") - - if not PYARROW_GE_15: - raise NotImplementedError( - "Experimental Arrow filesystem support requires pyarrow>=15" - ) - - if not isinstance(path, str): - path = stringify_path(path) - - # Extract kwargs - columns = kwargs.pop("columns", None) - filters = kwargs.pop("filters", None) - categories = kwargs.pop("categories", None) - index = kwargs.pop("index", None) - storage_options = kwargs.pop("storage_options", None) - dtype_backend = kwargs.pop("dtype_backend", None) - calculate_divisions = kwargs.pop("calculate_divisions", False) - ignore_metadata_file = kwargs.pop("ignore_metadata_file", False) - metadata_task_size = kwargs.pop("metadata_task_size", None) - split_row_groups = kwargs.pop("split_row_groups", "infer") - blocksize = kwargs.pop("blocksize", "default") - aggregate_files = kwargs.pop("aggregate_files", None) - parquet_file_extension = kwargs.pop( - "parquet_file_extension", (".parq", ".parquet", ".pq") - ) - arrow_to_pandas = kwargs.pop("arrow_to_pandas", None) - open_file_options = kwargs.pop("open_file_options", None) - - # Validate and normalize kwargs - kwargs["dtype_backend"] = dtype_backend - if arrow_to_pandas is not None: - raise ValueError( - "arrow_to_pandas not supported for the 
'cudf' backend." - ) - if open_file_options is not None: - raise ValueError( - "The open_file_options argument is no longer supported " - "by the 'cudf' backend." - ) - if filters is not None: - for filter in flatten(filters, container=list): - _, op, val = filter - if op == "in" and not isinstance(val, (set, list, tuple)): - raise TypeError( - "Value of 'in' filter must be a list, set or tuple." - ) - if metadata_task_size is not None: - raise NotImplementedError( - "metadata_task_size is not supported when using the pyarrow filesystem." - ) - if split_row_groups != "infer": - raise NotImplementedError( - "split_row_groups is not supported when using the pyarrow filesystem." - ) - if parquet_file_extension != (".parq", ".parquet", ".pq"): - raise NotImplementedError( - "parquet_file_extension is not supported when using the pyarrow filesystem." - ) - if blocksize is not None and blocksize != "default": - warnings.warn( - "blocksize is not supported when using the pyarrow filesystem." - "blocksize argument will be ignored." - ) - if aggregate_files is not None: - warnings.warn( - "aggregate_files is not supported when using the pyarrow filesystem. " - "Please use the 'dataframe.parquet.minimum-partition-size' config." - "aggregate_files argument will be ignored." 
- ) - - return dx.new_collection( - CudfReadParquetPyarrowFS( - path, - columns=dx._util._convert_to_list(columns), - filters=filters, - categories=categories, - index=index, - calculate_divisions=calculate_divisions, - storage_options=storage_options, - filesystem=filesystem, - ignore_metadata_file=ignore_metadata_file, - arrow_to_pandas=arrow_to_pandas, - pyarrow_strings_enabled=pyarrow_strings_enabled(), - kwargs=kwargs, - _series=isinstance(columns, str), - ) - ) + return read_parquet_expr(*args, **kwargs) @staticmethod def read_csv( diff --git a/python/dask_cudf/dask_cudf/io/__init__.py b/python/dask_cudf/dask_cudf/io/__init__.py index 1e0f24d78ce..212951336c9 100644 --- a/python/dask_cudf/dask_cudf/io/__init__.py +++ b/python/dask_cudf/dask_cudf/io/__init__.py @@ -1,6 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from dask_cudf import _deprecated_api +from dask_cudf import _deprecated_api, QUERY_PLANNING_ON from . import csv, orc, json, parquet, text # noqa: F401 @@ -22,9 +22,14 @@ read_text = _deprecated_api( "dask_cudf.io.read_text", new_api="dask_cudf.read_text" ) -read_parquet = _deprecated_api( - "dask_cudf.io.read_parquet", new_api="dask_cudf.read_parquet" -) +if QUERY_PLANNING_ON: + read_parquet = parquet.read_parquet +else: + read_parquet = _deprecated_api( + "The legacy dask_cudf.io.read_parquet API", + new_api="dask_cudf.read_parquet", + rec="", + ) to_parquet = _deprecated_api( "dask_cudf.io.to_parquet", new_api="dask_cudf._legacy.io.parquet.to_parquet", diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index a7a116875ea..bf8fae552c2 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -1,58 +1,252 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from __future__ import annotations + import functools +import itertools +import math +import os +import warnings +from typing import TYPE_CHECKING, Any +import numpy as np import pandas as pd -from dask_expr.io.io import FusedParquetIO -from dask_expr.io.parquet import FragmentWrapper, ReadParquetPyarrowFS +from dask_expr._expr import Elemwise +from dask_expr._util import _convert_to_list +from dask_expr.io.io import FusedIO, FusedParquetIO +from dask_expr.io.parquet import ( + FragmentWrapper, + ReadParquetFSSpec, + ReadParquetPyarrowFS, +) from dask._task_spec import Task +from dask.dataframe.io.parquet.arrow import _filters_to_expression +from dask.dataframe.io.parquet.core import ParquetFunctionWrapper +from dask.tokenize import tokenize +from dask.utils import parse_bytes import cudf -from dask_cudf import _deprecated_api +from dask_cudf import QUERY_PLANNING_ON, _deprecated_api # Dask-expr imports CudfEngine from this module from dask_cudf._legacy.io.parquet import CudfEngine # noqa: F401 +if TYPE_CHECKING: + from collections.abc import MutableMapping + + +_DEVICE_SIZE_CACHE: int | None = None + + +def _get_device_size(): + try: + # Use PyNVML to find the worker device size. + import pynvml + + pynvml.nvmlInit() + index = os.environ.get("CUDA_VISIBLE_DEVICES", "0").split(",")[0] + if index and not index.isnumeric(): + # This means index is UUID. This works for both MIG and non-MIG device UUIDs. + handle = pynvml.nvmlDeviceGetHandleByUUID(str.encode(index)) + else: + # This is a device index + handle = pynvml.nvmlDeviceGetHandleByIndex(int(index)) + return pynvml.nvmlDeviceGetMemoryInfo(handle).total + + except (ImportError, ValueError): + # Fall back to a conservative 8GiB default + return 8 * 1024**3 + + +def _normalize_blocksize(fraction: float = 0.03125): + # Set the blocksize to fraction * . + # We use the smallest worker device to set . 
+ # (Default blocksize is 1/32 * ) + global _DEVICE_SIZE_CACHE + + if _DEVICE_SIZE_CACHE is None: + try: + # Check distributed workers (if a client exists) + from distributed import get_client + + client = get_client() + # TODO: Check "GPU" worker resources only. + # Depends on (https://github.com/rapidsai/dask-cuda/pull/1401) + device_size = min(client.run(_get_device_size).values()) + except (ImportError, ValueError): + device_size = _get_device_size() + _DEVICE_SIZE_CACHE = device_size + + return int(_DEVICE_SIZE_CACHE * fraction) + + +class NoOp(Elemwise): + # Workaround - Always wrap read_parquet operations + # in a NoOp to trigger tune_up optimizations. + _parameters = ["frame"] + _is_length_preserving = True + _projection_passthrough = True + _filter_passthrough = True + _preserves_partitioning_information = True -class CudfFusedParquetIO(FusedParquetIO): @staticmethod - def _load_multiple_files( - frag_filters, - columns, - schema, - **to_pandas_kwargs, - ): - import pyarrow as pa + def operation(x): + return x - from dask.base import apply, tokenize - from dask.threaded import get - token = tokenize(frag_filters, columns, schema) - name = f"pq-file-{token}" - dsk = { - (name, i): ( - CudfReadParquetPyarrowFS._fragment_to_table, - frag, - filter, - columns, - schema, - ) - for i, (frag, filter) in enumerate(frag_filters) - } - dsk[name] = ( - apply, - pa.concat_tables, - [list(dsk.keys())], - {"promote_options": "permissive"}, - ) - return CudfReadParquetPyarrowFS._table_to_pandas( - get(dsk, name), - **to_pandas_kwargs, - ) +class CudfReadParquetFSSpec(ReadParquetFSSpec): + _STATS_CACHE: MutableMapping[str, Any] = {} + + def approx_statistics(self): + # Use a few files to approximate column-size statistics + key = tokenize(self._dataset_info["ds"].files[:10], self.filters) + try: + return self._STATS_CACHE[key] + + except KeyError: + # Account for filters + ds_filters = None + if self.filters is not None: + ds_filters = _filters_to_expression(self.filters) 
+ + # Use average total_uncompressed_size of three files + n_sample = 3 + column_sizes = {} + for i, frag in enumerate( + self._dataset_info["ds"].get_fragments(ds_filters) + ): + md = frag.metadata + for rg in range(md.num_row_groups): + row_group = md.row_group(rg) + for col in range(row_group.num_columns): + column = row_group.column(col) + name = column.path_in_schema + if name not in column_sizes: + column_sizes[name] = np.zeros( + n_sample, dtype="int64" + ) + column_sizes[name][i] += column.total_uncompressed_size + if (i + 1) >= n_sample: + break + + # Reorganize stats to look like arrow-fs version + self._STATS_CACHE[key] = { + "columns": [ + { + "path_in_schema": name, + "total_uncompressed_size": np.mean(sizes), + } + for name, sizes in column_sizes.items() + ] + } + return self._STATS_CACHE[key] + + @functools.cached_property + def _fusion_compression_factor(self): + # Disable fusion when blocksize=None + if self.blocksize is None: + return 1 + + # At this point, we *may* have used `blockwise` + # already to split or aggregate files. We don't + # *know* if the current partitions correspond to + # individual/full files, multiple/aggregated files + # or partial/split files. + # + # Therefore, we need to use the statistics from + # a few files to estimate the current partition + # size. This size should be similar to `blocksize` + # *if* aggregate_files is True or if the files + # are *smaller* than `blocksize`. 
+ + # Step 1: Sample statistics + approx_stats = self.approx_statistics() + projected_size, original_size = 0, 0 + col_op = self.operand("columns") or self.columns + for col in approx_stats["columns"]: + original_size += col["total_uncompressed_size"] + if col["path_in_schema"] in col_op or ( + (split_name := col["path_in_schema"].split(".")) + and split_name[0] in col_op + ): + projected_size += col["total_uncompressed_size"] + if original_size < 1 or projected_size < 1: + return 1 + + # Step 2: Estimate the correction factor + # (Correct for possible pre-optimization fusion/splitting) + blocksize = parse_bytes(self.blocksize) + if original_size > blocksize: + # Input files are bigger than blocksize + # and we already split these large files. + # (correction_factor > 1) + correction_factor = original_size / blocksize + elif self.aggregate_files: + # Input files are smaller than blocksize + # and we already aggregate small files. + # (correction_factor == 1) + correction_factor = 1 + else: + # Input files are smaller than blocksize + # but we haven't aggregate small files yet. + # (correction_factor < 1) + correction_factor = original_size / blocksize + + # Step 3. 
Estimate column-projection factor + if self.operand("columns") is None: + projection_factor = 1 + else: + projection_factor = projected_size / original_size + + return max(projection_factor * correction_factor, 0.001) + + def _tune_up(self, parent): + if self._fusion_compression_factor >= 1: + return + if isinstance(parent, FusedIO): + return + return parent.substitute(self, CudfFusedIO(self)) class CudfReadParquetPyarrowFS(ReadParquetPyarrowFS): + _parameters = [ + "path", + "columns", + "filters", + "categories", + "index", + "storage_options", + "filesystem", + "blocksize", + "ignore_metadata_file", + "calculate_divisions", + "arrow_to_pandas", + "pyarrow_strings_enabled", + "kwargs", + "_partitions", + "_series", + "_dataset_info_cache", + ] + _defaults = { + "columns": None, + "filters": None, + "categories": None, + "index": None, + "storage_options": None, + "filesystem": None, + "blocksize": "256 MiB", + "ignore_metadata_file": True, + "calculate_divisions": False, + "arrow_to_pandas": None, + "pyarrow_strings_enabled": True, + "kwargs": None, + "_partitions": None, + "_series": False, + "_dataset_info_cache": None, + } + @functools.cached_property def _dataset_info(self): from dask_cudf._legacy.io.parquet import ( @@ -86,11 +280,92 @@ def _dataset_info(self): @staticmethod def _table_to_pandas(table, index_name): - df = cudf.DataFrame.from_arrow(table) - if index_name is not None: - df = df.set_index(index_name) + if isinstance(table, cudf.DataFrame): + df = table + else: + df = cudf.DataFrame.from_arrow(table) + if index_name is not None: + return df.set_index(index_name) return df + @staticmethod + def _fragments_to_cudf_dataframe( + fragment_wrappers, + filters, + columns, + schema, + ): + from dask.dataframe.io.utils import _is_local_fs + + from cudf.io.parquet import _apply_post_filters, _normalize_filters + + if not isinstance(fragment_wrappers, list): + fragment_wrappers = [fragment_wrappers] + + filesystem = None + paths, row_groups = [], [] + for 
fw in fragment_wrappers: + frag = fw.fragment if isinstance(fw, FragmentWrapper) else fw + paths.append(frag.path) + row_groups.append( + [rg.id for rg in frag.row_groups] if frag.row_groups else None + ) + if filesystem is None: + filesystem = frag.filesystem + + if _is_local_fs(filesystem): + filesystem = None + else: + from fsspec.implementations.arrow import ArrowFSWrapper + + filesystem = ArrowFSWrapper(filesystem) + protocol = filesystem.protocol + paths = [f"{protocol}://{path}" for path in paths] + + filters = _normalize_filters(filters) + projected_columns = None + if columns and filters: + projected_columns = [c for c in columns if c is not None] + columns = sorted( + set(v[0] for v in itertools.chain.from_iterable(filters)) + | set(projected_columns) + ) + + if row_groups == [None for path in paths]: + row_groups = None + + df = cudf.read_parquet( + paths, + columns=columns, + filters=filters, + row_groups=row_groups, + dataset_kwargs={"schema": schema}, + ) + + # Apply filters (if any are defined) + df = _apply_post_filters(df, filters) + if projected_columns: + # Elements of `projected_columns` may now be in the index. + # We must filter these names from our projection + projected_columns = [ + col for col in projected_columns if col in df._column_names + ] + df = df[projected_columns] + + # TODO: Deal with hive partitioning. + # Note that ReadParquetPyarrowFS does NOT support this yet anyway. 
+ return df + + @functools.cached_property + def _use_device_io(self): + from dask.dataframe.io.utils import _is_local_fs + + # Use host for remote filesystem only + # (Unless we are using kvikio-S3) + return _is_local_fs(self.fs) or ( + self.fs.type_name == "s3" and cudf.get_option("kvikio_remote_io") + ) + def _filtered_task(self, name, index: int): columns = self.columns.copy() index_name = self.index.name @@ -101,12 +376,17 @@ def _filtered_task(self, name, index: int): if columns is None: columns = list(schema.names) columns.append(index_name) + + frag_to_table = self._fragment_to_table + if self._use_device_io: + frag_to_table = self._fragments_to_cudf_dataframe + return Task( name, self._table_to_pandas, Task( None, - self._fragment_to_table, + frag_to_table, fragment_wrapper=FragmentWrapper( self.fragments[index], filesystem=self.fs ), @@ -117,18 +397,441 @@ def _filtered_task(self, name, index: int): index_name=index_name, ) + @property + def _fusion_compression_factor(self): + blocksize = self.blocksize + if blocksize is None: + return 1 + elif blocksize == "default": + blocksize = "256MiB" + + projected_size = 0 + approx_stats = self.approx_statistics() + col_op = self.operand("columns") or self.columns + for col in approx_stats["columns"]: + if col["path_in_schema"] in col_op or ( + (split_name := col["path_in_schema"].split(".")) + and split_name[0] in col_op + ): + projected_size += col["total_uncompressed_size"] + + if projected_size < 1: + return 1 + + aggregate_files = max(1, int(parse_bytes(blocksize) / projected_size)) + return max(1 / aggregate_files, 0.001) + def _tune_up(self, parent): if self._fusion_compression_factor >= 1: return - if isinstance(parent, CudfFusedParquetIO): + fused_cls = ( + CudfFusedParquetIO + if self._use_device_io + else CudfFusedParquetIOHost + ) + if isinstance(parent, fused_cls): return - return parent.substitute(self, CudfFusedParquetIO(self)) + return parent.substitute(self, fused_cls(self)) -read_parquet = 
_deprecated_api( - "dask_cudf.io.parquet.read_parquet", - new_api="dask_cudf.read_parquet", -) +class CudfFusedIO(FusedIO): + def _task(self, name, index: int): + expr = self.operand("_expr") + bucket = self._fusion_buckets[index] + io_func = expr._filtered_task(name, 0).func + if not isinstance( + io_func, ParquetFunctionWrapper + ) or io_func.common_kwargs.get("partitions", None): + # Just use "simple" fusion if we have an unexpected + # callable, or we are dealing with hive partitioning. + return Task( + name, + cudf.concat, + [expr._filtered_task(name, i) for i in bucket], + ) + + pieces = [] + for i in bucket: + piece = expr._filtered_task(name, i).args[0] + if isinstance(piece, list): + pieces.extend(piece) + else: + pieces.append(piece) + return Task(name, io_func, pieces) + + +class CudfFusedParquetIO(FusedParquetIO): + @functools.cached_property + def _fusion_buckets(self): + partitions = self.operand("_expr")._partitions + npartitions = len(partitions) + + step = math.ceil(1 / self.operand("_expr")._fusion_compression_factor) + + # TODO: Heuristic to limit fusion should probably + # account for the number of workers. For now, just + # limiting fusion to 100 partitions at once. + step = min(step, 100) + + buckets = [ + partitions[i : i + step] for i in range(0, npartitions, step) + ] + return buckets + + @classmethod + def _load_multiple_files( + cls, + frag_filters, + columns, + schema, + **to_pandas_kwargs, + ): + frag_to_table = CudfReadParquetPyarrowFS._fragments_to_cudf_dataframe + return CudfReadParquetPyarrowFS._table_to_pandas( + frag_to_table( + [frag[0] for frag in frag_filters], + frag_filters[0][1], # TODO: Check for consistent filters? 
+ columns, + schema, + ), + **to_pandas_kwargs, + ) + + +class CudfFusedParquetIOHost(CudfFusedParquetIO): + @classmethod + def _load_multiple_files( + cls, + frag_filters, + columns, + schema, + **to_pandas_kwargs, + ): + import pyarrow as pa + + from dask.base import apply, tokenize + from dask.threaded import get + + token = tokenize(frag_filters, columns, schema) + name = f"pq-file-{token}" + dsk = { + (name, i): ( + CudfReadParquetPyarrowFS._fragment_to_table, + frag, + filter, + columns, + schema, + ) + for i, (frag, filter) in enumerate(frag_filters) + } + dsk[name] = ( + apply, + pa.concat_tables, + [list(dsk.keys())], + {"promote_options": "permissive"}, + ) + + return CudfReadParquetPyarrowFS._table_to_pandas( + get(dsk, name), + **to_pandas_kwargs, + ) + + +def read_parquet_expr( + path, + *args, + columns=None, + filters=None, + categories=None, + index=None, + storage_options=None, + dtype_backend=None, + calculate_divisions=False, + ignore_metadata_file=False, + metadata_task_size=None, + split_row_groups="infer", + blocksize="default", + aggregate_files=None, + parquet_file_extension=(".parq", ".parquet", ".pq"), + filesystem="fsspec", + engine=None, + arrow_to_pandas=None, + open_file_options=None, + **kwargs, +): + """ + Read a Parquet file into a Dask-cuDF DataFrame. + + This reads a directory of Parquet data into a DataFrame collection. + Partitioning behavior mostly depends on the ``blocksize`` argument. + + .. note:: + Dask may automatically resize partitions at optimization time. + Please set ``blocksize=None`` to disable this behavior in Dask cuDF. + (NOTE: This will not disable fusion for the "pandas" backend) + + .. note:: + Specifying ``filesystem="arrow"`` leverages a complete reimplementation of + the Parquet reader that is solely based on PyArrow. It is faster than the + legacy implementation in some cases, but doesn't yet support all features. 
+ + Parameters + ---------- + path : str or list + Source directory for data, or path(s) to individual parquet files. + Prefix with a protocol like ``s3://`` to read from alternative + filesystems. To read from multiple files you can pass a globstring or a + list of paths, with the caveat that they must all have the same + protocol. + columns : str or list, default None + Field name(s) to read in as columns in the output. By default all + non-index fields will be read (as determined by the pandas parquet + metadata, if present). Provide a single field name instead of a list to + read in the data as a Series. + filters : Union[List[Tuple[str, str, Any]], List[List[Tuple[str, str, Any]]]], default None + List of filters to apply, like ``[[('col1', '==', 0), ...], ...]``. + Using this argument will result in row-wise filtering of the final partitions. + + Predicates can be expressed in disjunctive normal form (DNF). This means that + the inner-most tuple describes a single column predicate. These inner predicates + are combined with an AND conjunction into a larger predicate. The outer-most + list then combines all of the combined filters with an OR disjunction. + + Predicates can also be expressed as a ``List[Tuple]``. These are evaluated + as an AND conjunction. To express OR in predicates, one must use the + (preferred for "pyarrow") ``List[List[Tuple]]`` notation. + index : str, list or False, default None + Field name(s) to use as the output frame index. By default will be + inferred from the pandas parquet file metadata, if present. Use ``False`` + to read all fields as columns. + categories : list or dict, default None + For any fields listed here, if the parquet encoding is Dictionary, + the column will be created with dtype category. Use only if it is + guaranteed that the column is encoded as dictionary in all row-groups. 
+ If a list, assumes up to 2**16-1 labels; if a dict, specify the number + of labels expected; if None, will load categories automatically for + data written by dask, not otherwise. + storage_options : dict, default None + Key/value pairs to be passed on to the file-system backend, if any. + Note that the default file-system backend can be configured with the + ``filesystem`` argument, described below. + calculate_divisions : bool, default False + Whether to use min/max statistics from the footer metadata (or global + ``_metadata`` file) to calculate divisions for the output DataFrame + collection. Divisions will not be calculated if statistics are missing. + This option will be ignored if ``index`` is not specified and there is + no physical index column specified in the custom "pandas" Parquet + metadata. Note that ``calculate_divisions=True`` may be extremely slow + when no global ``_metadata`` file is present, especially when reading + from remote storage. Set this to ``True`` only when known divisions + are needed for your workload (see :ref:`dataframe-design-partitions`). + ignore_metadata_file : bool, default False + Whether to ignore the global ``_metadata`` file (when one is present). + If ``True``, or if the global ``_metadata`` file is missing, the parquet + metadata may be gathered and processed in parallel. Parallel metadata + processing is currently supported for ``ArrowDatasetEngine`` only. + metadata_task_size : int, default configurable + If parquet metadata is processed in parallel (see ``ignore_metadata_file`` + description above), this argument can be used to specify the number of + dataset files to be processed by each task in the Dask graph. If this + argument is set to ``0``, parallel metadata processing will be disabled. + The default values for local and remote filesystems can be specified + with the "metadata-task-size-local" and "metadata-task-size-remote" + config fields, respectively (see "dataframe.parquet"). 
+ split_row_groups : 'infer', 'adaptive', bool, or int, default 'infer' + WARNING: The ``split_row_groups`` argument is now deprecated, please use + ``blocksize`` instead. + + blocksize : int, float or str, default 'default' + The desired size of each output ``DataFrame`` partition in terms of total + (uncompressed) parquet storage space. This argument may be used to split + large files or aggregate small files into the same partition. Use ``None`` + for a simple 1:1 mapping between files and partitions. Use a float value + less than 1.0 to specify the fractional size of the partitions with + respect to the total memory of the first NVIDIA GPU on your machine. + Default is 1/32 the total memory of a single GPU. + aggregate_files : bool or str, default None + WARNING: The behavior of ``aggregate_files=True`` is now obsolete + when query-planning is enabled (the default). Small files are now + aggregated automatically according to the ``blocksize`` setting. + Please expect this argument to be deprecated in a future release. + + WARNING: Passing a string argument to ``aggregate_files`` will result + in experimental behavior that may be removed at any time. + + parquet_file_extension: str, tuple[str], or None, default (".parq", ".parquet", ".pq") + A file extension or an iterable of extensions to use when discovering + parquet files in a directory. Files that don't match these extensions + will be ignored. This argument only applies when ``paths`` corresponds + to a directory and no ``_metadata`` file is present (or + ``ignore_metadata_file=True``). Passing in ``parquet_file_extension=None`` + will treat all files in the directory as parquet files. + + The purpose of this argument is to ensure that the engine will ignore + unsupported metadata files (like Spark's '_SUCCESS' and 'crc' files). + It may be necessary to change this argument if the data files in your + parquet dataset do not end in ".parq", ".parquet", or ".pq". 
+ filesystem: "fsspec", "arrow", or fsspec.AbstractFileSystem backend to use. + dataset: dict, default None + Dictionary of options to use when creating a ``pyarrow.dataset.Dataset`` object. + These options may include a "filesystem" key to configure the desired + file-system backend. However, the top-level ``filesystem`` argument will always + take precedence. + + **Note**: The ``dataset`` options may include a "partitioning" key. + However, since ``pyarrow.dataset.Partitioning`` + objects cannot be serialized, the value can be a dict of key-word + arguments for the ``pyarrow.dataset.partitioning`` API + (e.g. ``dataset={"partitioning": {"flavor": "hive", "schema": ...}}``). + Note that partitioned columns will not be converted to categorical + dtypes when a custom partitioning schema is specified in this way. + read: dict, default None + Dictionary of options to pass through to ``CudfEngine.read_partitions`` + using the ``read`` key-word argument. + """ + + import dask_expr as dx + from fsspec.utils import stringify_path + from pyarrow import fs as pa_fs + + from dask.core import flatten + from dask.dataframe.utils import pyarrow_strings_enabled + + from dask_cudf.backends import PYARROW_GE_15 + + if args: + raise ValueError(f"Unexpected positional arguments: {args}") + + if open_file_options is not None: + raise ValueError( + "The open_file_options argument is no longer supported " + "by the 'cudf' backend." + ) + if dtype_backend is not None: + raise NotImplementedError( + "dtype_backend is not supported by the 'cudf' backend." + ) + if arrow_to_pandas is not None: + raise NotImplementedError( + "arrow_to_pandas is not supported by the 'cudf' backend." + ) + if engine not in (None, "cudf", CudfEngine): + raise NotImplementedError( + "engine={engine} is not supported by the 'cudf' backend." 
+ ) + + if not isinstance(path, str): + path = stringify_path(path) + + kwargs["dtype_backend"] = None + if arrow_to_pandas: + kwargs["arrow_to_pandas"] = None + + if filters is not None: + for filter in flatten(filters, container=list): + _, op, val = filter + if op == "in" and not isinstance(val, (set, list, tuple)): + raise TypeError( + "Value of 'in' filter must be a list, set or tuple." + ) + + # Normalize blocksize input + if blocksize == "default": + blocksize = _normalize_blocksize() + elif isinstance(blocksize, float) and blocksize < 1: + blocksize = _normalize_blocksize(blocksize) + + if ( + isinstance(filesystem, pa_fs.FileSystem) + or isinstance(filesystem, str) + and filesystem.lower() in ("arrow", "pyarrow") + ): + # EXPERIMENTAL filesystem="arrow" support. + # This code path may use PyArrow for remote IO. + + # CudfReadParquetPyarrowFS requires import of distributed beforehand + # (See: https://github.com/dask/dask/issues/11352) + import distributed # noqa: F401 + + if not PYARROW_GE_15: + raise ValueError( + "pyarrow>=15.0.0 is required to use the pyarrow filesystem." + ) + if metadata_task_size is not None: + warnings.warn( + "metadata_task_size is not supported when using the pyarrow filesystem." + " This argument will be ignored!" + ) + if aggregate_files is not None: + warnings.warn( + "aggregate_files is not supported when using the pyarrow filesystem." + " This argument will be ignored!" + ) + if split_row_groups != "infer": + warnings.warn( + "split_row_groups is not supported when using the pyarrow filesystem." + " This argument will be ignored!" + ) + if parquet_file_extension != (".parq", ".parquet", ".pq"): + raise NotImplementedError( + "parquet_file_extension is not supported when using the pyarrow filesystem." 
+ ) + + return dx.new_collection( + NoOp( + CudfReadParquetPyarrowFS( + path, + columns=_convert_to_list(columns), + filters=filters, + categories=categories, + index=index, + calculate_divisions=calculate_divisions, + storage_options=storage_options, + filesystem=filesystem, + blocksize=blocksize, + ignore_metadata_file=ignore_metadata_file, + arrow_to_pandas=None, + pyarrow_strings_enabled=pyarrow_strings_enabled(), + kwargs=kwargs, + _series=isinstance(columns, str), + ), + ) + ) + + return dx.new_collection( + NoOp( + CudfReadParquetFSSpec( + path, + columns=_convert_to_list(columns), + filters=filters, + categories=categories, + index=index, + blocksize=blocksize, + storage_options=storage_options, + calculate_divisions=calculate_divisions, + ignore_metadata_file=ignore_metadata_file, + metadata_task_size=metadata_task_size, + split_row_groups=split_row_groups, + aggregate_files=aggregate_files, + parquet_file_extension=parquet_file_extension, + filesystem=filesystem, + engine=CudfEngine, + kwargs=kwargs, + _series=isinstance(columns, str), + ), + ) + ) + + +if QUERY_PLANNING_ON: + read_parquet = read_parquet_expr + read_parquet.__doc__ = read_parquet_expr.__doc__ +else: + read_parquet = _deprecated_api( + "The legacy dask_cudf.io.parquet.read_parquet API", + new_api="dask_cudf.read_parquet", + rec="", + ) to_parquet = _deprecated_api( "dask_cudf.io.parquet.to_parquet", new_api="dask_cudf._legacy.io.parquet.to_parquet", diff --git a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py index 522a21e12a5..6efe6c4f388 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py @@ -46,7 +46,7 @@ def test_roundtrip_backend_dispatch(tmpdir): tmpdir = str(tmpdir) ddf.to_parquet(tmpdir, engine="pyarrow") with dask.config.set({"dataframe.backend": "cudf"}): - ddf2 = dd.read_parquet(tmpdir, index=False) + ddf2 = dd.read_parquet(tmpdir, index=False, 
blocksize=None) assert isinstance(ddf2, dask_cudf.DataFrame) dd.assert_eq(ddf.reset_index(drop=False), ddf2) @@ -100,7 +100,7 @@ def test_roundtrip_from_dask_index_false(tmpdir): tmpdir = str(tmpdir) ddf.to_parquet(tmpdir, engine="pyarrow") - ddf2 = dask_cudf.read_parquet(tmpdir, index=False) + ddf2 = dask_cudf.read_parquet(tmpdir, index=False, blocksize=None) dd.assert_eq(ddf.reset_index(drop=False), ddf2) @@ -667,7 +667,7 @@ def test_to_parquet_append(tmpdir, write_metadata_file): write_metadata_file=write_metadata_file, write_index=False, ) - ddf2 = dask_cudf.read_parquet(tmpdir) + ddf2 = dask_cudf.read_parquet(tmpdir, blocksize=None) dd.assert_eq(cudf.concat([df, df]), ddf2) @@ -677,13 +677,17 @@ def test_deprecated_api_paths(tmpdir): with pytest.warns(match="dask_cudf.io.to_parquet is now deprecated"): dask_cudf.io.to_parquet(df, tmpdir) - # Encourage top-level read_parquet import only - with pytest.warns(match="dask_cudf.io.read_parquet is now deprecated"): + if dask_cudf.QUERY_PLANNING_ON: df2 = dask_cudf.io.read_parquet(tmpdir) - dd.assert_eq(df, df2, check_divisions=False) + dd.assert_eq(df, df2, check_divisions=False) - with pytest.warns( - match="dask_cudf.io.parquet.read_parquet is now deprecated" - ): df2 = dask_cudf.io.parquet.read_parquet(tmpdir) - dd.assert_eq(df, df2, check_divisions=False) + dd.assert_eq(df, df2, check_divisions=False) + else: + with pytest.warns(match="legacy dask_cudf.io.read_parquet"): + df2 = dask_cudf.io.read_parquet(tmpdir) + dd.assert_eq(df, df2, check_divisions=False) + + with pytest.warns(match="legacy dask_cudf.io.parquet.read_parquet"): + df2 = dask_cudf.io.parquet.read_parquet(tmpdir) + dd.assert_eq(df, df2, check_divisions=False) diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 07d9143db36..9364cc7647f 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -19,12 +19,12 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" 
dependencies = [ - "cudf==24.12.*,>=0.0.0a0", + "cudf==25.2.*,>=0.0.0a0", "cupy-cuda11x>=12.0.0", "fsspec>=0.6.0", "numpy>=1.23,<3.0a0", "pandas>=2.0,<2.2.4dev0", - "rapids-dask-dependency==24.12.*,>=0.0.0a0", + "rapids-dask-dependency==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", @@ -45,7 +45,7 @@ cudf = "dask_cudf.backends:CudfDXBackendEntrypoint" [project.optional-dependencies] test = [ - "dask-cuda==24.12.*,>=0.0.0a0", + "dask-cuda==25.2.*,>=0.0.0a0", "numba-cuda>=0.0.13,<0.0.18", "pytest-cov", "pytest-xdist", diff --git a/python/libcudf/pyproject.toml b/python/libcudf/pyproject.toml index 8c650eb2144..ac85298bc40 100644 --- a/python/libcudf/pyproject.toml +++ b/python/libcudf/pyproject.toml @@ -38,7 +38,7 @@ classifiers = [ "Environment :: GPU :: NVIDIA CUDA", ] dependencies = [ - "libkvikio==24.12.*,>=0.0.0a0", + "libkvikio==25.2.*,>=0.0.0a0", "nvidia-nvcomp==4.1.0.6", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. @@ -78,7 +78,7 @@ dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true" requires = [ "cmake>=3.26.4,!=3.30.0", - "libkvikio==24.12.*,>=0.0.0a0", - "librmm==24.12.*,>=0.0.0a0", + "libkvikio==25.2.*,>=0.0.0a0", + "librmm==25.2.*,>=0.0.0a0", "ninja", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
diff --git a/python/pylibcudf/pylibcudf/expressions.pyi b/python/pylibcudf/pylibcudf/expressions.pyi index 12b473d8605..4dcccaaa1fc 100644 --- a/python/pylibcudf/pylibcudf/expressions.pyi +++ b/python/pylibcudf/pylibcudf/expressions.pyi @@ -77,3 +77,5 @@ class Operation(Expression): left: Expression, right: Expression | None = None, ): ... + +def to_expression(expr: str, column_names: tuple[str, ...]) -> Expression: ... diff --git a/python/pylibcudf/pylibcudf/expressions.pyx b/python/pylibcudf/pylibcudf/expressions.pyx index 0f12cfe313c..31121785e27 100644 --- a/python/pylibcudf/pylibcudf/expressions.pyx +++ b/python/pylibcudf/pylibcudf/expressions.pyx @@ -1,4 +1,9 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +import ast +import functools + +import pyarrow as pa + from pylibcudf.libcudf.expressions import \ ast_operator as ASTOperator # no-cython-lint from pylibcudf.libcudf.expressions import \ @@ -46,6 +51,8 @@ from .scalar cimport Scalar from .traits cimport is_chrono, is_numeric from .types cimport DataType +from .interop import from_arrow + # Aliases for simplicity ctypedef unique_ptr[libcudf_exp.expression] expression_ptr @@ -57,6 +64,7 @@ __all__ = [ "Literal", "Operation", "TableReference", + "to_expression" ] # Define this class just to have a docstring for it @@ -261,3 +269,217 @@ cdef class ColumnNameReference(Expression): move(make_unique[libcudf_exp.column_name_reference]( (name.encode("utf-8")) )) + + +# This dictionary encodes the mapping from Python AST operators to their cudf +# counterparts. 
+_python_cudf_operator_map = { + # Binary operators + ast.Add: ASTOperator.ADD, + ast.Sub: ASTOperator.SUB, + ast.Mult: ASTOperator.MUL, + ast.Div: ASTOperator.DIV, + ast.FloorDiv: ASTOperator.FLOOR_DIV, + ast.Mod: ASTOperator.PYMOD, + ast.Pow: ASTOperator.POW, + ast.Eq: ASTOperator.EQUAL, + ast.NotEq: ASTOperator.NOT_EQUAL, + ast.Lt: ASTOperator.LESS, + ast.Gt: ASTOperator.GREATER, + ast.LtE: ASTOperator.LESS_EQUAL, + ast.GtE: ASTOperator.GREATER_EQUAL, + ast.BitXor: ASTOperator.BITWISE_XOR, + # TODO: The mapping of logical/bitwise operators here is inconsistent with + # pandas. In pandas, Both `BitAnd` and `And` map to + # `ASTOperator.LOGICAL_AND` for booleans, while they map to + # `ASTOperator.BITWISE_AND` for integers. However, there is no good way to + # encode this at present because expressions can be arbitrarily nested so + # we won't know the dtype of the input without inserting a much more + # complex traversal of the expression tree to determine the output types at + # each node. For now, we'll rely on users to use the appropriate operator. + ast.BitAnd: ASTOperator.BITWISE_AND, + ast.BitOr: ASTOperator.BITWISE_OR, + ast.And: ASTOperator.LOGICAL_AND, + ast.Or: ASTOperator.LOGICAL_OR, + # Unary operators + ast.Invert: ASTOperator.BIT_INVERT, + ast.Not: ASTOperator.NOT, + # TODO: Missing USub, possibility other unary ops? +} + + +# Mapping between Python function names encode in an ast.Call node and the +# corresponding libcudf C++ AST operators. +_python_cudf_function_map = { + # TODO: Operators listed on + # https://pandas.pydata.org/pandas-docs/stable/user_guide/enhancingperf.html#expression-evaluation-via-eval # noqa: E501 + # that we don't support yet: + # expm1, log1p, arctan2 and log10. 
+ "isnull": ASTOperator.IS_NULL, + "isna": ASTOperator.IS_NULL, + "sin": ASTOperator.SIN, + "cos": ASTOperator.COS, + "tan": ASTOperator.TAN, + "arcsin": ASTOperator.ARCSIN, + "arccos": ASTOperator.ARCCOS, + "arctan": ASTOperator.ARCTAN, + "sinh": ASTOperator.SINH, + "cosh": ASTOperator.COSH, + "tanh": ASTOperator.TANH, + "arcsinh": ASTOperator.ARCSINH, + "arccosh": ASTOperator.ARCCOSH, + "arctanh": ASTOperator.ARCTANH, + "exp": ASTOperator.EXP, + "log": ASTOperator.LOG, + "sqrt": ASTOperator.SQRT, + "abs": ASTOperator.ABS, + "ceil": ASTOperator.CEIL, + "floor": ASTOperator.FLOOR, + # TODO: Operators supported by libcudf with no Python function analog. + # ast.rint: ASTOperator.RINT, + # ast.cbrt: ASTOperator.CBRT, +} + + +class ExpressionTransformer(ast.NodeVisitor): + """A NodeVisitor specialized for constructing a libcudf expression tree. + + This visitor is designed to handle AST nodes that have libcudf equivalents. + It constructs column references from names and literals from constants, + then builds up operations. The resulting expression is returned by the + `visit` method + + Parameters + ---------- + column_mapping : dict[str, ColumnNameReference | ColumnReference] + Mapping from names to column references or column name references. + The former can be used for `compute_column` the latter in IO filters. 
+ """ + + def __init__(self, dict column_mapping): + self.column_mapping = column_mapping + + def generic_visit(self, node): + raise ValueError( + f"Not expecting expression to have node of type {node.__class__.__name__}" + ) + + def visit_Module(self, node): + try: + expr, = node.body + except ValueError: + raise ValueError( + f"Expecting exactly one expression, not {len(node.body)}" + ) + return self.visit(expr) + + def visit_Expr(self, node): + return self.visit(node.value) + + def visit_Name(self, node): + try: + return self.column_mapping[node.id] + except KeyError: + raise ValueError(f"Unknown column name {node.id}") + + def visit_Constant(self, node): + if not isinstance(node.value, (float, int, str, complex)): + raise ValueError( + f"Unsupported literal {repr(node.value)} of type " + "{type(node.value).__name__}" + ) + return Literal(from_arrow(pa.scalar(node.value))) + + def visit_UnaryOp(self, node): + operand = self.visit(node.operand) + if isinstance(node.op, ast.USub): + # TODO: Except for leaf nodes, we won't know the type of the + # operand, so there's no way to know whether this should be a float + # or an int. We should maybe see what Spark does, and this will + # probably require casting. 
+ minus_one = Literal(from_arrow(pa.scalar(-1))) + return Operation(ASTOperator.MUL, minus_one, operand) + elif isinstance(node.op, ast.UAdd): + return operand + else: + op = _python_cudf_operator_map[type(node.op)] + return Operation(op, operand) + + def visit_BinOp(self, node): + left = self.visit(node.left) + right = self.visit(node.right) + op = _python_cudf_operator_map[type(node.op)] + return Operation(op, left, right) + + def visit_BoolOp(self, node): + return functools.reduce( + functools.partial(Operation, ASTOperator.LOGICAL_AND), + ( + Operation( + _python_cudf_operator_map[type(node.op)], + self.visit(left), + self.visit(right), + ) + for left, right in zip( + node.values[:-1], node.values[1:], strict=True + ) + ) + ) + + def visit_Compare(self, node): + operands = [node.left, *node.comparators] + return functools.reduce( + functools.partial(Operation, ASTOperator.LOGICAL_AND), + ( + Operation( + _python_cudf_operator_map[type(op)], + self.visit(left), + self.visit(right), + ) + for op, left, right in zip( + node.ops, operands[:-1], operands[1:], strict=True + ) + ) + ) + + def visit_Call(self, node): + try: + op = _python_cudf_function_map[node.func.id] + except KeyError: + raise ValueError(f"Unsupported function {node.func}.") + # Assuming only unary functions are supported, which is checked above. + if len(node.args) != 1 or node.keywords: + raise ValueError( + f"Function {node.func} only accepts one positional " + "argument." + ) + return Operation(op, self.visit(node.args[0])) + + +@functools.lru_cache(256) +def to_expression(str expr, tuple column_names): + """ + Create an expression for `pylibcudf.transform.compute_column`. + + Parameters + ---------- + expr : str + The expression to evaluate. In (restricted) Python syntax. + column_names : tuple[str] + Ordered tuple of names. When calling `compute_column` on the resulting + expression, the provided table must have columns in the same order + as given here. 
+ + Notes + ----- + This function keeps a small cache of recently used expressions. + + Returns + ------- + Expression + Expression for the given expr and col_names + """ + visitor = ExpressionTransformer( + {name: ColumnReference(i) for i, name in enumerate(column_names)} + ) + return visitor.visit(ast.parse(expr)) diff --git a/python/pylibcudf/pylibcudf/libcudf/aggregation.pxd b/python/pylibcudf/pylibcudf/libcudf/aggregation.pxd index 58c579b86de..52d1e572ff3 100644 --- a/python/pylibcudf/pylibcudf/libcudf/aggregation.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/aggregation.pxd @@ -5,6 +5,7 @@ from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.string cimport string from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.types cimport ( data_type, interpolation, @@ -94,71 +95,78 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil: ZERO_NORMALIZED ONE_NORMALIZED - cdef unique_ptr[T] make_sum_aggregation[T]() except + + cdef unique_ptr[T] make_sum_aggregation[T]() except +libcudf_exception_handler - cdef unique_ptr[T] make_product_aggregation[T]() except + + cdef unique_ptr[T] make_product_aggregation[T]() except +libcudf_exception_handler - cdef unique_ptr[T] make_min_aggregation[T]() except + + cdef unique_ptr[T] make_min_aggregation[T]() except +libcudf_exception_handler - cdef unique_ptr[T] make_max_aggregation[T]() except + + cdef unique_ptr[T] make_max_aggregation[T]() except +libcudf_exception_handler - cdef unique_ptr[T] make_count_aggregation[T](null_policy) except + + cdef unique_ptr[T] make_count_aggregation[T]( + null_policy + ) except +libcudf_exception_handler - cdef unique_ptr[T] make_any_aggregation[T]() except + + cdef unique_ptr[T] make_any_aggregation[T]() except +libcudf_exception_handler - cdef unique_ptr[T] make_all_aggregation[T]() except + + cdef unique_ptr[T] make_all_aggregation[T]() except +libcudf_exception_handler - cdef 
unique_ptr[T] make_sum_of_squares_aggregation[T]() except + + cdef unique_ptr[T] make_sum_of_squares_aggregation[T]()\ + except +libcudf_exception_handler - cdef unique_ptr[T] make_mean_aggregation[T]() except + + cdef unique_ptr[T] make_mean_aggregation[T]() except +libcudf_exception_handler cdef unique_ptr[T] make_variance_aggregation[T]( - size_type ddof) except + + size_type ddof) except +libcudf_exception_handler - cdef unique_ptr[T] make_std_aggregation[T](size_type ddof) except + + cdef unique_ptr[T] make_std_aggregation[T]( + size_type ddof + ) except +libcudf_exception_handler - cdef unique_ptr[T] make_median_aggregation[T]() except + + cdef unique_ptr[T] make_median_aggregation[T]() except +libcudf_exception_handler cdef unique_ptr[T] make_quantile_aggregation[T]( - vector[double] q, interpolation i) except + + vector[double] q, interpolation i) except +libcudf_exception_handler - cdef unique_ptr[T] make_argmax_aggregation[T]() except + + cdef unique_ptr[T] make_argmax_aggregation[T]() except +libcudf_exception_handler - cdef unique_ptr[T] make_argmin_aggregation[T]() except + + cdef unique_ptr[T] make_argmin_aggregation[T]() except +libcudf_exception_handler - cdef unique_ptr[T] make_nunique_aggregation[T](null_policy null_handling) except + + cdef unique_ptr[T] make_nunique_aggregation[T]( + null_policy null_handling + ) except +libcudf_exception_handler cdef unique_ptr[T] make_nth_element_aggregation[T]( size_type n, null_policy null_handling - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[T] make_collect_list_aggregation[T]( null_policy null_handling - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[T] make_collect_set_aggregation[T]( null_policy null_handling, null_equality nulls_equal, nan_equality nans_equal - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[T] make_udf_aggregation[T]( udf_type type, string user_defined_aggregator, - data_type output_type) except + + data_type output_type) except 
+libcudf_exception_handler cdef unique_ptr[T] make_ewma_aggregation[T]( double com, ewm_history adjust - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[T] make_correlation_aggregation[T]( - correlation_type type, size_type min_periods) except + + correlation_type type, size_type min_periods) except +libcudf_exception_handler cdef unique_ptr[T] make_covariance_aggregation[T]( - size_type min_periods, size_type ddof) except + + size_type min_periods, size_type ddof) except +libcudf_exception_handler cdef unique_ptr[T] make_rank_aggregation[T]( rank_method method, order column_order, null_policy null_handling, null_order null_precedence, - rank_percentage percentage) except + + rank_percentage percentage) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd b/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd index d39767b4aa8..607f7c2fa60 100644 --- a/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd @@ -1,5 +1,4 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - from libc.stdint cimport int32_t from libcpp cimport bool from libcpp.memory cimport unique_ptr diff --git a/python/pylibcudf/pylibcudf/libcudf/column/column.pxd b/python/pylibcudf/pylibcudf/libcudf/column/column.pxd index 76f35cbba71..0f412ba4765 100644 --- a/python/pylibcudf/pylibcudf/libcudf/column/column.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/column/column.pxd @@ -1,8 +1,8 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column_view cimport ( column_view, mutable_column_view, @@ -19,15 +19,15 @@ cdef extern from "cudf/column/column.hpp" namespace "cudf" nogil: vector[unique_ptr[column]] children cdef cppclass column: - column() except + - column(const column& other) except + + column() except +libcudf_exception_handler + column(const column& other) except +libcudf_exception_handler - column(column_view view) except + + column(column_view view) except +libcudf_exception_handler - size_type size() except + - size_type null_count() except + - bool has_nulls() except + - data_type type() except + - column_view view() except + - mutable_column_view mutable_view() except + - column_contents release() except + + size_type size() except +libcudf_exception_handler + size_type null_count() except +libcudf_exception_handler + bool has_nulls() except +libcudf_exception_handler + data_type type() except +libcudf_exception_handler + column_view view() except +libcudf_exception_handler + mutable_column_view mutable_view() except +libcudf_exception_handler + column_contents release() except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd b/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd index b2388858127..162822d2365 100644 --- a/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.types cimport ( @@ -15,68 +15,80 @@ from rmm.librmm.device_buffer cimport device_buffer cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil: - cdef unique_ptr[column] make_numeric_column(data_type type, - size_type size, - mask_state state) except + + cdef unique_ptr[column] make_numeric_column( + data_type type, + size_type size, + mask_state state + ) except +libcudf_exception_handler - cdef unique_ptr[column] make_numeric_column(data_type type, - size_type size, - device_buffer mask, - size_type null_count) except + + cdef unique_ptr[column] make_numeric_column( + data_type type, + size_type size, + device_buffer mask, + size_type null_count + ) except +libcudf_exception_handler cdef unique_ptr[column] make_fixed_point_column( data_type type, size_type size, - mask_state state) except + + mask_state state) except +libcudf_exception_handler cdef unique_ptr[column] make_fixed_point_column( data_type type, size_type size, device_buffer mask, - size_type null_count) except + + size_type null_count) except +libcudf_exception_handler cdef unique_ptr[column] make_timestamp_column( data_type type, size_type size, - mask_state state) except + + mask_state state) except +libcudf_exception_handler cdef unique_ptr[column] make_timestamp_column( data_type type, size_type size, device_buffer mask, - size_type null_count) except + + size_type null_count) except +libcudf_exception_handler cdef unique_ptr[column] make_duration_column( data_type type, size_type size, - mask_state state) except + + mask_state state) except +libcudf_exception_handler cdef unique_ptr[column] make_duration_column( data_type type, size_type size, device_buffer mask, - size_type null_count) except + + size_type null_count) except 
+libcudf_exception_handler cdef unique_ptr[column] make_fixed_width_column( data_type type, size_type size, - mask_state state) except + + mask_state state) except +libcudf_exception_handler cdef unique_ptr[column] make_fixed_width_column( data_type type, size_type size, device_buffer mask, - size_type null_count) except + + size_type null_count) except +libcudf_exception_handler - cdef unique_ptr[column] make_column_from_scalar(const scalar& s, - size_type size) except + + cdef unique_ptr[column] make_column_from_scalar( + const scalar& s, + size_type size + ) except +libcudf_exception_handler - cdef unique_ptr[column] make_dictionary_from_scalar(const scalar& s, - size_type size) except + + cdef unique_ptr[column] make_dictionary_from_scalar( + const scalar& s, + size_type size + ) except +libcudf_exception_handler - cdef unique_ptr[column] make_empty_column(type_id id) except + - cdef unique_ptr[column] make_empty_column(data_type type_) except + + cdef unique_ptr[column] make_empty_column( + type_id id + ) except +libcudf_exception_handler + cdef unique_ptr[column] make_empty_column( + data_type type_ + ) except +libcudf_exception_handler cdef unique_ptr[column] make_dictionary_column( unique_ptr[column] keys_column, - unique_ptr[column] indices_column) except + + unique_ptr[column] indices_column) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/column/column_view.pxd b/python/pylibcudf/pylibcudf/libcudf/column/column_view.pxd index c0e971eb5bd..105bea7b8ef 100644 --- a/python/pylibcudf/pylibcudf/libcudf/column/column_view.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/column/column_view.pxd @@ -1,29 +1,29 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp cimport bool from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.types cimport bitmask_type, data_type, size_type cdef extern from "cudf/column/column_view.hpp" namespace "cudf" nogil: cdef cppclass column_view: - column_view() except + - column_view(const column_view& other) except + + column_view() except +libcudf_exception_handler + column_view(const column_view& other) except +libcudf_exception_handler - column_view& operator=(const column_view&) except + + column_view& operator=(const column_view&) except +libcudf_exception_handler column_view( data_type type, size_type size, const void* data - ) except + + ) except +libcudf_exception_handler column_view( data_type type, size_type size, const void* data, const bitmask_type* null_mask - ) except + + ) except +libcudf_exception_handler column_view( data_type type, @@ -31,7 +31,7 @@ cdef extern from "cudf/column/column_view.hpp" namespace "cudf" nogil: const void* data, const bitmask_type* null_mask, size_type null_count - ) except + + ) except +libcudf_exception_handler column_view( data_type type, @@ -40,7 +40,7 @@ cdef extern from "cudf/column/column_view.hpp" namespace "cudf" nogil: const bitmask_type* null_mask, size_type null_count, size_type offset - ) except + + ) except +libcudf_exception_handler column_view( data_type type, @@ -50,37 +50,41 @@ cdef extern from "cudf/column/column_view.hpp" namespace "cudf" nogil: size_type null_count, size_type offset, vector[column_view] children - ) except + - - const T* data[T]() except + - const T* head[T]() except + - const bitmask_type* null_mask() except + - size_type size() except + - data_type type() except + - bool nullable() except + - size_type null_count() except + - bool has_nulls() except + - size_type offset() except + - size_type num_children() except + - column_view child(size_type) except + + ) except +libcudf_exception_handler + + const T* data[T]() except 
+libcudf_exception_handler + const T* head[T]() except +libcudf_exception_handler + const bitmask_type* null_mask() except +libcudf_exception_handler + size_type size() except +libcudf_exception_handler + data_type type() except +libcudf_exception_handler + bool nullable() except +libcudf_exception_handler + size_type null_count() except +libcudf_exception_handler + bool has_nulls() except +libcudf_exception_handler + size_type offset() except +libcudf_exception_handler + size_type num_children() except +libcudf_exception_handler + column_view child(size_type) except +libcudf_exception_handler cdef cppclass mutable_column_view: - mutable_column_view() except + - mutable_column_view(const mutable_column_view&) except + - mutable_column_view& operator=(const mutable_column_view&) except + + mutable_column_view() except +libcudf_exception_handler + mutable_column_view( + const mutable_column_view& + ) except +libcudf_exception_handler + mutable_column_view& operator=( + const mutable_column_view& + ) except +libcudf_exception_handler mutable_column_view( data_type type, size_type size, const void* data - ) except + + ) except +libcudf_exception_handler mutable_column_view( data_type type, size_type size, const void* data, const bitmask_type* null_mask - ) except + + ) except +libcudf_exception_handler mutable_column_view( data_type type, @@ -88,7 +92,7 @@ cdef extern from "cudf/column/column_view.hpp" namespace "cudf" nogil: const void* data, const bitmask_type* null_mask, size_type null_count - ) except + + ) except +libcudf_exception_handler mutable_column_view( data_type type, @@ -97,22 +101,22 @@ cdef extern from "cudf/column/column_view.hpp" namespace "cudf" nogil: const bitmask_type* null_mask, size_type null_count, size_type offset - ) except + + ) except +libcudf_exception_handler mutable_column_view( data_type type, size_type size, const void* data, const bitmask_type* null_mask, size_type null_count, size_type offset, vector[mutable_column_view] children - ) 
except + - - T* data[T]() except + - T* head[T]() except + - bitmask_type* null_mask() except + - size_type size() except + - data_type type() except + - bool nullable() except + - size_type null_count() except + - bool has_nulls() except + - size_type offset() except + - size_type num_children() except + - mutable_column_view& child(size_type) except + + ) except +libcudf_exception_handler + + T* data[T]() except +libcudf_exception_handler + T* head[T]() except +libcudf_exception_handler + bitmask_type* null_mask() except +libcudf_exception_handler + size_type size() except +libcudf_exception_handler + data_type type() except +libcudf_exception_handler + bool nullable() except +libcudf_exception_handler + size_type null_count() except +libcudf_exception_handler + bool has_nulls() except +libcudf_exception_handler + size_type offset() except +libcudf_exception_handler + size_type num_children() except +libcudf_exception_handler + mutable_column_view& child(size_type) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd b/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd index def292148c5..0a827b21cda 100644 --- a/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column, column_view from pylibcudf.libcudf.table.table cimport table, table_view from pylibcudf.libcudf.utilities.span cimport host_span @@ -15,7 +15,13 @@ cdef extern from "cudf/concatenate.hpp" namespace "cudf" nogil: # constructable from a vector. 
In case they are needed in the future, # host_span versions can be added, e.g: # - # cdef unique_ptr[column] concatenate(host_span[column_view] columns) except + + # cdef unique_ptr[column] concatenate( + # host_span[column_view] columns + # ) except +libcudf_exception_handler - cdef unique_ptr[column] concatenate(const vector[column_view] columns) except + - cdef unique_ptr[table] concatenate(const vector[table_view] tables) except + + cdef unique_ptr[column] concatenate( + const vector[column_view] columns + ) except +libcudf_exception_handler + cdef unique_ptr[table] concatenate( + const vector[table_view] tables + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd b/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd index 12090af16cc..9df828015eb 100644 --- a/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd @@ -1,8 +1,8 @@ # Copyright (c) 2023-2024, NVIDIA CORPORATION. 
- from libc.stdint cimport uint8_t from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport size_type @@ -21,13 +21,17 @@ cdef extern from "cudf/contiguous_split.hpp" namespace "cudf" nogil: cdef vector[contiguous_split_result] contiguous_split ( table_view input_table, vector[size_type] splits - ) except + + ) except +libcudf_exception_handler - cdef packed_columns pack (const table_view& input) except + + cdef packed_columns pack ( + const table_view& input + ) except +libcudf_exception_handler - cdef table_view unpack (const packed_columns& input) except + + cdef table_view unpack ( + const packed_columns& input + ) except +libcudf_exception_handler cdef table_view unpack ( const uint8_t* metadata, const uint8_t* gpu_data - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/copying.pxd b/python/pylibcudf/pylibcudf/libcudf/copying.pxd index e6e719d6436..5a05284e86a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/copying.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/copying.pxd @@ -1,5 +1,4 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libc.stdint cimport int32_t, int64_t, uint8_t from libcpp cimport bool from libcpp.functional cimport reference_wrapper diff --git a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd index 8bbc120cff8..049a1b06c2e 100644 --- a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd @@ -2,6 +2,7 @@ from libc.stdint cimport int32_t, uint8_t from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport scalar @@ -20,26 +21,40 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: MICROSECOND NANOSECOND - cdef unique_ptr[column] extract_year(const column_view& column) except + - cdef unique_ptr[column] extract_month(const column_view& column) except + - cdef unique_ptr[column] extract_day(const column_view& column) except + - cdef unique_ptr[column] extract_weekday(const column_view& column) except + - cdef unique_ptr[column] extract_hour(const column_view& column) except + - cdef unique_ptr[column] extract_minute(const column_view& column) except + - cdef unique_ptr[column] extract_second(const column_view& column) except + + cdef unique_ptr[column] extract_year( + const column_view& column + ) except +libcudf_exception_handler + cdef unique_ptr[column] extract_month( + const column_view& column + ) except +libcudf_exception_handler + cdef unique_ptr[column] extract_day( + const column_view& column + ) except +libcudf_exception_handler + cdef unique_ptr[column] extract_weekday( + const column_view& column + ) except +libcudf_exception_handler + cdef unique_ptr[column] extract_hour( + const column_view& column + ) except +libcudf_exception_handler + cdef unique_ptr[column] extract_minute( + const column_view& column + ) except 
+libcudf_exception_handler + cdef unique_ptr[column] extract_second( + const column_view& column + ) except +libcudf_exception_handler cdef unique_ptr[column] extract_millisecond_fraction( const column_view& column - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] extract_microsecond_fraction( const column_view& column - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] extract_nanosecond_fraction( const column_view& column - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] extract_datetime_component( const column_view& column, datetime_component component - ) except + + ) except +libcudf_exception_handler cpdef enum class rounding_frequency(int32_t): DAY @@ -52,26 +67,34 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: cdef unique_ptr[column] ceil_datetimes( const column_view& column, rounding_frequency freq - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] floor_datetimes( const column_view& column, rounding_frequency freq - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] round_datetimes( const column_view& column, rounding_frequency freq - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] add_calendrical_months( const column_view& timestamps, const column_view& months - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] add_calendrical_months( const column_view& timestamps, const scalar& months - ) except + - cdef unique_ptr[column] day_of_year(const column_view& column) except + - cdef unique_ptr[column] is_leap_year(const column_view& column) except + + ) except +libcudf_exception_handler + cdef unique_ptr[column] day_of_year( + const column_view& column + ) except +libcudf_exception_handler + cdef unique_ptr[column] is_leap_year( + const column_view& column + ) except +libcudf_exception_handler cdef unique_ptr[column] last_day_of_month( const column_view& column - ) 
except + - cdef unique_ptr[column] extract_quarter(const column_view& column) except + - cdef unique_ptr[column] days_in_month(const column_view& column) except + + ) except +libcudf_exception_handler + cdef unique_ptr[column] extract_quarter( + const column_view& column + ) except +libcudf_exception_handler + cdef unique_ptr[column] days_in_month( + const column_view& column + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/experimental.pxd b/python/pylibcudf/pylibcudf/libcudf/experimental.pxd index f280a382a04..764815fba36 100644 --- a/python/pylibcudf/pylibcudf/libcudf/experimental.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/experimental.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2022-2024, NVIDIA CORPORATION. - from libcpp cimport bool from libcpp.string cimport string +from pylibcudf.exception_handler cimport libcudf_exception_handler cdef extern from "cudf/utilities/prefetch.hpp" \ diff --git a/python/pylibcudf/pylibcudf/libcudf/expressions.pxd b/python/pylibcudf/pylibcudf/libcudf/expressions.pxd index 5ba2dff6074..0e42d2bd02c 100644 --- a/python/pylibcudf/pylibcudf/libcudf/expressions.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/expressions.pxd @@ -1,8 +1,8 @@ # Copyright (c) 2022-2024, NVIDIA CORPORATION. - from libc.stdint cimport int32_t from libcpp.memory cimport unique_ptr from libcpp.string cimport string +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.scalar.scalar cimport ( duration_scalar, @@ -75,15 +75,15 @@ cdef extern from "cudf/ast/expressions.hpp" namespace "cudf::ast" nogil: cdef cppclass literal(expression): # Due to https://github.com/cython/cython/issues/3198, we need to # specify a return type for templated constructors. 
- literal literal[T](numeric_scalar[T] &) except + - literal literal[T](timestamp_scalar[T] &) except + - literal literal[T](duration_scalar[T] &) except + + literal literal[T](numeric_scalar[T] &) except +libcudf_exception_handler + literal literal[T](timestamp_scalar[T] &) except +libcudf_exception_handler + literal literal[T](duration_scalar[T] &) except +libcudf_exception_handler cdef cppclass column_reference(expression): # Allow for default C++ parameters by declaring multiple constructors # with the default parameters optionally omitted. - column_reference(size_type) except + - column_reference(size_type, table_reference) except + + column_reference(size_type) except +libcudf_exception_handler + column_reference(size_type, table_reference) except +libcudf_exception_handler cdef cppclass operation(expression): operation(ast_operator, const expression &) @@ -92,4 +92,4 @@ cdef extern from "cudf/ast/expressions.hpp" namespace "cudf::ast" nogil: cdef cppclass column_name_reference(expression): # column_name_reference is only meant for use in file I/O such as the # Parquet reader. - column_name_reference(string) except + + column_name_reference(string) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/filling.pxd b/python/pylibcudf/pylibcudf/libcudf/filling.pxd index 7bed80050d2..f0bfe8ca80b 100644 --- a/python/pylibcudf/pylibcudf/libcudf/filling.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/filling.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp cimport bool from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport ( column_view, @@ -19,33 +19,33 @@ cdef extern from "cudf/filling.hpp" namespace "cudf" nogil: size_type begin, size_type end, const scalar & value - ) except + + ) except +libcudf_exception_handler cdef void fill_in_place( const mutable_column_view & destination, size_type beign, size_type end, const scalar & value - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[table] repeat( const table_view & input, const column_view & count, - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[table] repeat( const table_view & input, size_type count - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] sequence( size_type size, const scalar & init, const scalar & step - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] calendrical_month_sequence( size_type n, const scalar& init, size_type months, - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/fixed_point/fixed_point.pxd b/python/pylibcudf/pylibcudf/libcudf/fixed_point/fixed_point.pxd index e55574020f4..a4461f34ab2 100644 --- a/python/pylibcudf/pylibcudf/libcudf/fixed_point/fixed_point.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/fixed_point/fixed_point.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. 
- from libc.stdint cimport int32_t +from pylibcudf.exception_handler cimport libcudf_exception_handler cdef extern from "cudf/fixed_point/fixed_point.hpp" namespace "numeric" nogil: diff --git a/python/pylibcudf/pylibcudf/libcudf/groupby.pxd b/python/pylibcudf/pylibcudf/libcudf/groupby.pxd index 17ea33a2066..cbbc174d7bf 100644 --- a/python/pylibcudf/pylibcudf/libcudf/groupby.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/groupby.pxd @@ -1,10 +1,10 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - from libcpp cimport bool from libcpp.functional cimport reference_wrapper from libcpp.memory cimport unique_ptr from libcpp.pair cimport pair from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.aggregation cimport ( groupby_aggregation, groupby_scan_aggregation, @@ -31,12 +31,12 @@ cdef extern from "cudf/groupby.hpp" \ namespace "cudf::groupby" nogil: cdef cppclass aggregation_request: - aggregation_request() except + + aggregation_request() except +libcudf_exception_handler column_view values vector[unique_ptr[groupby_aggregation]] aggregations cdef cppclass scan_request: - scan_request() except + + scan_request() except +libcudf_exception_handler column_view values vector[unique_ptr[groupby_scan_aggregation]] aggregations @@ -50,24 +50,24 @@ cdef extern from "cudf/groupby.hpp" \ unique_ptr[table] values cdef cppclass groupby: - groupby(const table_view& keys) except + + groupby(const table_view& keys) except +libcudf_exception_handler groupby( const table_view& keys, null_policy include_null_keys - ) except + + ) except +libcudf_exception_handler groupby( const table_view& keys, null_policy include_null_keys, sorted keys_are_sorted, - ) except + + ) except +libcudf_exception_handler groupby( const table_view& keys, null_policy include_null_keys, sorted keys_are_sorted, const vector[order]& column_order, - ) except + + ) except +libcudf_exception_handler groupby( const table_view& keys, @@ -75,21 
+75,21 @@ cdef extern from "cudf/groupby.hpp" \ sorted keys_are_sorted, const vector[order]& column_order, const vector[null_order]& null_precedence - ) except + + ) except +libcudf_exception_handler pair[ unique_ptr[table], vector[aggregation_result] ] aggregate( const vector[aggregation_request]& requests, - ) except + + ) except +libcudf_exception_handler pair[ unique_ptr[table], vector[aggregation_result] ] scan( const vector[scan_request]& requests, - ) except + + ) except +libcudf_exception_handler pair[ unique_ptr[table], @@ -98,12 +98,12 @@ cdef extern from "cudf/groupby.hpp" \ const table_view values, const vector[size_type] offset, const vector[reference_wrapper[constscalar]] fill_values - ) except + + ) except +libcudf_exception_handler - groups get_groups() except + - groups get_groups(table_view values) except + + groups get_groups() except +libcudf_exception_handler + groups get_groups(table_view values) except +libcudf_exception_handler pair[unique_ptr[table], unique_ptr[table]] replace_nulls( const table_view& values, const vector[replace_policy] replace_policy - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/hash.pxd b/python/pylibcudf/pylibcudf/libcudf/hash.pxd index c4222bc9dc5..4e8a01b41a5 100644 --- a/python/pylibcudf/pylibcudf/libcudf/hash.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/hash.pxd @@ -1,5 +1,4 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - from libc.stdint cimport uint32_t, uint64_t from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector diff --git a/python/pylibcudf/pylibcudf/libcudf/interop.pxd b/python/pylibcudf/pylibcudf/libcudf/interop.pxd index b75e9ca7001..8953357a087 100644 --- a/python/pylibcudf/pylibcudf/libcudf/interop.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/interop.pxd @@ -1,8 +1,8 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport shared_ptr, unique_ptr from libcpp.string cimport string from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport scalar @@ -12,19 +12,19 @@ from pylibcudf.libcudf.table.table_view cimport table_view cdef extern from "dlpack/dlpack.h" nogil: ctypedef struct DLManagedTensor: - void(*deleter)(DLManagedTensor*) except + + void(*deleter)(DLManagedTensor*) except +libcudf_exception_handler # The Arrow structs are not namespaced. cdef extern from "cudf/interop.hpp" nogil: cdef struct ArrowSchema: - void (*release)(ArrowSchema*) noexcept nogil + void (*release)(ArrowSchema*) noexcept cdef struct ArrowArray: - void (*release)(ArrowArray*) noexcept nogil + void (*release)(ArrowArray*) noexcept cdef struct ArrowArrayStream: - void (*release)(ArrowArrayStream*) noexcept nogil + void (*release)(ArrowArrayStream*) noexcept cdef struct ArrowDeviceArray: ArrowArray array @@ -34,23 +34,25 @@ cdef extern from "cudf/interop.hpp" namespace "cudf" \ nogil: cdef unique_ptr[table] from_dlpack( const DLManagedTensor* managed_tensor - ) except + + ) except +libcudf_exception_handler DLManagedTensor* to_dlpack( const table_view& input - ) except + + ) except +libcudf_exception_handler cdef cppclass column_metadata: - column_metadata() except + - column_metadata(string name_) except + + column_metadata() except +libcudf_exception_handler + column_metadata(string name_) except +libcudf_exception_handler string name vector[column_metadata] children_meta - cdef unique_ptr[table] from_arrow_stream(ArrowArrayStream* input) except + + cdef unique_ptr[table] from_arrow_stream( + ArrowArrayStream* input + ) except +libcudf_exception_handler cdef unique_ptr[column] from_arrow_column( const ArrowSchema* schema, const ArrowArray* input - ) except + + ) except 
+libcudf_exception_handler cdef extern from *: @@ -84,5 +86,7 @@ cdef extern from *: cdef ArrowSchema *to_arrow_schema_raw( const table_view& tbl, const vector[column_metadata]& metadata, - ) except + nogil - cdef ArrowArray* to_arrow_host_raw(const table_view& tbl) except + nogil + ) except +libcudf_exception_handler nogil + cdef ArrowArray* to_arrow_host_raw( + const table_view& tbl + ) except +libcudf_exception_handler nogil diff --git a/python/pylibcudf/pylibcudf/libcudf/io/avro.pxd b/python/pylibcudf/pylibcudf/libcudf/io/avro.pxd index 2d76e2f6c80..cac55640ac9 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/avro.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/avro.pxd @@ -1,8 +1,8 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - cimport pylibcudf.libcudf.io.types as cudf_io_types from libcpp.string cimport string from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.types cimport size_type @@ -10,34 +10,40 @@ cdef extern from "cudf/io/avro.hpp" \ namespace "cudf::io" nogil: cdef cppclass avro_reader_options: - avro_reader_options() except + - cudf_io_types.source_info get_source() except + - vector[string] get_columns() except + - size_type get_skip_rows() except + - size_type get_num_rows() except + + avro_reader_options() except +libcudf_exception_handler + cudf_io_types.source_info get_source() except +libcudf_exception_handler + vector[string] get_columns() except +libcudf_exception_handler + size_type get_skip_rows() except +libcudf_exception_handler + size_type get_num_rows() except +libcudf_exception_handler # setters - void set_columns(vector[string] col_names) except + - void set_skip_rows(size_type val) except + - void set_num_rows(size_type val) except + + void set_columns(vector[string] col_names) except +libcudf_exception_handler + void set_skip_rows(size_type val) except +libcudf_exception_handler + void set_num_rows(size_type val) except +libcudf_exception_handler 
@staticmethod avro_reader_options_builder builder( cudf_io_types.source_info src - ) except + + ) except +libcudf_exception_handler cdef cppclass avro_reader_options_builder: - avro_reader_options_builder() except + + avro_reader_options_builder() except +libcudf_exception_handler avro_reader_options_builder( cudf_io_types.source_info src - ) except + - avro_reader_options_builder& columns(vector[string] col_names) except + - avro_reader_options_builder& skip_rows(size_type val) except + - avro_reader_options_builder& num_rows(size_type val) except + - - avro_reader_options build() except + + ) except +libcudf_exception_handler + avro_reader_options_builder& columns( + vector[string] col_names + ) except +libcudf_exception_handler + avro_reader_options_builder& skip_rows( + size_type val + ) except +libcudf_exception_handler + avro_reader_options_builder& num_rows( + size_type val + ) except +libcudf_exception_handler + + avro_reader_options build() except +libcudf_exception_handler cdef cudf_io_types.table_with_metadata read_avro( avro_reader_options &options - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/io/csv.pxd b/python/pylibcudf/pylibcudf/libcudf/io/csv.pxd index 73a6d98650c..7ca158016a2 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/csv.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/csv.pxd @@ -1,5 +1,4 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- cimport pylibcudf.libcudf.io.types as cudf_io_types cimport pylibcudf.libcudf.table.table_view as cudf_table_view from libc.stdint cimport uint8_t @@ -8,6 +7,7 @@ from libcpp.map cimport map from libcpp.memory cimport shared_ptr, unique_ptr from libcpp.string cimport string from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.types cimport data_type, size_type @@ -15,227 +15,319 @@ cdef extern from "cudf/io/csv.hpp" \ namespace "cudf::io" nogil: cdef cppclass csv_reader_options: - csv_reader_options() except + + csv_reader_options() except +libcudf_exception_handler # Getter - cudf_io_types.source_info get_source() except + + cudf_io_types.source_info get_source() except +libcudf_exception_handler # Reader settings - cudf_io_types.compression_type get_compression() except + - size_t get_byte_range_offset() except + - size_t get_byte_range_size() except + - vector[string] get_names() except + - string get_prefix() except + - bool is_enabled_mangle_dupe_cols() except + + cudf_io_types.compression_type get_compression()\ + except +libcudf_exception_handler + size_t get_byte_range_offset() except +libcudf_exception_handler + size_t get_byte_range_size() except +libcudf_exception_handler + vector[string] get_names() except +libcudf_exception_handler + string get_prefix() except +libcudf_exception_handler + bool is_enabled_mangle_dupe_cols() except +libcudf_exception_handler # Filter settings - vector[string] get_use_cols_names() except + - vector[int] get_use_cols_indexes() except + - size_type get_nrows() except + - size_type get_skiprows() except + - size_type get_skipfooter() except + - size_type get_header() except + + vector[string] get_use_cols_names() except +libcudf_exception_handler + vector[int] get_use_cols_indexes() except +libcudf_exception_handler + size_type get_nrows() except +libcudf_exception_handler + size_type get_skiprows() except +libcudf_exception_handler + size_type 
get_skipfooter() except +libcudf_exception_handler + size_type get_header() except +libcudf_exception_handler # Parsing settings - char get_lineterminator() except + - char get_delimiter() except + - char get_thousands() except + - char get_decimal() except + - char get_comment() except + - bool is_enabled_windowslinetermination() except + - bool is_enabled_delim_whitespace() except + - bool is_enabled_skipinitialspace() except + - bool is_enabled_skip_blank_lines() except + - cudf_io_types.quote_style get_quoting() except + - char get_quotechar() except + - bool is_enabled_doublequote() except + - bool is_enabled_updated_quotes_detection() except + - vector[string] get_parse_dates_names() except + - vector[int] get_parse_dates_indexes() except + - vector[string] get_parse_hex_names() except + - vector[int] get_parse_hex_indexes() except + + char get_lineterminator() except +libcudf_exception_handler + char get_delimiter() except +libcudf_exception_handler + char get_thousands() except +libcudf_exception_handler + char get_decimal() except +libcudf_exception_handler + char get_comment() except +libcudf_exception_handler + bool is_enabled_windowslinetermination() except +libcudf_exception_handler + bool is_enabled_delim_whitespace() except +libcudf_exception_handler + bool is_enabled_skipinitialspace() except +libcudf_exception_handler + bool is_enabled_skip_blank_lines() except +libcudf_exception_handler + cudf_io_types.quote_style get_quoting() except +libcudf_exception_handler + char get_quotechar() except +libcudf_exception_handler + bool is_enabled_doublequote() except +libcudf_exception_handler + bool is_enabled_updated_quotes_detection() except +libcudf_exception_handler + vector[string] get_parse_dates_names() except +libcudf_exception_handler + vector[int] get_parse_dates_indexes() except +libcudf_exception_handler + vector[string] get_parse_hex_names() except +libcudf_exception_handler + vector[int] get_parse_hex_indexes() except +libcudf_exception_handler 
# Conversion settings - vector[string] get_dtype() except + - vector[string] get_true_values() except + - vector[string] get_false_values() except + - vector[string] get_na_values() except + - bool is_enabled_keep_default_na() except + - bool is_enabled_na_filter() except + - bool is_enabled_dayfirst() except + + vector[string] get_dtype() except +libcudf_exception_handler + vector[string] get_true_values() except +libcudf_exception_handler + vector[string] get_false_values() except +libcudf_exception_handler + vector[string] get_na_values() except +libcudf_exception_handler + bool is_enabled_keep_default_na() except +libcudf_exception_handler + bool is_enabled_na_filter() except +libcudf_exception_handler + bool is_enabled_dayfirst() except +libcudf_exception_handler # setter # Reader settings - void set_compression(cudf_io_types.compression_type comp) except + - void set_byte_range_offset(size_t val) except + - void set_byte_range_size(size_t val) except + - void set_names(vector[string] val) except + - void set_prefix(string pfx) except + - void set_mangle_dupe_cols(bool val) except + + void set_compression( + cudf_io_types.compression_type comp + ) except +libcudf_exception_handler + void set_byte_range_offset(size_t val) except +libcudf_exception_handler + void set_byte_range_size(size_t val) except +libcudf_exception_handler + void set_names(vector[string] val) except +libcudf_exception_handler + void set_prefix(string pfx) except +libcudf_exception_handler + void set_mangle_dupe_cols(bool val) except +libcudf_exception_handler # Filter settings - void set_use_cols_names(vector[string] col_names) except + - void set_use_cols_indexes(vector[int] col_ind) except + - void set_nrows(size_type n_rows) except + - void set_skiprows(size_type val) except + - void set_skipfooter(size_type val) except + - void set_header(size_type hdr) except + + void set_use_cols_names( + vector[string] col_names + ) except +libcudf_exception_handler + void set_use_cols_indexes( + 
vector[int] col_ind + ) except +libcudf_exception_handler + void set_nrows(size_type n_rows) except +libcudf_exception_handler + void set_skiprows(size_type val) except +libcudf_exception_handler + void set_skipfooter(size_type val) except +libcudf_exception_handler + void set_header(size_type hdr) except +libcudf_exception_handler # Parsing settings - void set_lineterminator(char val) except + - void set_delimiter(char val) except + - void set_thousands(char val) except + - void set_decimal(char val) except + - void set_comment(char val) except + - void enable_windowslinetermination(bool val) except + - void enable_delim_whitespace(bool val) except + - void enable_skipinitialspace(bool val) except + - void enable_skip_blank_lines(bool val) except + - void set_quoting(cudf_io_types.quote_style style) except + - void set_quotechar(char val) except + - void set_doublequote(bool val) except + - void set_detect_whitespace_around_quotes(bool val) except + - void set_parse_dates(vector[string]) except + - void set_parse_dates(vector[int]) except + - void set_parse_hex(vector[string]) except + - void set_parse_hex(vector[int]) except + + void set_lineterminator(char val) except +libcudf_exception_handler + void set_delimiter(char val) except +libcudf_exception_handler + void set_thousands(char val) except +libcudf_exception_handler + void set_decimal(char val) except +libcudf_exception_handler + void set_comment(char val) except +libcudf_exception_handler + void enable_windowslinetermination(bool val) except +libcudf_exception_handler + void enable_delim_whitespace(bool val) except +libcudf_exception_handler + void enable_skipinitialspace(bool val) except +libcudf_exception_handler + void enable_skip_blank_lines(bool val) except +libcudf_exception_handler + void set_quoting( + cudf_io_types.quote_style style + ) except +libcudf_exception_handler + void set_quotechar(char val) except +libcudf_exception_handler + void set_doublequote(bool val) except 
+libcudf_exception_handler + void set_detect_whitespace_around_quotes( + bool val + ) except +libcudf_exception_handler + void set_parse_dates(vector[string]) except +libcudf_exception_handler + void set_parse_dates(vector[int]) except +libcudf_exception_handler + void set_parse_hex(vector[string]) except +libcudf_exception_handler + void set_parse_hex(vector[int]) except +libcudf_exception_handler # Conversion settings - void set_dtypes(vector[data_type] types) except + - void set_dtypes(map[string, data_type] types) except + - void set_true_values(vector[string] vals) except + - void set_false_values(vector[string] vals) except + - void set_na_values(vector[string] vals) except + - void enable_keep_default_na(bool val) except + - void enable_na_filter(bool val) except + - void enable_dayfirst(bool val) except + - void set_timestamp_type(data_type type) except + + void set_dtypes(vector[data_type] types) except +libcudf_exception_handler + void set_dtypes(map[string, data_type] types) except +libcudf_exception_handler + void set_true_values(vector[string] vals) except +libcudf_exception_handler + void set_false_values(vector[string] vals) except +libcudf_exception_handler + void set_na_values(vector[string] vals) except +libcudf_exception_handler + void enable_keep_default_na(bool val) except +libcudf_exception_handler + void enable_na_filter(bool val) except +libcudf_exception_handler + void enable_dayfirst(bool val) except +libcudf_exception_handler + void set_timestamp_type(data_type type) except +libcudf_exception_handler @staticmethod csv_reader_options_builder builder( cudf_io_types.source_info src - ) except + + ) except +libcudf_exception_handler cdef cppclass csv_reader_options_builder: - csv_reader_options_builder() except + + csv_reader_options_builder() except +libcudf_exception_handler csv_reader_options_builder( cudf_io_types.source_info src - ) except + + ) except +libcudf_exception_handler csv_reader_options_builder& source( 
cudf_io_types.source_info info - ) except + + ) except +libcudf_exception_handler # Reader settings csv_reader_options_builder& compression( cudf_io_types.compression_type comp - ) except + - csv_reader_options_builder& byte_range_offset(size_t val) except + - csv_reader_options_builder& byte_range_size(size_t val) except + - csv_reader_options_builder& names(vector[string] val) except + - csv_reader_options_builder& prefix(string pfx) except + - csv_reader_options_builder& mangle_dupe_cols(bool val) except + + ) except +libcudf_exception_handler + csv_reader_options_builder& byte_range_offset( + size_t val + ) except +libcudf_exception_handler + csv_reader_options_builder& byte_range_size( + size_t val + ) except +libcudf_exception_handler + csv_reader_options_builder& names( + vector[string] val + ) except +libcudf_exception_handler + csv_reader_options_builder& prefix( + string pfx + ) except +libcudf_exception_handler + csv_reader_options_builder& mangle_dupe_cols( + bool val + ) except +libcudf_exception_handler # Filter settings csv_reader_options_builder& use_cols_names( vector[string] col_names - ) except + + ) except +libcudf_exception_handler csv_reader_options_builder& use_cols_indexes( vector[int] col_ind - ) except + - csv_reader_options_builder& nrows(size_type n_rows) except + - csv_reader_options_builder& skiprows(size_type val) except + - csv_reader_options_builder& skipfooter(size_type val) except + - csv_reader_options_builder& header(size_type hdr) except + + ) except +libcudf_exception_handler + csv_reader_options_builder& nrows( + size_type n_rows + ) except +libcudf_exception_handler + csv_reader_options_builder& skiprows( + size_type val + ) except +libcudf_exception_handler + csv_reader_options_builder& skipfooter( + size_type val + ) except +libcudf_exception_handler + csv_reader_options_builder& header( + size_type hdr + ) except +libcudf_exception_handler # Parsing settings - csv_reader_options_builder& lineterminator(char val) except + 
- csv_reader_options_builder& delimiter(char val) except + - csv_reader_options_builder& thousands(char val) except + - csv_reader_options_builder& decimal(char val) except + - csv_reader_options_builder& comment(char val) except + - csv_reader_options_builder& windowslinetermination(bool val) except + - csv_reader_options_builder& delim_whitespace(bool val) except + - csv_reader_options_builder& skipinitialspace(bool val) except + - csv_reader_options_builder& skip_blank_lines(bool val) except + + csv_reader_options_builder& lineterminator( + char val + ) except +libcudf_exception_handler + csv_reader_options_builder& delimiter( + char val + ) except +libcudf_exception_handler + csv_reader_options_builder& thousands( + char val + ) except +libcudf_exception_handler + csv_reader_options_builder& decimal( + char val + ) except +libcudf_exception_handler + csv_reader_options_builder& comment( + char val + ) except +libcudf_exception_handler + csv_reader_options_builder& windowslinetermination( + bool val + ) except +libcudf_exception_handler + csv_reader_options_builder& delim_whitespace( + bool val + ) except +libcudf_exception_handler + csv_reader_options_builder& skipinitialspace( + bool val + ) except +libcudf_exception_handler + csv_reader_options_builder& skip_blank_lines( + bool val + ) except +libcudf_exception_handler csv_reader_options_builder& quoting( cudf_io_types.quote_style style - ) except + - csv_reader_options_builder& quotechar(char val) except + - csv_reader_options_builder& doublequote(bool val) except + - csv_reader_options_builder& detect_whitespace_around_quotes(bool val) except + - csv_reader_options_builder& parse_dates(vector[string]) except + - csv_reader_options_builder& parse_dates(vector[int]) except + + ) except +libcudf_exception_handler + csv_reader_options_builder& quotechar( + char val + ) except +libcudf_exception_handler + csv_reader_options_builder& doublequote( + bool val + ) except +libcudf_exception_handler + 
csv_reader_options_builder& detect_whitespace_around_quotes( + bool val + ) except +libcudf_exception_handler + csv_reader_options_builder& parse_dates( + vector[string] + ) except +libcudf_exception_handler + csv_reader_options_builder& parse_dates( + vector[int] + ) except +libcudf_exception_handler # Conversion settings - csv_reader_options_builder& dtypes(vector[string] types) except + - csv_reader_options_builder& dtypes(vector[data_type] types) except + + csv_reader_options_builder& dtypes( + vector[string] types) except +libcudf_exception_handler + csv_reader_options_builder& dtypes( + vector[data_type] types + ) except +libcudf_exception_handler csv_reader_options_builder& dtypes( map[string, data_type] types - ) except + - csv_reader_options_builder& true_values(vector[string] vals) except + - csv_reader_options_builder& false_values(vector[string] vals) except + - csv_reader_options_builder& na_values(vector[string] vals) except + - csv_reader_options_builder& keep_default_na(bool val) except + - csv_reader_options_builder& na_filter(bool val) except + - csv_reader_options_builder& dayfirst(bool val) except + - csv_reader_options_builder& timestamp_type(data_type type) except + + ) except +libcudf_exception_handler + csv_reader_options_builder& true_values( + vector[string] vals + ) except +libcudf_exception_handler + csv_reader_options_builder& false_values( + vector[string] vals + ) except +libcudf_exception_handler + csv_reader_options_builder& na_values( + vector[string] vals + ) except +libcudf_exception_handler + csv_reader_options_builder& keep_default_na( + bool val + ) except +libcudf_exception_handler + csv_reader_options_builder& na_filter( + bool val + ) except +libcudf_exception_handler + csv_reader_options_builder& dayfirst( + bool val + ) except +libcudf_exception_handler + csv_reader_options_builder& timestamp_type( + data_type type + ) except +libcudf_exception_handler - csv_reader_options build() except + + csv_reader_options build() 
except +libcudf_exception_handler cdef cudf_io_types.table_with_metadata read_csv( csv_reader_options &options - ) except + + ) except +libcudf_exception_handler cdef cppclass csv_writer_options: - csv_writer_options() except + - - cudf_io_types.sink_info get_sink() except + - cudf_table_view.table_view get_table() except + - cudf_io_types.table_metadata get_metadata() except + - string get_na_rep() except + - bool is_enabled_include_header() except + - size_type get_rows_per_chunk() except + - string get_line_terminator() except + - char get_inter_column_delimiter() except + - string get_true_value() except + - string get_false_value() except + - vector[string] get_names() except + + csv_writer_options() except +libcudf_exception_handler + + cudf_io_types.sink_info get_sink() except +libcudf_exception_handler + cudf_table_view.table_view get_table() except +libcudf_exception_handler + cudf_io_types.table_metadata get_metadata() except +libcudf_exception_handler + string get_na_rep() except +libcudf_exception_handler + bool is_enabled_include_header() except +libcudf_exception_handler + size_type get_rows_per_chunk() except +libcudf_exception_handler + string get_line_terminator() except +libcudf_exception_handler + char get_inter_column_delimiter() except +libcudf_exception_handler + string get_true_value() except +libcudf_exception_handler + string get_false_value() except +libcudf_exception_handler + vector[string] get_names() except +libcudf_exception_handler # setter - void set_metadata(cudf_io_types.table_metadata* val) except + - void set_na_rep(string val) except + - void enable_include_header(bool val) except + - void set_rows_per_chunk(size_type val) except + - void set_line_terminator(string term) except + - void set_inter_column_delimiter(char delim) except + - void set_true_value(string val) except + - void set_false_value(string val) except + - void set_names(vector[string] val) except + + void set_metadata( + cudf_io_types.table_metadata* val + ) 
except +libcudf_exception_handler + void set_na_rep(string val) except +libcudf_exception_handler + void enable_include_header(bool val) except +libcudf_exception_handler + void set_rows_per_chunk(size_type val) except +libcudf_exception_handler + void set_line_terminator(string term) except +libcudf_exception_handler + void set_inter_column_delimiter(char delim) except +libcudf_exception_handler + void set_true_value(string val) except +libcudf_exception_handler + void set_false_value(string val) except +libcudf_exception_handler + void set_names(vector[string] val) except +libcudf_exception_handler @staticmethod csv_writer_options_builder builder( cudf_io_types.sink_info sink, cudf_table_view.table_view table - ) except + + ) except +libcudf_exception_handler cdef cppclass csv_writer_options_builder: - csv_writer_options_builder() except + + csv_writer_options_builder() except +libcudf_exception_handler csv_writer_options_builder( cudf_io_types.sink_info sink, cudf_table_view.table_view table - ) except + + ) except +libcudf_exception_handler - csv_writer_options_builder& names(vector[string] val) except + - csv_writer_options_builder& na_rep(string val) except + - csv_writer_options_builder& include_header(bool val) except + - csv_writer_options_builder& rows_per_chunk(size_type val) except + - csv_writer_options_builder& line_terminator(string term) except + - csv_writer_options_builder& inter_column_delimiter(char delim) except + - csv_writer_options_builder& true_value(string val) except + - csv_writer_options_builder& false_value(string val) except + + csv_writer_options_builder& names( + vector[string] val + ) except +libcudf_exception_handler + csv_writer_options_builder& na_rep( + string val + ) except +libcudf_exception_handler + csv_writer_options_builder& include_header( + bool val + ) except +libcudf_exception_handler + csv_writer_options_builder& rows_per_chunk( + size_type val + ) except +libcudf_exception_handler + csv_writer_options_builder& 
line_terminator( + string term + ) except +libcudf_exception_handler + csv_writer_options_builder& inter_column_delimiter( + char delim + ) except +libcudf_exception_handler + csv_writer_options_builder& true_value( + string val + ) except +libcudf_exception_handler + csv_writer_options_builder& false_value( + string val + ) except +libcudf_exception_handler - csv_writer_options build() except + + csv_writer_options build() except +libcudf_exception_handler - cdef void write_csv(csv_writer_options args) except + + cdef void write_csv(csv_writer_options args) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/io/data_sink.pxd b/python/pylibcudf/pylibcudf/libcudf/io/data_sink.pxd index e939a47d7f9..00f35bbf4e4 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/data_sink.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/data_sink.pxd @@ -1,4 +1,5 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +from pylibcudf.exception_handler cimport libcudf_exception_handler cdef extern from "cudf/io/data_sink.hpp" \ diff --git a/python/pylibcudf/pylibcudf/libcudf/io/datasource.pxd b/python/pylibcudf/pylibcudf/libcudf/io/datasource.pxd index c69aa65bd3c..cda7d940c91 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/datasource.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/datasource.pxd @@ -1,4 +1,5 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +from pylibcudf.exception_handler cimport libcudf_exception_handler cdef extern from "cudf/io/datasource.hpp" \ diff --git a/python/pylibcudf/pylibcudf/libcudf/io/json.pxd b/python/pylibcudf/pylibcudf/libcudf/io/json.pxd index 1c74f8ca3ac..a7ca6978621 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/json.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/json.pxd @@ -1,5 +1,4 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- cimport pylibcudf.libcudf.io.types as cudf_io_types cimport pylibcudf.libcudf.table.table_view as cudf_table_view from libc.stdint cimport int32_t, uint8_t @@ -8,6 +7,7 @@ from libcpp.map cimport map from libcpp.memory cimport shared_ptr, unique_ptr from libcpp.string cimport string from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.types cimport data_type, size_type @@ -23,133 +23,154 @@ cdef extern from "cudf/io/json.hpp" \ RECOVER_WITH_NULL cdef cppclass json_reader_options: - json_reader_options() except + - cudf_io_types.source_info get_source() except + - vector[string] get_dtypes() except + - cudf_io_types.compression_type get_compression() except + - size_t get_byte_range_offset() except + - size_t get_byte_range_size() except + - bool is_enabled_lines() except + - bool is_enabled_mixed_types_as_string() except + - bool is_enabled_prune_columns() except + - bool is_enabled_dayfirst() except + - bool is_enabled_experimental() except + + json_reader_options() except +libcudf_exception_handler + cudf_io_types.source_info get_source() except +libcudf_exception_handler + vector[string] get_dtypes() except +libcudf_exception_handler + cudf_io_types.compression_type get_compression()\ + except +libcudf_exception_handler + size_t get_byte_range_offset() except +libcudf_exception_handler + size_t get_byte_range_size() except +libcudf_exception_handler + bool is_enabled_lines() except +libcudf_exception_handler + bool is_enabled_mixed_types_as_string() except +libcudf_exception_handler + bool is_enabled_prune_columns() except +libcudf_exception_handler + bool is_enabled_dayfirst() except +libcudf_exception_handler + bool is_enabled_experimental() except +libcudf_exception_handler # setter - void set_dtypes(vector[data_type] types) except + - void set_dtypes(map[string, schema_element] types) except + + void set_dtypes( + vector[data_type] types + ) except +libcudf_exception_handler + void 
set_dtypes( + map[string, schema_element] types + ) except +libcudf_exception_handler void set_compression( cudf_io_types.compression_type compression - ) except + - void set_byte_range_offset(size_t offset) except + - void set_byte_range_size(size_t size) except + - void enable_lines(bool val) except + - void enable_mixed_types_as_string(bool val) except + - void enable_prune_columns(bool val) except + - void enable_dayfirst(bool val) except + - void enable_experimental(bool val) except + - void enable_keep_quotes(bool val) except + + ) except +libcudf_exception_handler + void set_byte_range_offset(size_t offset) except +libcudf_exception_handler + void set_byte_range_size(size_t size) except +libcudf_exception_handler + void enable_lines(bool val) except +libcudf_exception_handler + void enable_mixed_types_as_string(bool val) except +libcudf_exception_handler + void enable_prune_columns(bool val) except +libcudf_exception_handler + void enable_dayfirst(bool val) except +libcudf_exception_handler + void enable_experimental(bool val) except +libcudf_exception_handler + void enable_keep_quotes(bool val) except +libcudf_exception_handler @staticmethod json_reader_options_builder builder( cudf_io_types.source_info src - ) except + + ) except +libcudf_exception_handler cdef cppclass json_reader_options_builder: - json_reader_options_builder() except + + json_reader_options_builder() except +libcudf_exception_handler json_reader_options_builder( cudf_io_types.source_info src - ) except + + ) except +libcudf_exception_handler json_reader_options_builder& dtypes( vector[string] types - ) except + + ) except +libcudf_exception_handler json_reader_options_builder& dtypes( vector[data_type] types - ) except + + ) except +libcudf_exception_handler json_reader_options_builder& dtypes( map[string, schema_element] types - ) except + + ) except +libcudf_exception_handler json_reader_options_builder& compression( cudf_io_types.compression_type compression - ) except + + ) except 
+libcudf_exception_handler json_reader_options_builder& byte_range_offset( size_t offset - ) except + + ) except +libcudf_exception_handler json_reader_options_builder& byte_range_size( size_t size - ) except + + ) except +libcudf_exception_handler json_reader_options_builder& lines( bool val - ) except + + ) except +libcudf_exception_handler json_reader_options_builder& mixed_types_as_string( bool val - ) except + + ) except +libcudf_exception_handler json_reader_options_builder& prune_columns( bool val - ) except + + ) except +libcudf_exception_handler json_reader_options_builder& dayfirst( bool val - ) except + + ) except +libcudf_exception_handler json_reader_options_builder& keep_quotes( bool val - ) except + + ) except +libcudf_exception_handler json_reader_options_builder& recovery_mode( json_recovery_mode_t val - ) except + + ) except +libcudf_exception_handler - json_reader_options build() except + + json_reader_options build() except +libcudf_exception_handler cdef cudf_io_types.table_with_metadata read_json( - json_reader_options &options) except + + json_reader_options &options) except +libcudf_exception_handler cdef cppclass json_writer_options: - json_writer_options() except + - cudf_io_types.sink_info get_sink() except + - cudf_table_view.table_view get_table() except + - string get_na_rep() except + - bool is_enabled_include_nulls() except + - bool is_enabled_lines() except + - bool is_enabled_experimental() except + - size_type get_rows_per_chunk() except + - string get_true_value() except + - string get_false_value() except + + json_writer_options() except +libcudf_exception_handler + cudf_io_types.sink_info get_sink() except +libcudf_exception_handler + cudf_table_view.table_view get_table() except +libcudf_exception_handler + string get_na_rep() except +libcudf_exception_handler + bool is_enabled_include_nulls() except +libcudf_exception_handler + bool is_enabled_lines() except +libcudf_exception_handler + bool is_enabled_experimental() except 
+libcudf_exception_handler + size_type get_rows_per_chunk() except +libcudf_exception_handler + string get_true_value() except +libcudf_exception_handler + string get_false_value() except +libcudf_exception_handler # setter - void set_table(cudf_table_view.table_view tbl) except + - void set_metadata(cudf_io_types.table_metadata meta) except + - void set_na_rep(string val) except + - void enable_include_nulls(bool val) except + - void enable_lines(bool val) except + - void set_rows_per_chunk(size_type val) except + - void set_true_value(string val) except + - void set_false_value(string val) except + + void set_table( + cudf_table_view.table_view tbl + ) except +libcudf_exception_handler + void set_metadata( + cudf_io_types.table_metadata meta + ) except +libcudf_exception_handler + void set_na_rep(string val) except +libcudf_exception_handler + void enable_include_nulls(bool val) except +libcudf_exception_handler + void enable_lines(bool val) except +libcudf_exception_handler + void set_rows_per_chunk(size_type val) except +libcudf_exception_handler + void set_true_value(string val) except +libcudf_exception_handler + void set_false_value(string val) except +libcudf_exception_handler @staticmethod json_writer_options_builder builder( cudf_io_types.sink_info sink, cudf_table_view.table_view tbl - ) except + + ) except +libcudf_exception_handler cdef cppclass json_writer_options_builder: - json_writer_options_builder() except + + json_writer_options_builder() except +libcudf_exception_handler json_writer_options_builder( cudf_io_types.source_info src, cudf_table_view.table_view tbl - ) except + + ) except +libcudf_exception_handler json_writer_options_builder& table( cudf_table_view.table_view tbl - ) except + + ) except +libcudf_exception_handler json_writer_options_builder& metadata( cudf_io_types.table_metadata meta - ) except + - json_writer_options_builder& na_rep(string val) except + - json_writer_options_builder& include_nulls(bool val) except + - 
json_writer_options_builder& lines(bool val) except + - json_writer_options_builder& rows_per_chunk(size_type val) except + - json_writer_options_builder& true_value(string val) except + - json_writer_options_builder& false_value(string val) except + - - json_writer_options build() except + + ) except +libcudf_exception_handler + json_writer_options_builder& na_rep( + string val + ) except +libcudf_exception_handler + json_writer_options_builder& include_nulls( + bool val + ) except +libcudf_exception_handler + json_writer_options_builder& lines( + bool val + ) except +libcudf_exception_handler + json_writer_options_builder& rows_per_chunk( + size_type val + ) except +libcudf_exception_handler + json_writer_options_builder& true_value( + string val + ) except +libcudf_exception_handler + json_writer_options_builder& false_value( + string val + ) except +libcudf_exception_handler + + json_writer_options build() except +libcudf_exception_handler cdef cudf_io_types.table_with_metadata write_json( - json_writer_options &options) except + + json_writer_options &options) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd b/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd index dca24c7f665..f5485da1d51 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd @@ -1,5 +1,4 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- cimport pylibcudf.libcudf.io.types as cudf_io_types cimport pylibcudf.libcudf.table.table_view as cudf_table_view from libc.stdint cimport int64_t, uint8_t @@ -9,6 +8,7 @@ from libcpp.memory cimport shared_ptr, unique_ptr from libcpp.optional cimport optional from libcpp.string cimport string from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.types cimport data_type, size_type @@ -16,160 +16,206 @@ cdef extern from "cudf/io/orc.hpp" \ namespace "cudf::io" nogil: cdef cppclass orc_reader_options: - orc_reader_options() except + - - cudf_io_types.source_info get_source() except + - vector[vector[size_type]] get_stripes() except + - int64_t get_skip_rows() except + - optional[int64_t] get_num_rows() except + - bool is_enabled_use_index() except + - bool is_enabled_use_np_dtypes() except + - data_type get_timestamp_type() except + - bool is_enabled_decimals_as_float64() except + - int get_forced_decimals_scale() except + - - void set_columns(vector[string] col_names) except + - void set_stripes(vector[vector[size_type]] strps) except + - void set_skip_rows(int64_t rows) except + - void set_num_rows(int64_t nrows) except + - void enable_use_index(bool val) except + - void enable_use_np_dtypes(bool val) except + - void set_timestamp_type(data_type type) except + - void set_decimal128_columns(vector[string] val) except + + orc_reader_options() except +libcudf_exception_handler + + cudf_io_types.source_info get_source() except +libcudf_exception_handler + vector[vector[size_type]] get_stripes() except +libcudf_exception_handler + int64_t get_skip_rows() except +libcudf_exception_handler + optional[int64_t] get_num_rows() except +libcudf_exception_handler + bool is_enabled_use_index() except +libcudf_exception_handler + bool is_enabled_use_np_dtypes() except +libcudf_exception_handler + data_type get_timestamp_type() except +libcudf_exception_handler + bool is_enabled_decimals_as_float64() except 
+libcudf_exception_handler + int get_forced_decimals_scale() except +libcudf_exception_handler + + void set_columns(vector[string] col_names) except +libcudf_exception_handler + void set_stripes( + vector[vector[size_type]] strps + ) except +libcudf_exception_handler + void set_skip_rows(int64_t rows) except +libcudf_exception_handler + void set_num_rows(int64_t nrows) except +libcudf_exception_handler + void enable_use_index(bool val) except +libcudf_exception_handler + void enable_use_np_dtypes(bool val) except +libcudf_exception_handler + void set_timestamp_type(data_type type) except +libcudf_exception_handler + void set_decimal128_columns( + vector[string] val + ) except +libcudf_exception_handler @staticmethod orc_reader_options_builder builder( cudf_io_types.source_info src - ) except + + ) except +libcudf_exception_handler cdef cppclass orc_reader_options_builder: - orc_reader_options_builder() except + - orc_reader_options_builder(cudf_io_types.source_info &src) except + - - orc_reader_options_builder& columns(vector[string] col_names) except + + orc_reader_options_builder() except +libcudf_exception_handler + orc_reader_options_builder( + cudf_io_types.source_info &src + ) except +libcudf_exception_handler + + orc_reader_options_builder& columns( + vector[string] col_names + ) except +libcudf_exception_handler orc_reader_options_builder& \ - stripes(vector[vector[size_type]] strps) except + - orc_reader_options_builder& skip_rows(int64_t rows) except + - orc_reader_options_builder& num_rows(int64_t nrows) except + - orc_reader_options_builder& use_index(bool val) except + - orc_reader_options_builder& use_np_dtypes(bool val) except + - orc_reader_options_builder& timestamp_type(data_type type) except + - - orc_reader_options build() except + + stripes(vector[vector[size_type]] strps) except +libcudf_exception_handler + orc_reader_options_builder& skip_rows( + int64_t rows + ) except +libcudf_exception_handler + orc_reader_options_builder& num_rows( + 
int64_t nrows + ) except +libcudf_exception_handler + orc_reader_options_builder& use_index( + bool val + ) except +libcudf_exception_handler + orc_reader_options_builder& use_np_dtypes( + bool val + ) except +libcudf_exception_handler + orc_reader_options_builder& timestamp_type( + data_type type + ) except +libcudf_exception_handler + + orc_reader_options build() except +libcudf_exception_handler cdef cudf_io_types.table_with_metadata read_orc( orc_reader_options opts - ) except + + ) except +libcudf_exception_handler cdef cppclass orc_writer_options: orc_writer_options() - cudf_io_types.sink_info get_sink() except + - cudf_io_types.compression_type get_compression() except + - bool is_enabled_statistics() except + - size_t get_stripe_size_bytes() except + - size_type get_stripe_size_rows() except + - size_type get_row_index_stride() except + - cudf_table_view.table_view get_table() except + + cudf_io_types.sink_info get_sink() except +libcudf_exception_handler + cudf_io_types.compression_type get_compression()\ + except +libcudf_exception_handler + bool is_enabled_statistics() except +libcudf_exception_handler + size_t get_stripe_size_bytes() except +libcudf_exception_handler + size_type get_stripe_size_rows() except +libcudf_exception_handler + size_type get_row_index_stride() except +libcudf_exception_handler + cudf_table_view.table_view get_table() except +libcudf_exception_handler const optional[cudf_io_types.table_input_metadata]& get_metadata( - ) except + + ) except +libcudf_exception_handler # setter - void set_compression(cudf_io_types.compression_type comp) except + - void enable_statistics(bool val) except + - void set_stripe_size_bytes(size_t val) except + - void set_stripe_size_rows(size_type val) except + - void set_row_index_stride(size_type val) except + - void set_table(cudf_table_view.table_view tbl) except + - void set_metadata(cudf_io_types.table_input_metadata meta) except + - void set_key_value_metadata(map[string, string] kvm) except + + 
void set_compression( + cudf_io_types.compression_type comp + ) except +libcudf_exception_handler + void enable_statistics(bool val) except +libcudf_exception_handler + void set_stripe_size_bytes(size_t val) except +libcudf_exception_handler + void set_stripe_size_rows(size_type val) except +libcudf_exception_handler + void set_row_index_stride(size_type val) except +libcudf_exception_handler + void set_table(cudf_table_view.table_view tbl) except +libcudf_exception_handler + void set_metadata( + cudf_io_types.table_input_metadata meta + ) except +libcudf_exception_handler + void set_key_value_metadata( + map[string, string] kvm + ) except +libcudf_exception_handler @staticmethod orc_writer_options_builder builder( cudf_io_types.sink_info &sink, cudf_table_view.table_view &tbl - ) except + + ) except +libcudf_exception_handler cdef cppclass orc_writer_options_builder: # setter orc_writer_options_builder& compression( cudf_io_types.compression_type comp - ) except + + ) except +libcudf_exception_handler orc_writer_options_builder& enable_statistics( cudf_io_types.statistics_freq val - ) except + - orc_writer_options_builder& stripe_size_bytes(size_t val) except + - orc_writer_options_builder& stripe_size_rows(size_type val) except + - orc_writer_options_builder& row_index_stride(size_type val) except + + ) except +libcudf_exception_handler + orc_writer_options_builder& stripe_size_bytes( + size_t val + ) except +libcudf_exception_handler + orc_writer_options_builder& stripe_size_rows( + size_type val + ) except +libcudf_exception_handler + orc_writer_options_builder& row_index_stride( + size_type val + ) except +libcudf_exception_handler orc_writer_options_builder& table( cudf_table_view.table_view tbl - ) except + + ) except +libcudf_exception_handler orc_writer_options_builder& metadata( cudf_io_types.table_input_metadata meta - ) except + + ) except +libcudf_exception_handler orc_writer_options_builder& key_value_metadata( map[string, string] kvm - ) except + + ) 
except +libcudf_exception_handler - orc_writer_options build() except + + orc_writer_options build() except +libcudf_exception_handler - cdef void write_orc(orc_writer_options options) except + + cdef void write_orc( + orc_writer_options options + ) except +libcudf_exception_handler cdef cppclass chunked_orc_writer_options: - chunked_orc_writer_options() except + - cudf_io_types.sink_info get_sink() except + - cudf_io_types.compression_type get_compression() except + - bool enable_statistics() except + - size_t stripe_size_bytes() except + - size_type stripe_size_rows() except + - size_type row_index_stride() except + - cudf_table_view.table_view get_table() except + + chunked_orc_writer_options() except +libcudf_exception_handler + cudf_io_types.sink_info get_sink() except +libcudf_exception_handler + cudf_io_types.compression_type get_compression()\ + except +libcudf_exception_handler + bool enable_statistics() except +libcudf_exception_handler + size_t stripe_size_bytes() except +libcudf_exception_handler + size_type stripe_size_rows() except +libcudf_exception_handler + size_type row_index_stride() except +libcudf_exception_handler + cudf_table_view.table_view get_table() except +libcudf_exception_handler const optional[cudf_io_types.table_input_metadata]& get_metadata( - ) except + + ) except +libcudf_exception_handler # setter - void set_compression(cudf_io_types.compression_type comp) except + - void enable_statistics(bool val) except + - void set_stripe_size_bytes(size_t val) except + - void set_stripe_size_rows(size_type val) except + - void set_row_index_stride(size_type val) except + - void set_table(cudf_table_view.table_view tbl) except + + void set_compression( + cudf_io_types.compression_type comp + ) except +libcudf_exception_handler + void enable_statistics(bool val) except +libcudf_exception_handler + void set_stripe_size_bytes(size_t val) except +libcudf_exception_handler + void set_stripe_size_rows(size_type val) except 
+libcudf_exception_handler + void set_row_index_stride(size_type val) except +libcudf_exception_handler + void set_table(cudf_table_view.table_view tbl) except +libcudf_exception_handler void set_metadata( cudf_io_types.table_input_metadata meta - ) except + - void set_key_value_metadata(map[string, string] kvm) except + + ) except +libcudf_exception_handler + void set_key_value_metadata( + map[string, string] kvm + ) except +libcudf_exception_handler @staticmethod chunked_orc_writer_options_builder builder( cudf_io_types.sink_info &sink - ) except + + ) except +libcudf_exception_handler cdef cppclass chunked_orc_writer_options_builder: # setter chunked_orc_writer_options_builder& compression( cudf_io_types.compression_type comp - ) except + + ) except +libcudf_exception_handler chunked_orc_writer_options_builder& enable_statistics( cudf_io_types.statistics_freq val - ) except + - orc_writer_options_builder& stripe_size_bytes(size_t val) except + - orc_writer_options_builder& stripe_size_rows(size_type val) except + - orc_writer_options_builder& row_index_stride(size_type val) except + + ) except +libcudf_exception_handler + orc_writer_options_builder& stripe_size_bytes( + size_t val + ) except +libcudf_exception_handler + orc_writer_options_builder& stripe_size_rows( + size_type val + ) except +libcudf_exception_handler + orc_writer_options_builder& row_index_stride( + size_type val + ) except +libcudf_exception_handler chunked_orc_writer_options_builder& table( cudf_table_view.table_view tbl - ) except + + ) except +libcudf_exception_handler chunked_orc_writer_options_builder& metadata( cudf_io_types.table_input_metadata meta - ) except + + ) except +libcudf_exception_handler chunked_orc_writer_options_builder& key_value_metadata( map[string, string] kvm - ) except + + ) except +libcudf_exception_handler - chunked_orc_writer_options build() except + + chunked_orc_writer_options build() except +libcudf_exception_handler cdef cppclass orc_chunked_writer: - 
orc_chunked_writer() except + - orc_chunked_writer(chunked_orc_writer_options args) except + + orc_chunked_writer() except +libcudf_exception_handler + orc_chunked_writer( + chunked_orc_writer_options args + ) except +libcudf_exception_handler orc_chunked_writer& write( cudf_table_view.table_view table_, - ) except + - void close() except + + ) except +libcudf_exception_handler + void close() except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd b/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd index 9302ffe2f80..38954d22676 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd @@ -1,10 +1,10 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - from libc.stdint cimport int32_t, int64_t, uint32_t, uint64_t from libcpp cimport bool from libcpp.optional cimport optional from libcpp.string cimport string from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.io cimport types as cudf_io_types from pylibcudf.variant cimport monostate, variant @@ -69,4 +69,4 @@ cdef extern from "cudf/io/orc_metadata.hpp" \ cdef parsed_orc_statistics read_parsed_orc_statistics( cudf_io_types.source_info src_info - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd b/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd index de6a6c1e82d..110c9d4a0b9 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd @@ -1,5 +1,4 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libc.stdint cimport int64_t, uint8_t from libcpp cimport bool from libcpp.functional cimport reference_wrapper @@ -8,6 +7,7 @@ from libcpp.memory cimport shared_ptr, unique_ptr from libcpp.optional cimport optional from libcpp.string cimport string from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.expressions cimport expression from pylibcudf.libcudf.io.types cimport ( compression_type, @@ -25,232 +25,241 @@ from pylibcudf.libcudf.types cimport data_type, size_type cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: cdef cppclass parquet_reader_options: - parquet_reader_options() except + - source_info get_source_info() except + - vector[vector[size_type]] get_row_groups() except + - const optional[reference_wrapper[expression]]& get_filter() except + - data_type get_timestamp_type() except + - bool is_enabled_use_pandas_metadata() except + - bool is_enabled_arrow_schema() except + - bool is_enabled_allow_mismatched_pq_schemas() except + + parquet_reader_options() except +libcudf_exception_handler + source_info get_source_info() except +libcudf_exception_handler + vector[vector[size_type]] get_row_groups() except +libcudf_exception_handler + const optional[reference_wrapper[expression]]& get_filter()\ + except +libcudf_exception_handler + data_type get_timestamp_type() except +libcudf_exception_handler + bool is_enabled_use_pandas_metadata() except +libcudf_exception_handler + bool is_enabled_arrow_schema() except +libcudf_exception_handler + bool is_enabled_allow_mismatched_pq_schemas() except +libcudf_exception_handler # setter - void set_filter(expression &filter) except + - void set_columns(vector[string] col_names) except + - void set_num_rows(size_type val) except + - void set_row_groups(vector[vector[size_type]] row_grp) except + - void set_skip_rows(int64_t val) except + - void enable_use_arrow_schema(bool val) except + - void 
enable_allow_mismatched_pq_schemas(bool val) except + - void enable_use_pandas_metadata(bool val) except + - void set_timestamp_type(data_type type) except + + void set_filter(expression &filter) except +libcudf_exception_handler + void set_columns(vector[string] col_names) except +libcudf_exception_handler + void set_num_rows(size_type val) except +libcudf_exception_handler + void set_row_groups( + vector[vector[size_type]] row_grp + ) except +libcudf_exception_handler + void set_skip_rows(int64_t val) except +libcudf_exception_handler + void enable_use_arrow_schema(bool val) except +libcudf_exception_handler + void enable_allow_mismatched_pq_schemas( + bool val + ) except +libcudf_exception_handler + void enable_use_pandas_metadata(bool val) except +libcudf_exception_handler + void set_timestamp_type(data_type type) except +libcudf_exception_handler @staticmethod parquet_reader_options_builder builder( source_info src - ) except + + ) except +libcudf_exception_handler cdef cppclass parquet_reader_options_builder: - parquet_reader_options_builder() except + + parquet_reader_options_builder() except +libcudf_exception_handler parquet_reader_options_builder( source_info src - ) except + + ) except +libcudf_exception_handler parquet_reader_options_builder& columns( vector[string] col_names - ) except + + ) except +libcudf_exception_handler parquet_reader_options_builder& row_groups( vector[vector[size_type]] row_grp - ) except + + ) except +libcudf_exception_handler parquet_reader_options_builder& convert_strings_to_categories( bool val - ) except + + ) except +libcudf_exception_handler parquet_reader_options_builder& use_pandas_metadata( bool val - ) except + + ) except +libcudf_exception_handler parquet_reader_options_builder& use_arrow_schema( bool val - ) except + + ) except +libcudf_exception_handler parquet_reader_options_builder& allow_mismatched_pq_schemas( bool val - ) except + + ) except +libcudf_exception_handler parquet_reader_options_builder& 
timestamp_type( data_type type - ) except + + ) except +libcudf_exception_handler parquet_reader_options_builder& filter( const expression & f - ) except + - parquet_reader_options build() except + + ) except +libcudf_exception_handler + parquet_reader_options build() except +libcudf_exception_handler cdef table_with_metadata read_parquet( - parquet_reader_options args) except + + parquet_reader_options args) except +libcudf_exception_handler cdef cppclass parquet_writer_options_base: - parquet_writer_options_base() except + - sink_info get_sink_info() except + - compression_type get_compression() except + - statistics_freq get_stats_level() except + + parquet_writer_options_base() except +libcudf_exception_handler + sink_info get_sink_info() except +libcudf_exception_handler + compression_type get_compression() except +libcudf_exception_handler + statistics_freq get_stats_level() except +libcudf_exception_handler const optional[table_input_metadata]& get_metadata( - ) except + - size_t get_row_group_size_bytes() except + - size_type get_row_group_size_rows() except + - size_t get_max_page_size_bytes() except + - size_type get_max_page_size_rows() except + - size_t get_max_dictionary_size() except + - bool is_enabled_write_arrow_schema() except + + ) except +libcudf_exception_handler + size_t get_row_group_size_bytes() except +libcudf_exception_handler + size_type get_row_group_size_rows() except +libcudf_exception_handler + size_t get_max_page_size_bytes() except +libcudf_exception_handler + size_type get_max_page_size_rows() except +libcudf_exception_handler + size_t get_max_dictionary_size() except +libcudf_exception_handler + bool is_enabled_write_arrow_schema() except +libcudf_exception_handler void set_metadata( table_input_metadata m - ) except + + ) except +libcudf_exception_handler void set_key_value_metadata( vector[map[string, string]] kvm - ) except + + ) except +libcudf_exception_handler void set_stats_level( statistics_freq sf - ) except + + ) except 
+libcudf_exception_handler void set_compression( compression_type compression - ) except + + ) except +libcudf_exception_handler void set_int96_timestamps( bool enabled - ) except + + ) except +libcudf_exception_handler void set_utc_timestamps( bool enabled - ) except + - void set_row_group_size_bytes(size_t val) except + - void set_row_group_size_rows(size_type val) except + - void set_max_page_size_bytes(size_t val) except + - void set_max_page_size_rows(size_type val) except + - void set_max_dictionary_size(size_t val) except + - void enable_write_v2_headers(bool val) except + - void enable_write_arrow_schema(bool val) except + - void set_dictionary_policy(dictionary_policy policy) except + + ) except +libcudf_exception_handler + void set_row_group_size_bytes(size_t val) except +libcudf_exception_handler + void set_row_group_size_rows(size_type val) except +libcudf_exception_handler + void set_max_page_size_bytes(size_t val) except +libcudf_exception_handler + void set_max_page_size_rows(size_type val) except +libcudf_exception_handler + void set_max_dictionary_size(size_t val) except +libcudf_exception_handler + void enable_write_v2_headers(bool val) except +libcudf_exception_handler + void enable_write_arrow_schema(bool val) except +libcudf_exception_handler + void set_dictionary_policy( + dictionary_policy policy + ) except +libcudf_exception_handler cdef cppclass parquet_writer_options(parquet_writer_options_base): - parquet_writer_options() except + - table_view get_table() except + - string get_column_chunks_file_paths() except + + parquet_writer_options() except +libcudf_exception_handler + table_view get_table() except +libcudf_exception_handler + string get_column_chunks_file_paths() except +libcudf_exception_handler void set_partitions( vector[partition_info] partitions - ) except + + ) except +libcudf_exception_handler void set_column_chunks_file_paths( vector[string] column_chunks_file_paths - ) except + + ) except +libcudf_exception_handler 
@staticmethod parquet_writer_options_builder builder( sink_info sink_, table_view table_ - ) except + + ) except +libcudf_exception_handler cdef cppclass parquet_writer_options_builder_base[BuilderT, OptionsT]: - parquet_writer_options_builder_base() except + + parquet_writer_options_builder_base() except +libcudf_exception_handler BuilderT& metadata( table_input_metadata m - ) except + + ) except +libcudf_exception_handler BuilderT& key_value_metadata( vector[map[string, string]] kvm - ) except + + ) except +libcudf_exception_handler BuilderT& stats_level( statistics_freq sf - ) except + + ) except +libcudf_exception_handler BuilderT& compression( compression_type compression - ) except + + ) except +libcudf_exception_handler BuilderT& int96_timestamps( bool enabled - ) except + + ) except +libcudf_exception_handler BuilderT& utc_timestamps( bool enabled - ) except + + ) except +libcudf_exception_handler BuilderT& write_arrow_schema( bool enabled - ) except + + ) except +libcudf_exception_handler BuilderT& row_group_size_bytes( size_t val - ) except + + ) except +libcudf_exception_handler BuilderT& row_group_size_rows( size_type val - ) except + + ) except +libcudf_exception_handler BuilderT& max_page_size_bytes( size_t val - ) except + + ) except +libcudf_exception_handler BuilderT& max_page_size_rows( size_type val - ) except + + ) except +libcudf_exception_handler BuilderT& max_dictionary_size( size_t val - ) except + + ) except +libcudf_exception_handler BuilderT& write_v2_headers( bool val - ) except + + ) except +libcudf_exception_handler BuilderT& dictionary_policy( dictionary_policy val - ) except + - OptionsT build() except + + ) except +libcudf_exception_handler + OptionsT build() except +libcudf_exception_handler cdef cppclass parquet_writer_options_builder( parquet_writer_options_builder_base[parquet_writer_options_builder, parquet_writer_options]): - parquet_writer_options_builder() except + + parquet_writer_options_builder() except 
+libcudf_exception_handler parquet_writer_options_builder( sink_info sink_, table_view table_ - ) except + + ) except +libcudf_exception_handler parquet_writer_options_builder& partitions( vector[partition_info] partitions - ) except + + ) except +libcudf_exception_handler parquet_writer_options_builder& column_chunks_file_paths( vector[string] column_chunks_file_paths - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[vector[uint8_t]] write_parquet( parquet_writer_options args - ) except + + ) except +libcudf_exception_handler cdef cppclass chunked_parquet_writer_options(parquet_writer_options_base): - chunked_parquet_writer_options() except + + chunked_parquet_writer_options() except +libcudf_exception_handler @staticmethod chunked_parquet_writer_options_builder builder( sink_info sink_, - ) except + + ) except +libcudf_exception_handler cdef cppclass chunked_parquet_writer_options_builder( parquet_writer_options_builder_base[chunked_parquet_writer_options_builder, chunked_parquet_writer_options] ): - chunked_parquet_writer_options_builder() except + + chunked_parquet_writer_options_builder() except +libcudf_exception_handler chunked_parquet_writer_options_builder( sink_info sink_, - ) except + + ) except +libcudf_exception_handler cdef cppclass parquet_chunked_writer: - parquet_chunked_writer() except + - parquet_chunked_writer(chunked_parquet_writer_options args) except + + parquet_chunked_writer() except +libcudf_exception_handler + parquet_chunked_writer( + chunked_parquet_writer_options args + ) except +libcudf_exception_handler parquet_chunked_writer& write( table_view table_, - ) except + + ) except +libcudf_exception_handler parquet_chunked_writer& write( const table_view& table_, const vector[partition_info]& partitions, - ) except + + ) except +libcudf_exception_handler unique_ptr[vector[uint8_t]] close( vector[string] column_chunks_file_paths, - ) except + + ) except +libcudf_exception_handler cdef cppclass chunked_parquet_reader: - 
chunked_parquet_reader() except + + chunked_parquet_reader() except +libcudf_exception_handler chunked_parquet_reader( size_t chunk_read_limit, - const parquet_reader_options& options) except + + const parquet_reader_options& options) except +libcudf_exception_handler chunked_parquet_reader( size_t chunk_read_limit, size_t pass_read_limit, - const parquet_reader_options& options) except + - bool has_next() except + - table_with_metadata read_chunk() except + + const parquet_reader_options& options) except +libcudf_exception_handler + bool has_next() except +libcudf_exception_handler + table_with_metadata read_chunk() except +libcudf_exception_handler cdef unique_ptr[vector[uint8_t]] merge_row_group_metadata( const vector[unique_ptr[vector[uint8_t]]]& metadata_list - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/io/parquet_metadata.pxd b/python/pylibcudf/pylibcudf/libcudf/io/parquet_metadata.pxd index b0ce13e4492..cdc87093f3a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/parquet_metadata.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/parquet_metadata.pxd @@ -1,31 +1,34 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
- from libc.stdint cimport int64_t from libcpp.string cimport string from libcpp.unordered_map cimport unordered_map from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.types cimport size_type from pylibcudf.libcudf.io.types cimport source_info cdef extern from "cudf/io/parquet_metadata.hpp" namespace "cudf::io" nogil: cdef cppclass parquet_column_schema: - parquet_column_schema() except+ - string name() except+ - size_type num_children() except+ - parquet_column_schema child(int idx) except+ - vector[parquet_column_schema] children() except+ + parquet_column_schema() except +libcudf_exception_handler + string name() except +libcudf_exception_handler + size_type num_children() except +libcudf_exception_handler + parquet_column_schema child(int idx) except +libcudf_exception_handler + vector[parquet_column_schema] children() except +libcudf_exception_handler cdef cppclass parquet_schema: - parquet_schema() except+ - parquet_column_schema root() except+ + parquet_schema() except +libcudf_exception_handler + parquet_column_schema root() except +libcudf_exception_handler cdef cppclass parquet_metadata: - parquet_metadata() except+ - parquet_schema schema() except+ - int64_t num_rows() except+ - size_type num_rowgroups() except+ - unordered_map[string, string] metadata() except+ - vector[unordered_map[string, int64_t]] rowgroup_metadata() except+ + parquet_metadata() except +libcudf_exception_handler + parquet_schema schema() except +libcudf_exception_handler + int64_t num_rows() except +libcudf_exception_handler + size_type num_rowgroups() except +libcudf_exception_handler + unordered_map[string, string] metadata() except +libcudf_exception_handler + vector[unordered_map[string, int64_t]] rowgroup_metadata()\ + except +libcudf_exception_handler - cdef parquet_metadata read_parquet_metadata(source_info src_info) except+ + cdef parquet_metadata read_parquet_metadata( + source_info src_info + ) 
except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/io/text.pxd b/python/pylibcudf/pylibcudf/libcudf/io/text.pxd index 14397ef970d..b49fede21b3 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/text.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/text.pxd @@ -1,9 +1,9 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - from libc.stdint cimport uint64_t from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.string cimport string +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column @@ -11,27 +11,37 @@ cdef extern from "cudf/io/text/byte_range_info.hpp" \ namespace "cudf::io::text" nogil: cdef cppclass byte_range_info: - byte_range_info() except + - byte_range_info(size_t offset, size_t size) except + + byte_range_info() except +libcudf_exception_handler + byte_range_info( + size_t offset, size_t size + ) except +libcudf_exception_handler cdef extern from "cudf/io/text/data_chunk_source.hpp" \ namespace "cudf::io::text" nogil: cdef cppclass data_chunk_source: - data_chunk_source() except + + data_chunk_source() except +libcudf_exception_handler cdef extern from "cudf/io/text/data_chunk_source_factories.hpp" \ namespace "cudf::io::text" nogil: - unique_ptr[data_chunk_source] make_source(string data) except + + unique_ptr[data_chunk_source] make_source( + string data + ) except +libcudf_exception_handler unique_ptr[data_chunk_source] \ - make_source_from_file(string filename) except + + make_source_from_file( + string filename + ) except +libcudf_exception_handler unique_ptr[data_chunk_source] \ - make_source_from_bgzip_file(string filename) except + + make_source_from_bgzip_file( + string filename + ) except +libcudf_exception_handler unique_ptr[data_chunk_source] \ - make_source_from_bgzip_file(string filename, - uint64_t virtual_begin, - uint64_t virtual_end) except + + make_source_from_bgzip_file( + string filename, + uint64_t virtual_begin, + 
uint64_t virtual_end + ) except +libcudf_exception_handler cdef extern from "cudf/io/text/multibyte_split.hpp" \ @@ -41,8 +51,10 @@ cdef extern from "cudf/io/text/multibyte_split.hpp" \ byte_range_info byte_range bool strip_delimiters - parse_options() except + + parse_options() except +libcudf_exception_handler - unique_ptr[column] multibyte_split(data_chunk_source source, - string delimiter, - parse_options options) except + + unique_ptr[column] multibyte_split( + data_chunk_source source, + string delimiter, + parse_options options + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/io/timezone.pxd b/python/pylibcudf/pylibcudf/libcudf/io/timezone.pxd index 676901efcec..b59692ebdac 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/timezone.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/timezone.pxd @@ -1,9 +1,9 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.optional cimport optional from libcpp.string cimport string +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.table.table cimport table @@ -11,4 +11,4 @@ cdef extern from "cudf/timezone.hpp" namespace "cudf" nogil: unique_ptr[table] make_timezone_transition_table( optional[string] tzif_dir, string timezone_name - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/io/types.pxd b/python/pylibcudf/pylibcudf/libcudf/io/types.pxd index 5f3be2f0727..e02cb79e10d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/types.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/types.pxd @@ -1,5 +1,4 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- cimport pylibcudf.libcudf.io.data_sink as cudf_io_data_sink cimport pylibcudf.libcudf.io.datasource as cudf_io_datasource cimport pylibcudf.libcudf.table.table_view as cudf_table_view @@ -10,6 +9,7 @@ from libcpp.memory cimport unique_ptr from libcpp.string cimport string from libcpp.unordered_map cimport unordered_map from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.types cimport size_type @@ -72,7 +72,7 @@ cdef extern from "cudf/io/types.hpp" \ vector[column_name_info] children cdef cppclass table_metadata: - table_metadata() except + + table_metadata() except +libcudf_exception_handler map[string, string] user_data vector[unordered_map[string, string]] per_file_user_data @@ -97,8 +97,10 @@ cdef extern from "cudf/io/types.hpp" \ string get_name() cdef cppclass table_input_metadata: - table_input_metadata() except + - table_input_metadata(const cudf_table_view.table_view& table) except + + table_input_metadata() except +libcudf_exception_handler + table_input_metadata( + const cudf_table_view.table_view& table + ) except +libcudf_exception_handler vector[column_in_metadata] column_metadata @@ -107,7 +109,9 @@ cdef extern from "cudf/io/types.hpp" \ size_type num_rows partition_info() - partition_info(size_type start_row, size_type num_rows) except + + partition_info( + size_type start_row, size_type num_rows + ) except +libcudf_exception_handler cdef cppclass host_buffer: const char* data @@ -117,21 +121,33 @@ cdef extern from "cudf/io/types.hpp" \ host_buffer(const char* data, size_t size) cdef cppclass source_info: - const vector[string]& filepaths() except + - - source_info() except + - source_info(const vector[string] &filepaths) except + - source_info(const vector[host_buffer] &host_buffers) except + - source_info(cudf_io_datasource.datasource *source) except + - source_info(const vector[cudf_io_datasource.datasource*] &datasources) 
except + + const vector[string]& filepaths() except +libcudf_exception_handler + + source_info() except +libcudf_exception_handler + source_info( + const vector[string] &filepaths + ) except +libcudf_exception_handler + source_info( + const vector[host_buffer] &host_buffers + ) except +libcudf_exception_handler + source_info( + cudf_io_datasource.datasource *source + ) except +libcudf_exception_handler + source_info( + const vector[cudf_io_datasource.datasource*] &datasources + ) except +libcudf_exception_handler cdef cppclass sink_info: const vector[string]& filepaths() const vector[cudf_io_data_sink.data_sink *]& user_sinks() - sink_info() except + - sink_info(string file_path) except + - sink_info(vector[string] file_path) except + - sink_info(vector[char] * buffer) except + - sink_info(cudf_io_data_sink.data_sink * user_sink) except + - sink_info(vector[cudf_io_data_sink.data_sink *] user_sink) except + + sink_info() except +libcudf_exception_handler + sink_info(string file_path) except +libcudf_exception_handler + sink_info(vector[string] file_path) except +libcudf_exception_handler + sink_info(vector[char] * buffer) except +libcudf_exception_handler + sink_info( + cudf_io_data_sink.data_sink * user_sink + ) except +libcudf_exception_handler + sink_info( + vector[cudf_io_data_sink.data_sink *] user_sink + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/join.pxd b/python/pylibcudf/pylibcudf/libcudf/join.pxd index f8e592c2104..5a36b05fd9f 100644 --- a/python/pylibcudf/pylibcudf/libcudf/join.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/join.pxd @@ -22,57 +22,57 @@ cdef extern from "cudf/join.hpp" namespace "cudf" nogil: cdef gather_map_pair_type inner_join( const table_view left_keys, const table_view right_keys, - ) except + + ) except +libcudf_exception_handler cdef gather_map_pair_type left_join( const table_view left_keys, const table_view right_keys, - ) except + + ) except +libcudf_exception_handler cdef 
gather_map_pair_type full_join( const table_view left_keys, const table_view right_keys, - ) except + + ) except +libcudf_exception_handler cdef gather_map_type left_semi_join( const table_view left_keys, const table_view right_keys, - ) except + + ) except +libcudf_exception_handler cdef gather_map_type left_anti_join( const table_view left_keys, const table_view right_keys, - ) except + + ) except +libcudf_exception_handler cdef gather_map_pair_type inner_join( const table_view left_keys, const table_view right_keys, null_equality nulls_equal, - ) except + + ) except +libcudf_exception_handler cdef gather_map_pair_type left_join( const table_view left_keys, const table_view right_keys, null_equality nulls_equal, - ) except + + ) except +libcudf_exception_handler cdef gather_map_pair_type full_join( const table_view left_keys, const table_view right_keys, null_equality nulls_equal, - ) except + + ) except +libcudf_exception_handler cdef gather_map_type left_semi_join( const table_view left_keys, const table_view right_keys, null_equality nulls_equal, - ) except + + ) except +libcudf_exception_handler cdef gather_map_type left_anti_join( const table_view left_keys, const table_view right_keys, null_equality nulls_equal, - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[table] cross_join( const table_view left, diff --git a/python/pylibcudf/pylibcudf/libcudf/json.pxd b/python/pylibcudf/pylibcudf/libcudf/json.pxd index 571ba7be7af..d5bdd6d299a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/json.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/json.pxd @@ -1,8 +1,8 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. 
- from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.string cimport string +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport scalar, string_scalar @@ -10,18 +10,20 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar, string_scalar cdef extern from "cudf/json/json.hpp" namespace "cudf" nogil: cdef cppclass get_json_object_options: - get_json_object_options() except + + get_json_object_options() except +libcudf_exception_handler # getters - bool get_allow_single_quotes() except + - bool get_strip_quotes_from_single_strings() except + - bool get_missing_fields_as_nulls() except + + bool get_allow_single_quotes() except +libcudf_exception_handler + bool get_strip_quotes_from_single_strings() except +libcudf_exception_handler + bool get_missing_fields_as_nulls() except +libcudf_exception_handler # setters - void set_allow_single_quotes(bool val) except + - void set_strip_quotes_from_single_strings(bool val) except + - void set_missing_fields_as_nulls(bool val) except + + void set_allow_single_quotes(bool val) except +libcudf_exception_handler + void set_strip_quotes_from_single_strings( + bool val + ) except +libcudf_exception_handler + void set_missing_fields_as_nulls(bool val) except +libcudf_exception_handler cdef unique_ptr[column] get_json_object( column_view col, string_scalar json_path, get_json_object_options options, - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/labeling.pxd b/python/pylibcudf/pylibcudf/libcudf/labeling.pxd index 400c4282f7a..e5dbec879ce 100644 --- a/python/pylibcudf/pylibcudf/libcudf/labeling.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/labeling.pxd @@ -1,6 +1,7 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. 
from libcpp cimport int from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -16,4 +17,4 @@ cdef extern from "cudf/labeling/label_bins.hpp" namespace "cudf" nogil: inclusive left_inclusive, const column_view &right_edges, inclusive right_inclusive - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd index 09a5d84c64f..3e4c88d62b0 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd @@ -2,6 +2,7 @@ from libc.stdint cimport int32_t from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.table.table_view cimport table_view @@ -16,13 +17,13 @@ cdef extern from "cudf/lists/combine.hpp" namespace \ cdef unique_ptr[column] concatenate_rows( const table_view input_table - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] concatenate_list_elements( const table_view input_table, - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] concatenate_list_elements( const column_view input_table, concatenate_null_policy null_policy - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd index 81a5ad46389..13a32d46c7a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd @@ -1,5 +1,4 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. 
- from libc.stdint cimport int32_t from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd index e283551ed0c..64c75ccabd3 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd @@ -1,9 +1,11 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. - from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view cdef extern from "cudf/lists/count_elements.hpp" namespace "cudf::lists" nogil: - cdef unique_ptr[column] count_elements(const lists_column_view&) except + + cdef unique_ptr[column] count_elements( + const lists_column_view& + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd index c64b2715cca..adec02caad1 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport size_type @@ -10,4 +10,4 @@ cdef extern from "cudf/lists/explode.hpp" namespace "cudf" nogil: cdef unique_ptr[table] explode_outer( const table_view, size_type explode_column_idx, - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd index 2ea060d87de..046bb51c68e 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. - from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column, column_view from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view from pylibcudf.libcudf.types cimport size_type @@ -10,8 +10,8 @@ cdef extern from "cudf/lists/extract.hpp" namespace "cudf::lists" nogil: cdef unique_ptr[column] extract_list_element( const lists_column_view&, size_type - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] extract_list_element( const lists_column_view&, const column_view& - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd index 54f5a8409b6..35e2559d902 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -9,10 +9,10 @@ cdef extern from "cudf/lists/filling.hpp" namespace "cudf::lists" nogil: cdef unique_ptr[column] sequences( const column_view& starts, const column_view& sizes, - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] sequences( const column_view& starts, const column_view& steps, const column_view& sizes, - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd index a762c6aa333..69d5eda7e7e 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. - from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view @@ -9,4 +9,4 @@ cdef extern from "cudf/lists/gather.hpp" namespace "cudf::lists" nogil: cdef unique_ptr[column] segmented_gather( const lists_column_view& source_column, const lists_column_view& gather_map_list - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/lists_column_view.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/lists_column_view.pxd index f43340a78b0..917245b3bef 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/lists_column_view.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/lists_column_view.pxd @@ -1,5 +1,5 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column_view cimport ( column_view, mutable_column_view, @@ -9,13 +9,19 @@ from pylibcudf.libcudf.types cimport size_type cdef extern from "cudf/lists/lists_column_view.hpp" namespace "cudf" nogil: cdef cppclass lists_column_view(column_view): - lists_column_view() except + - lists_column_view(const lists_column_view& lists_column) except + - lists_column_view(const column_view& lists_column) except + - lists_column_view& operator=(const lists_column_view&) except + - column_view parent() except + - column_view offsets() except + - column_view child() except + + lists_column_view() except +libcudf_exception_handler + lists_column_view( + const lists_column_view& lists_colum + ) except +libcudf_exception_handler + lists_column_view( + const column_view& lists_column + ) except +libcudf_exception_handler + lists_column_view& operator=( + const lists_column_view& + ) except +libcudf_exception_handler + column_view parent() except +libcudf_exception_handler + column_view offsets() except +libcudf_exception_handler + column_view child() except +libcudf_exception_handler cdef enum: offsets_column_index "cudf::lists_column_view::offsets_column_index" diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd index 43b671ebfa0..1ae3b4409ef 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view @@ -8,4 +8,4 @@ from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view cdef extern from "cudf/lists/reverse.hpp" namespace "cudf::lists" nogil: cdef unique_ptr[column] reverse( const lists_column_view& lists_column, - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd index 266f04ef6b3..1f4855bdbf3 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. - from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view from pylibcudf.libcudf.types cimport nan_equality, null_equality @@ -12,25 +12,25 @@ cdef extern from "cudf/lists/set_operations.hpp" namespace "cudf::lists" nogil: const lists_column_view& rhs, null_equality nulls_equal, nan_equality nans_equal - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] have_overlap( const lists_column_view& lhs, const lists_column_view& rhs, null_equality nulls_equal, nan_equality nans_equal - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] intersect_distinct( const lists_column_view& lhs, const lists_column_view& rhs, null_equality nulls_equal, nan_equality nans_equal - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] union_distinct( const lists_column_view& lhs, const lists_column_view& rhs, null_equality nulls_equal, nan_equality nans_equal - ) except + + ) except 
+libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd index ea45f999c47..344b55b402f 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. - from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view from pylibcudf.libcudf.types cimport null_order, order @@ -11,10 +11,10 @@ cdef extern from "cudf/lists/sorting.hpp" namespace "cudf::lists" nogil: const lists_column_view source_column, order column_order, null_order null_precedence - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] stable_sort_lists( const lists_column_view source_column, order column_order, null_order null_precedence - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd index d9df7c3ca2e..8341ac69bf5 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view from pylibcudf.libcudf.types cimport nan_equality, null_equality @@ -11,10 +11,10 @@ cdef extern from "cudf/lists/stream_compaction.hpp" \ cdef unique_ptr[column] apply_boolean_mask( const lists_column_view& lists_column, const lists_column_view& boolean_mask, - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] distinct( const lists_column_view& lists_column, null_equality nulls_equal, nan_equality nans_equal - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/merge.pxd b/python/pylibcudf/pylibcudf/libcudf/merge.pxd index 6930b7a0d06..f546ae3bbdd 100644 --- a/python/pylibcudf/pylibcudf/libcudf/merge.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/merge.pxd @@ -1,8 +1,8 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - cimport pylibcudf.libcudf.types as libcudf_types from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view @@ -13,4 +13,4 @@ cdef extern from "cudf/merge.hpp" namespace "cudf" nogil: vector[libcudf_types.size_type] key_cols, vector[libcudf_types.order] column_order, vector[libcudf_types.null_order] null_precedence, - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd b/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd index 27af4a3bdb1..5b49ddc3bbe 100644 --- a/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libc.stdint cimport int32_t from libcpp.pair cimport pair +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport bitmask_type, mask_state, size_type @@ -12,21 +12,21 @@ from rmm.librmm.device_buffer cimport device_buffer cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil: cdef device_buffer copy_bitmask "cudf::copy_bitmask" ( column_view view - ) except + + ) except +libcudf_exception_handler cdef size_t bitmask_allocation_size_bytes ( size_type number_of_bits, size_t padding_boundary - ) except + + ) except +libcudf_exception_handler cdef size_t bitmask_allocation_size_bytes ( size_type number_of_bits - ) except + + ) except +libcudf_exception_handler cdef device_buffer create_null_mask ( size_type size, mask_state state - ) except + + ) except +libcudf_exception_handler cdef pair[device_buffer, size_type] bitmask_and( table_view view diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd index fd768d22704..c835c8249ca 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2023-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr from libcpp.string cimport string +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar @@ -14,10 +14,10 @@ cdef extern from "nvtext/byte_pair_encoding.hpp" namespace "nvtext" nogil: cdef unique_ptr[bpe_merge_pairs] load_merge_pairs( const column_view &merge_pairs - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] byte_pair_encoding( const column_view &strings, const bpe_merge_pairs &merge_pairs, const string_scalar &separator - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd index d459372fb8f..fbb1c0b2f4c 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp cimport bool from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -11,8 +11,8 @@ cdef extern from "nvtext/edit_distance.hpp" namespace "nvtext" nogil: cdef unique_ptr[column] edit_distance( const column_view & strings, const column_view & targets - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] edit_distance_matrix( const column_view & strings - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/generate_ngrams.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/generate_ngrams.pxd index eefae746662..c7bd4da5441 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/generate_ngrams.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/generate_ngrams.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar @@ -13,14 +13,14 @@ cdef extern from "nvtext/generate_ngrams.hpp" namespace "nvtext" nogil: const column_view &strings, size_type ngrams, const string_scalar & separator - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] generate_character_ngrams( const column_view &strings, size_type ngrams - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] hash_character_ngrams( const column_view &strings, size_type ngrams - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/jaccard.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/jaccard.pxd index 16c5f7f575e..d40943f2649 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/jaccard.pxd +++ 
b/python/pylibcudf/pylibcudf/libcudf/nvtext/jaccard.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2023-2024, NVIDIA CORPORATION. - from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport size_type @@ -12,4 +12,4 @@ cdef extern from "nvtext/jaccard.hpp" namespace "nvtext" nogil: const column_view &input1, const column_view &input2, size_type width - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd index ebf8eda1ce3..8570531dfde 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2023-2024, NVIDIA CORPORATION. - from libc.stdint cimport uint32_t, uint64_t from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport numeric_scalar @@ -14,13 +14,13 @@ cdef extern from "nvtext/minhash.hpp" namespace "nvtext" nogil: const column_view &strings, const numeric_scalar[uint32_t] seed, const size_type width, - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] minhash( const column_view &strings, const column_view &seeds, const size_type width, - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] minhash_permuted( const column_view &strings, @@ -34,13 +34,13 @@ cdef extern from "nvtext/minhash.hpp" namespace "nvtext" nogil: const column_view &strings, const column_view &seeds, const size_type width, - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] minhash64( const column_view &strings, const 
numeric_scalar[uint64_t] seed, const size_type width, - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] minhash64_permuted( const column_view &strings, @@ -53,9 +53,9 @@ cdef extern from "nvtext/minhash.hpp" namespace "nvtext" nogil: cdef unique_ptr[column] word_minhash( const column_view &input, const column_view &seeds - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] word_minhash64( const column_view &input, const column_view &seeds - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd index 89f6e5edfc4..fae8fd1e59a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar @@ -14,4 +14,4 @@ cdef extern from "nvtext/ngrams_tokenize.hpp" namespace "nvtext" nogil: size_type ngrams, const string_scalar & delimiter, const string_scalar & separator - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/normalize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/normalize.pxd index cbf121920e1..f8b082c8429 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/normalize.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/normalize.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp cimport bool from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -10,9 +10,9 @@ cdef extern from "nvtext/normalize.hpp" namespace "nvtext" nogil: cdef unique_ptr[column] normalize_spaces( const column_view & strings - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] normalize_characters( const column_view & strings, bool do_lower_case - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/replace.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/replace.pxd index 6bcfa1d9380..82983aaf618 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/replace.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/replace.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar @@ -14,11 +14,11 @@ cdef extern from "nvtext/replace.hpp" namespace "nvtext" nogil: const column_view & targets, const column_view & replacements, const string_scalar & delimiter - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] filter_tokens( const column_view & strings, size_type min_token_length, const string_scalar & replacement, const string_scalar & delimiter - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd index be3a2d75718..1f944d95701 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd @@ -1,8 +1,8 @@ # Copyright (c) 2020-2024, NVIDIA 
CORPORATION. - from libc.stdint cimport int32_t from libcpp cimport bool from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport size_type @@ -15,16 +15,16 @@ cdef extern from "nvtext/stemmer.hpp" namespace "nvtext" nogil: cdef unique_ptr[column] porter_stemmer_measure( const column_view & strings - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] is_letter( column_view source_strings, letter_type ltype, - size_type character_index) except + + size_type character_index) except +libcudf_exception_handler cdef unique_ptr[column] is_letter( column_view source_strings, letter_type ltype, - column_view indices) except + + column_view indices) except +libcudf_exception_handler ctypedef int32_t underlying_type_t_letter_type diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/subword_tokenize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/subword_tokenize.pxd index 8dac86d688d..1ac69c87c4b 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/subword_tokenize.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/subword_tokenize.pxd @@ -1,9 +1,9 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libc.stdint cimport uint16_t, uint32_t from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.string cimport string +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -31,7 +31,7 @@ cdef extern from "nvtext/subword_tokenize.hpp" namespace "nvtext" nogil: cdef unique_ptr[hashed_vocabulary] load_vocabulary_file( const string &filename_hashed_vocabulary - ) except + + ) except +libcudf_exception_handler cdef tokenizer_result subword_tokenize( const column_view & strings, @@ -40,7 +40,7 @@ cdef extern from "nvtext/subword_tokenize.hpp" namespace "nvtext" nogil: uint32_t stride, bool do_lower, bool do_truncate - ) except + + ) except +libcudf_exception_handler cdef tokenizer_result subword_tokenize( const column_view &strings, @@ -49,7 +49,7 @@ cdef extern from "nvtext/subword_tokenize.hpp" namespace "nvtext" nogil: uint32_t stride, bool do_lower, bool do_truncate - ) except + + ) except +libcudf_exception_handler cdef extern from "" namespace "std" nogil: cdef tokenizer_result move(tokenizer_result) diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/tokenize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/tokenize.pxd index 34c054cf36f..a8f9d0451bc 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/tokenize.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/tokenize.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar @@ -12,43 +12,43 @@ cdef extern from "nvtext/tokenize.hpp" namespace "nvtext" nogil: cdef unique_ptr[column] tokenize( const column_view & strings, const string_scalar & delimiter - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] tokenize( const column_view & strings, const column_view & delimiters - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] count_tokens( const column_view & strings, const string_scalar & delimiter - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] count_tokens( const column_view & strings, const column_view & delimiters - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] character_tokenize( const column_view & strings - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] detokenize( const column_view & strings, const column_view & row_indices, const string_scalar & separator - ) except + + ) except +libcudf_exception_handler cdef struct tokenize_vocabulary "nvtext::tokenize_vocabulary": pass cdef unique_ptr[tokenize_vocabulary] load_vocabulary( const column_view & strings - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] tokenize_with_vocabulary( const column_view & strings, const tokenize_vocabulary & vocabulary, const string_scalar & delimiter, size_type default_id - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd b/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd index 89bddbffab5..04566b6e40a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd @@ -1,10 +1,10 @@ # Copyright (c) 
2020-2024, NVIDIA CORPORATION. - cimport pylibcudf.libcudf.types as libcudf_types from libc.stdint cimport uint32_t from libcpp.memory cimport unique_ptr from libcpp.pair cimport pair from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.table.table cimport table @@ -17,18 +17,18 @@ cdef extern from "cudf/partitioning.hpp" namespace "cudf" nogil: const table_view& input, const vector[libcudf_types.size_type]& columns_to_hash, int num_partitions - ) except + + ) except +libcudf_exception_handler cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] \ partition "cudf::partition" ( const table_view& t, const column_view& partition_map, int num_partitions - ) except + + ) except +libcudf_exception_handler cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] \ round_robin_partition "cudf::round_robin_partition" ( const table_view& input, int num_partitions, int start_partition - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd b/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd index cf2350fc36c..8f60302e776 100644 --- a/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd @@ -1,8 +1,8 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.table.table cimport table @@ -24,7 +24,7 @@ cdef extern from "cudf/quantiles.hpp" namespace "cudf" nogil: interpolation interp, column_view ordered_indices, bool exact, - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[table] quantiles ( table_view source_table, @@ -33,4 +33,4 @@ cdef extern from "cudf/quantiles.hpp" namespace "cudf" nogil: sorted is_input_sorted, vector[order] column_order, vector[null_order] null_precedence, - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/reduce.pxd b/python/pylibcudf/pylibcudf/libcudf/reduce.pxd index 6d2f4bd23d1..ad79187b733 100644 --- a/python/pylibcudf/pylibcudf/libcudf/reduce.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/reduce.pxd @@ -1,8 +1,8 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.utility cimport pair +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.aggregation cimport reduce_aggregation, scan_aggregation from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -15,7 +15,7 @@ cdef extern from "cudf/reduction.hpp" namespace "cudf" nogil: column_view col, const reduce_aggregation& agg, data_type type - ) except + + ) except +libcudf_exception_handler cpdef enum class scan_type(bool): INCLUSIVE "cudf::scan_type::INCLUSIVE", @@ -25,9 +25,9 @@ cdef extern from "cudf/reduction.hpp" namespace "cudf" nogil: column_view col, const scan_aggregation& agg, scan_type inclusive - ) except + + ) except +libcudf_exception_handler cdef pair[unique_ptr[scalar], unique_ptr[scalar]] cpp_minmax "cudf::minmax" ( column_view col - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/replace.pxd b/python/pylibcudf/pylibcudf/libcudf/replace.pxd index 4ac44fc946e..bef5a25367b 100644 --- a/python/pylibcudf/pylibcudf/libcudf/replace.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/replace.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp cimport bool from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport ( column_view, @@ -18,32 +18,32 @@ cdef extern from "cudf/replace.hpp" namespace "cudf" nogil: cdef unique_ptr[column] replace_nulls( column_view source_column, - column_view replacement_column) except + + column_view replacement_column) except +libcudf_exception_handler cdef unique_ptr[column] replace_nulls( column_view source_column, - scalar replacement) except + + scalar replacement) except +libcudf_exception_handler cdef unique_ptr[column] replace_nulls( column_view source_column, - replace_policy replace_policy) except + + replace_policy replace_policy) except +libcudf_exception_handler cdef unique_ptr[column] find_and_replace_all( column_view source_column, column_view values_to_replace, - column_view replacement_values) except + + column_view replacement_values) except +libcudf_exception_handler cdef unique_ptr[column] clamp( column_view source_column, scalar lo, scalar lo_replace, - scalar hi, scalar hi_replace) except + + scalar hi, scalar hi_replace) except +libcudf_exception_handler cdef unique_ptr[column] clamp( column_view source_column, - scalar lo, scalar hi) except + + scalar lo, scalar hi) except +libcudf_exception_handler cdef unique_ptr[column] normalize_nans_and_zeros( - column_view source_column) except + + column_view source_column) except +libcudf_exception_handler cdef void normalize_nans_and_zeros( - mutable_column_view source_column) except + + mutable_column_view source_column) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/reshape.pxd b/python/pylibcudf/pylibcudf/libcudf/reshape.pxd index 446a082ab1b..92ab4773940 100644 --- a/python/pylibcudf/pylibcudf/libcudf/reshape.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/reshape.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2019-2024, 
NVIDIA CORPORATION. - from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view @@ -10,7 +10,7 @@ from pylibcudf.libcudf.types cimport size_type cdef extern from "cudf/reshape.hpp" namespace "cudf" nogil: cdef unique_ptr[column] interleave_columns( table_view source_table - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[table] tile( table_view source_table, size_type count - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/rolling.pxd b/python/pylibcudf/pylibcudf/libcudf/rolling.pxd index 9e76faa0eba..0fd7eeb73c0 100644 --- a/python/pylibcudf/pylibcudf/libcudf/rolling.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/rolling.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.aggregation cimport rolling_aggregation from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -13,11 +13,11 @@ cdef extern from "cudf/rolling.hpp" namespace "cudf" nogil: column_view preceding_window, column_view following_window, size_type min_periods, - rolling_aggregation& agg) except + + rolling_aggregation& agg) except +libcudf_exception_handler cdef unique_ptr[column] rolling_window( column_view source, size_type preceding_window, size_type following_window, size_type min_periods, - rolling_aggregation& agg) except + + rolling_aggregation& agg) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/round.pxd b/python/pylibcudf/pylibcudf/libcudf/round.pxd index 1b65133f275..efd9e3de25d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/round.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/round.pxd 
@@ -1,7 +1,7 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. - from libc.stdint cimport int32_t from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -16,4 +16,4 @@ cdef extern from "cudf/round.hpp" namespace "cudf" nogil: const column_view& input, int32_t decimal_places, rounding_method method, - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar.pxd b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar.pxd index a51413669c5..2c67dc325af 100644 --- a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar.pxd @@ -1,8 +1,8 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - from libc.stdint cimport int32_t, int64_t from libcpp cimport bool from libcpp.string cimport string +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.fixed_point.fixed_point cimport scale_type from pylibcudf.libcudf.table.table_view cimport table_view @@ -11,64 +11,66 @@ from pylibcudf.libcudf.types cimport data_type cdef extern from "cudf/scalar/scalar.hpp" namespace "cudf" nogil: cdef cppclass scalar: - scalar() except + - scalar(scalar other) except + - data_type type() except + - void set_valid_async(bool is_valid) except + - bool is_valid() except + + scalar() except +libcudf_exception_handler + scalar(scalar other) except +libcudf_exception_handler + data_type type() except +libcudf_exception_handler + void set_valid_async(bool is_valid) except +libcudf_exception_handler + bool is_valid() except +libcudf_exception_handler cdef cppclass numeric_scalar[T](scalar): - numeric_scalar() except + - numeric_scalar(numeric_scalar other) except + - numeric_scalar(T value) except + - numeric_scalar(T value, bool is_valid) 
except + - void set_value(T value) except + - T value() except + + numeric_scalar() except +libcudf_exception_handler + numeric_scalar(numeric_scalar other) except +libcudf_exception_handler + numeric_scalar(T value) except +libcudf_exception_handler + numeric_scalar(T value, bool is_valid) except +libcudf_exception_handler + void set_value(T value) except +libcudf_exception_handler + T value() except +libcudf_exception_handler cdef cppclass timestamp_scalar[T](scalar): - timestamp_scalar() except + - timestamp_scalar(timestamp_scalar other) except + - timestamp_scalar(int64_t value) except + - timestamp_scalar(int64_t value, bool is_valid) except + - timestamp_scalar(int32_t value) except + - timestamp_scalar(int32_t value, bool is_valid) except + - int64_t ticks_since_epoch_64 "ticks_since_epoch"() except + - int32_t ticks_since_epoch_32 "ticks_since_epoch"() except + - T value() except + + timestamp_scalar() except +libcudf_exception_handler + timestamp_scalar(timestamp_scalar other) except +libcudf_exception_handler + timestamp_scalar(int64_t value) except +libcudf_exception_handler + timestamp_scalar(int64_t value, bool is_valid) except +libcudf_exception_handler + timestamp_scalar(int32_t value) except +libcudf_exception_handler + timestamp_scalar(int32_t value, bool is_valid) except +libcudf_exception_handler + int64_t ticks_since_epoch_64 "ticks_since_epoch"()\ + except +libcudf_exception_handler + int32_t ticks_since_epoch_32 "ticks_since_epoch"()\ + except +libcudf_exception_handler + T value() except +libcudf_exception_handler cdef cppclass duration_scalar[T](scalar): - duration_scalar() except + - duration_scalar(duration_scalar other) except + - duration_scalar(int64_t value) except + - duration_scalar(int64_t value, bool is_valid) except + - duration_scalar(int32_t value) except + - duration_scalar(int32_t value, bool is_valid) except + - int64_t ticks "count"() except + - T value() except + + duration_scalar() except +libcudf_exception_handler + 
duration_scalar(duration_scalar other) except +libcudf_exception_handler + duration_scalar(int64_t value) except +libcudf_exception_handler + duration_scalar(int64_t value, bool is_valid) except +libcudf_exception_handler + duration_scalar(int32_t value) except +libcudf_exception_handler + duration_scalar(int32_t value, bool is_valid) except +libcudf_exception_handler + int64_t ticks "count"() except +libcudf_exception_handler + T value() except +libcudf_exception_handler cdef cppclass string_scalar(scalar): - string_scalar() except + - string_scalar(string st) except + - string_scalar(string st, bool is_valid) except + - string_scalar(string_scalar other) except + - string to_string() except + + string_scalar() except +libcudf_exception_handler + string_scalar(string st) except +libcudf_exception_handler + string_scalar(string st, bool is_valid) except +libcudf_exception_handler + string_scalar(string_scalar other) except +libcudf_exception_handler + string to_string() except +libcudf_exception_handler cdef cppclass fixed_point_scalar[T](scalar): - fixed_point_scalar() except + + fixed_point_scalar() except +libcudf_exception_handler fixed_point_scalar(int64_t value, scale_type scale, - bool is_valid) except + + bool is_valid) except +libcudf_exception_handler fixed_point_scalar(data_type value, scale_type scale, - bool is_valid) except + - int64_t value() except + + bool is_valid) except +libcudf_exception_handler + int64_t value() except +libcudf_exception_handler # TODO: Figure out how to add an int32 overload of value() cdef cppclass list_scalar(scalar): - list_scalar(column_view col) except + - list_scalar(column_view col, bool is_valid) except + - column_view view() except + + list_scalar(column_view col) except +libcudf_exception_handler + list_scalar(column_view col, bool is_valid) except +libcudf_exception_handler + column_view view() except +libcudf_exception_handler cdef cppclass struct_scalar(scalar): - struct_scalar(table_view cols, bool valid) except 
+ - table_view view() except + + struct_scalar(table_view cols, bool valid) except +libcudf_exception_handler + table_view view() except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd index ee4b47935b2..9fb907970de 100644 --- a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd @@ -1,13 +1,19 @@ # Copyright (c) 2024, NVIDIA CORPORATION. - from libcpp.memory cimport unique_ptr from libcpp.string cimport string +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport scalar cdef extern from "cudf/scalar/scalar_factories.hpp" namespace "cudf" nogil: - cdef unique_ptr[scalar] make_string_scalar(const string & _string) except + - cdef unique_ptr[scalar] make_fixed_width_scalar[T](T value) except + + cdef unique_ptr[scalar] make_string_scalar( + const string & _string + ) except +libcudf_exception_handler + cdef unique_ptr[scalar] make_fixed_width_scalar[T]( + T value + ) except +libcudf_exception_handler - cdef unique_ptr[scalar] make_empty_scalar_like(const column_view &) except + + cdef unique_ptr[scalar] make_empty_scalar_like( + const column_view & + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/search.pxd b/python/pylibcudf/pylibcudf/libcudf/search.pxd index 5a6ad5384c9..5ec06858baa 100644 --- a/python/pylibcudf/pylibcudf/libcudf/search.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/search.pxd @@ -1,8 +1,8 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- cimport pylibcudf.libcudf.types as libcudf_types from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.table.table_view cimport table_view @@ -15,16 +15,16 @@ cdef extern from "cudf/search.hpp" namespace "cudf" nogil: table_view needles, vector[libcudf_types.order] column_order, vector[libcudf_types.null_order] null_precedence, - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] upper_bound( table_view haystack, table_view needles, vector[libcudf_types.order] column_order, vector[libcudf_types.null_order] null_precedence, - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] contains( column_view haystack, column_view needles, - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/sorting.pxd b/python/pylibcudf/pylibcudf/libcudf/sorting.pxd index 9e899855486..342545a0eec 100644 --- a/python/pylibcudf/pylibcudf/libcudf/sorting.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/sorting.pxd @@ -1,9 +1,9 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- cimport pylibcudf.libcudf.types as libcudf_types from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.aggregation cimport rank_method from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -15,12 +15,14 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: cdef unique_ptr[column] sorted_order( table_view source_table, vector[libcudf_types.order] column_order, - vector[libcudf_types.null_order] null_precedence) except + + vector[libcudf_types.null_order] null_precedence + ) except +libcudf_exception_handler cdef unique_ptr[column] stable_sorted_order( table_view source_table, vector[libcudf_types.order] column_order, - vector[libcudf_types.null_order] null_precedence) except + + vector[libcudf_types.null_order] null_precedence + ) except +libcudf_exception_handler cdef unique_ptr[column] rank( column_view input_view, @@ -28,45 +30,52 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: libcudf_types.order column_order, libcudf_types.null_policy null_handling, libcudf_types.null_order null_precedence, - bool percentage) except + + bool percentage) except +libcudf_exception_handler cdef bool is_sorted( const table_view& table, vector[libcudf_types.order] column_order, - vector[libcudf_types.null_order] null_precedence) except + + vector[libcudf_types.null_order] null_precedence + ) except +libcudf_exception_handler cdef unique_ptr[table] segmented_sort_by_key( const table_view& values, const table_view& keys, const column_view& segment_offsets, vector[libcudf_types.order] column_order, - vector[libcudf_types.null_order] null_precedence) except + + vector[libcudf_types.null_order] null_precedence + ) except +libcudf_exception_handler cdef unique_ptr[table] stable_segmented_sort_by_key( const table_view& values, const table_view& keys, const column_view& 
segment_offsets, vector[libcudf_types.order] column_order, - vector[libcudf_types.null_order] null_precedence) except + + vector[libcudf_types.null_order] null_precedence + ) except +libcudf_exception_handler cdef unique_ptr[table] sort_by_key( const table_view& values, const table_view& keys, vector[libcudf_types.order] column_order, - vector[libcudf_types.null_order] null_precedence) except + + vector[libcudf_types.null_order] null_precedence + ) except +libcudf_exception_handler cdef unique_ptr[table] stable_sort_by_key( const table_view& values, const table_view& keys, vector[libcudf_types.order] column_order, - vector[libcudf_types.null_order] null_precedence) except + + vector[libcudf_types.null_order] null_precedence + ) except +libcudf_exception_handler cdef unique_ptr[table] sort( table_view source_table, vector[libcudf_types.order] column_order, - vector[libcudf_types.null_order] null_precedence) except + + vector[libcudf_types.null_order] null_precedence + ) except +libcudf_exception_handler cdef unique_ptr[table] stable_sort( table_view source_table, vector[libcudf_types.order] column_order, - vector[libcudf_types.null_order] null_precedence) except + + vector[libcudf_types.null_order] null_precedence + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd b/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd index 7830c9478c2..78b9bcb299b 100644 --- a/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd @@ -1,8 +1,8 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.table.table cimport table @@ -23,25 +23,29 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: KEEP_LAST KEEP_NONE - cdef unique_ptr[table] drop_nulls(table_view source_table, - vector[size_type] keys, - size_type keep_threshold) except + + cdef unique_ptr[table] drop_nulls( + table_view source_table, + vector[size_type] keys, + size_type keep_threshold + ) except +libcudf_exception_handler - cdef unique_ptr[table] drop_nans(table_view source_table, - vector[size_type] keys, - size_type keep_threshold) except + + cdef unique_ptr[table] drop_nans( + table_view source_table, + vector[size_type] keys, + size_type keep_threshold + ) except +libcudf_exception_handler cdef unique_ptr[table] apply_boolean_mask( table_view source_table, column_view boolean_mask - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[table] unique( table_view input, vector[size_type] keys, duplicate_keep_option keep, null_equality nulls_equal, - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[table] distinct( table_view input, @@ -49,14 +53,14 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equals, - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] distinct_indices( table_view input, duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[table] stable_distinct( table_view input, @@ -64,22 +68,22 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: duplicate_keep_option keep, null_equality nulls_equal, nan_equality 
nans_equal, - ) except + + ) except +libcudf_exception_handler cdef size_type unique_count( column_view column, null_policy null_handling, - nan_policy nan_handling) except + + nan_policy nan_handling) except +libcudf_exception_handler cdef size_type unique_count( table_view source_table, - null_policy null_handling) except + + null_policy null_handling) except +libcudf_exception_handler cdef size_type distinct_count( column_view column, null_policy null_handling, - nan_policy nan_handling) except + + nan_policy nan_handling) except +libcudf_exception_handler cdef size_type distinct_count( table_view source_table, - null_policy null_handling) except + + null_policy null_handling) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/attributes.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/attributes.pxd index 5e510339834..1cf3c912f95 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/attributes.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/attributes.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -8,10 +8,10 @@ from pylibcudf.libcudf.column.column_view cimport column_view cdef extern from "cudf/strings/attributes.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] count_characters( - column_view source_strings) except + + column_view source_strings) except +libcudf_exception_handler cdef unique_ptr[column] count_bytes( - column_view source_strings) except + + column_view source_strings) except +libcudf_exception_handler cdef unique_ptr[column] code_points( - column_view source_strings) except + + column_view source_strings) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/capitalize.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/capitalize.pxd index 77e3f46d7ee..a3815757d2d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/capitalize.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/capitalize.pxd @@ -1,5 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar @@ -10,12 +11,12 @@ cdef extern from "cudf/strings/capitalize.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] capitalize( const column_view & strings, const string_scalar & delimiters - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] title( const column_view & strings, string_character_types sequence_type - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] is_title( - const column_view & strings) except + + const column_view & strings) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/case.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/case.pxd index 7869e90f387..7e60476b87b 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/case.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/case.pxd @@ -1,21 +1,22 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view cdef extern from "cudf/strings/case.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] capitalize( - const column_view & input) except + + const column_view & input) except +libcudf_exception_handler cdef unique_ptr[column] is_title( - const column_view & input) except + + const column_view & input) except +libcudf_exception_handler cdef unique_ptr[column] to_lower( - const column_view & strings) except + + const column_view & strings) except +libcudf_exception_handler cdef unique_ptr[column] to_upper( - const column_view & strings) except + + const column_view & strings) except +libcudf_exception_handler cdef unique_ptr[column] swapcase( - const column_view & strings) except + + const column_view & strings) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/char_types.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/char_types.pxd index 76afe047e8c..6a0ae06c08a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/char_types.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/char_types.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. 
- from libc.stdint cimport uint32_t from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar @@ -25,10 +25,10 @@ cdef extern from "cudf/strings/char_types/char_types.hpp" \ cdef unique_ptr[column] all_characters_of_type( column_view source_strings, string_character_types types, - string_character_types verify_types) except + + string_character_types verify_types) except +libcudf_exception_handler cdef unique_ptr[column] filter_characters_of_type( column_view source_strings, string_character_types types_to_remove, string_scalar replacement, - string_character_types types_to_keep) except + + string_character_types types_to_keep) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/combine.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/combine.pxd index e659993b834..90be281429b 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/combine.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/combine.pxd @@ -2,6 +2,7 @@ from libcpp cimport int from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar @@ -22,19 +23,19 @@ cdef extern from "cudf/strings/combine.hpp" namespace "cudf::strings" nogil: table_view strings_columns, string_scalar separator, string_scalar narep, - separator_on_nulls separate_nulls) except + + separator_on_nulls separate_nulls) except +libcudf_exception_handler cdef unique_ptr[column] concatenate( table_view strings_columns, column_view separators, string_scalar separator_narep, string_scalar col_narep, - separator_on_nulls separate_nulls) except + + separator_on_nulls 
separate_nulls) except +libcudf_exception_handler cdef unique_ptr[column] join_strings( column_view input, string_scalar separator, - string_scalar narep) except + + string_scalar narep) except +libcudf_exception_handler cdef unique_ptr[column] join_list_elements( column_view lists_strings_column, @@ -42,11 +43,11 @@ cdef extern from "cudf/strings/combine.hpp" namespace "cudf::strings" nogil: string_scalar separator_narep, string_scalar string_narep, separator_on_nulls separate_nulls, - output_if_empty_list empty_list_policy) except + + output_if_empty_list empty_list_policy) except +libcudf_exception_handler cdef unique_ptr[column] join_list_elements( column_view lists_strings_column, string_scalar separator, string_scalar narep, separator_on_nulls separate_nulls, - output_if_empty_list empty_list_policy) except + + output_if_empty_list empty_list_policy) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/contains.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/contains.pxd index eac0f748257..8eb287c6b06 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/contains.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/contains.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar @@ -11,22 +11,22 @@ cdef extern from "cudf/strings/contains.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] contains_re( column_view source_strings, - regex_program) except + + regex_program) except +libcudf_exception_handler cdef unique_ptr[column] count_re( column_view source_strings, - regex_program) except + + regex_program) except +libcudf_exception_handler cdef unique_ptr[column] matches_re( column_view source_strings, - regex_program) except + + regex_program) except +libcudf_exception_handler cdef unique_ptr[column] like( column_view source_strings, string_scalar pattern, - string_scalar escape_character) except + + string_scalar escape_character) except +libcudf_exception_handler cdef unique_ptr[column] like( column_view source_strings, column_view patterns, - string_scalar escape_character) except + + string_scalar escape_character) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd index e6688cfff81..37f39b098b3 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd @@ -1,5 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar @@ -9,9 +10,9 @@ cdef extern from "cudf/strings/convert/convert_booleans.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] to_booleans( column_view input, - string_scalar true_string) except + + string_scalar true_string) except +libcudf_exception_handler cdef unique_ptr[column] from_booleans( column_view booleans, string_scalar true_string, - string_scalar false_string) except + + string_scalar false_string) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd index fceddd58df0..c316b7891a3 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr from libcpp.string cimport string +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport data_type @@ -12,13 +12,13 @@ cdef extern from "cudf/strings/convert/convert_datetime.hpp" namespace \ cdef unique_ptr[column] to_timestamps( column_view input, data_type timestamp_type, - string format) except + + string format) except +libcudf_exception_handler cdef unique_ptr[column] from_timestamps( column_view timestamps, string format, - column_view names) except + + column_view names) except +libcudf_exception_handler cdef unique_ptr[column] is_timestamp( column_view input_col, - string format) except + + string format) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd index 43ffad1d89f..75374208172 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr from libcpp.string cimport string +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport data_type @@ -12,8 +12,8 @@ cdef extern from "cudf/strings/convert/convert_durations.hpp" namespace \ cdef unique_ptr[column] to_durations( const column_view & input, data_type duration_type, - const string & format) except + + const string & format) except +libcudf_exception_handler cdef unique_ptr[column] from_durations( const column_view & durations, - const string & format) except + + const string & format) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd index 72ab329f2dd..71c866ad211 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport data_type @@ -10,12 +10,12 @@ cdef extern from "cudf/strings/convert/convert_fixed_point.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] to_fixed_point( column_view input, - data_type output_type) except + + data_type output_type) except +libcudf_exception_handler cdef unique_ptr[column] from_fixed_point( - column_view input) except + + column_view input) except +libcudf_exception_handler cdef unique_ptr[column] is_fixed_point( column_view input, data_type decimal_type - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd index a45c7f9979e..7df6b914458 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport data_type @@ -10,11 +10,11 @@ cdef extern from "cudf/strings/convert/convert_floats.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] to_floats( column_view strings, - data_type output_type) except + + data_type output_type) except +libcudf_exception_handler cdef unique_ptr[column] from_floats( - column_view floats) except + + column_view floats) except +libcudf_exception_handler cdef unique_ptr[column] is_float( column_view input - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd index 69d566b8c49..4033ef51480 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd @@ -1,5 +1,4 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. - from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd index 801db438e92..33f9c798ae6 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -8,11 +8,11 @@ from pylibcudf.libcudf.column.column_view cimport column_view cdef extern from "cudf/strings/convert/convert_ipv4.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] ipv4_to_integers( - column_view input) except + + column_view input) except +libcudf_exception_handler cdef unique_ptr[column] integers_to_ipv4( - column_view integers) except + + column_view integers) except +libcudf_exception_handler cdef unique_ptr[column] is_ipv4( column_view input - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd index 6e1ecd30539..3d0a677424e 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd @@ -1,5 +1,6 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. 
from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar @@ -11,4 +12,4 @@ cdef extern from "cudf/strings/convert/convert_lists.hpp" namespace \ cdef unique_ptr[column] format_list_column( column_view input, string_scalar na_rep, - column_view separators) except + + column_view separators) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd index cb319ad143b..03a14e215e0 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -8,7 +8,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view cdef extern from "cudf/strings/convert/convert_urls.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] url_encode( - column_view input) except + + column_view input) except +libcudf_exception_handler cdef unique_ptr[column] url_decode( - column_view input) except + + column_view input) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/extract.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/extract.pxd index b7166167cfd..18608554921 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/extract.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/extract.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.strings.regex_program cimport regex_program @@ -11,8 +11,8 @@ cdef extern from "cudf/strings/extract.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[table] extract( column_view input, - regex_program prog) except + + regex_program prog) except +libcudf_exception_handler cdef unique_ptr[column] extract_all_record( column_view input, - regex_program prog) except + + regex_program prog) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/find.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/find.pxd index 1d1df1b8b8e..4082145c5b8 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/find.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/find.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr from libcpp.string cimport string +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar @@ -12,41 +12,41 @@ cdef extern from "cudf/strings/find.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] contains( column_view source_strings, - string_scalar target) except + + string_scalar target) except +libcudf_exception_handler cdef unique_ptr[column] contains( column_view source_strings, - column_view target_strings) except + + column_view target_strings) except +libcudf_exception_handler cdef unique_ptr[column] ends_with( column_view source_strings, - string_scalar target) except + + string_scalar target) except +libcudf_exception_handler cdef unique_ptr[column] ends_with( column_view source_strings, - column_view target_strings) except + + column_view target_strings) except +libcudf_exception_handler cdef unique_ptr[column] starts_with( column_view source_strings, - string_scalar target) except + + string_scalar target) except +libcudf_exception_handler cdef unique_ptr[column] starts_with( column_view source_strings, - column_view target_strings) except + + column_view target_strings) except +libcudf_exception_handler cdef unique_ptr[column] find( column_view source_strings, string_scalar target, size_type start, - size_type stop) except + + size_type stop) except +libcudf_exception_handler cdef unique_ptr[column] find( column_view source_strings, column_view target, - size_type start) except + + size_type start) except +libcudf_exception_handler cdef unique_ptr[column] rfind( column_view source_strings, string_scalar target, size_type start, - size_type stop) except + + size_type stop) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/find_multiple.pxd 
b/python/pylibcudf/pylibcudf/libcudf/strings/find_multiple.pxd index 3d048c1f50b..b03044db4f4 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/find_multiple.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/find_multiple.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -10,4 +10,4 @@ cdef extern from "cudf/strings/find_multiple.hpp" namespace "cudf::strings" \ cdef unique_ptr[column] find_multiple( column_view input, - column_view targets) except + + column_view targets) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd index 0d286c36446..eda68c35e58 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2019-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.strings.regex_program cimport regex_program @@ -10,8 +10,8 @@ cdef extern from "cudf/strings/findall.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] findall( column_view input, - regex_program prog) except + + regex_program prog) except +libcudf_exception_handler cdef unique_ptr[column] find_re( column_view input, - regex_program prog) except + + regex_program prog) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd index 875f8cafd14..d82f76f98b6 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd @@ -2,6 +2,7 @@ from libc.stdint cimport int32_t from libcpp.memory cimport unique_ptr from libcpp.string cimport string +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar @@ -15,8 +16,8 @@ cdef extern from "cudf/strings/padding.hpp" namespace "cudf::strings" nogil: column_view input, size_type width, side_type side, - string fill_char) except + + string fill_char) except +libcudf_exception_handler cdef unique_ptr[column] zfill( column_view input, - size_type width) except + + size_type width) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/regex_flags.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/regex_flags.pxd index 41617f157b7..1aa22107183 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/regex_flags.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/regex_flags.pxd @@ -1,6 +1,6 @@ # Copyright 
(c) 2022-2024, NVIDIA CORPORATION. - from libc.stdint cimport int32_t +from pylibcudf.exception_handler cimport libcudf_exception_handler cdef extern from "cudf/strings/regex/flags.hpp" \ diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/regex_program.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/regex_program.pxd index 5d1d9e583d5..21f52f3de24 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/regex_program.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/regex_program.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2022-2024, NVIDIA CORPORATION. - from libcpp.memory cimport unique_ptr from libcpp.string cimport string +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.strings.regex_flags cimport regex_flags @@ -14,4 +14,4 @@ cdef extern from "cudf/strings/regex/regex_program.hpp" \ unique_ptr[regex_program] create( string pattern, regex_flags flags - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd index 59262820411..de65b554eba 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport size_type @@ -11,8 +11,8 @@ cdef extern from "cudf/strings/repeat_strings.hpp" namespace "cudf::strings" \ cdef unique_ptr[column] repeat_strings( column_view input, - size_type repeat_times) except + + size_type repeat_times) except +libcudf_exception_handler cdef unique_ptr[column] repeat_strings( column_view input, - column_view repeat_times) except + + column_view repeat_times) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/replace.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/replace.pxd index fd5f4fc4751..68743d85712 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/replace.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/replace.pxd @@ -1,8 +1,8 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libc.stdint cimport int32_t from libcpp.memory cimport unique_ptr from libcpp.string cimport string +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar @@ -14,15 +14,15 @@ cdef extern from "cudf/strings/replace.hpp" namespace "cudf::strings" nogil: column_view source_strings, string_scalar repl, size_type start, - size_type stop) except + + size_type stop) except +libcudf_exception_handler cdef unique_ptr[column] replace( column_view source_strings, string_scalar target, string_scalar repl, - int32_t maxrepl) except + + int32_t maxrepl) except +libcudf_exception_handler cdef unique_ptr[column] replace_multiple( column_view source_strings, column_view target_strings, - column_view repl_strings) except + + column_view repl_strings) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd index 6b0c90d0acc..2a7d50346be 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd @@ -1,8 +1,8 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr from libcpp.string cimport string from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar @@ -18,15 +18,15 @@ cdef extern from "cudf/strings/replace_re.hpp" namespace "cudf::strings" nogil: column_view input, regex_program prog, string_scalar replacement, - size_type max_replace_count) except + + size_type max_replace_count) except +libcudf_exception_handler cdef unique_ptr[column] replace_re( column_view input, vector[string] patterns, column_view replacements, - regex_flags flags) except + + regex_flags flags) except +libcudf_exception_handler cdef unique_ptr[column] replace_with_backrefs( column_view input, regex_program prog, - string replacement) except + + string replacement) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/side_type.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/side_type.pxd index e92c5dc1d66..9626763d5af 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/side_type.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/side_type.pxd @@ -1,5 +1,6 @@ # Copyright (c) 2022-2024, NVIDIA CORPORATION. from libcpp cimport int +from pylibcudf.exception_handler cimport libcudf_exception_handler cdef extern from "cudf/strings/side_type.hpp" namespace "cudf::strings" nogil: diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/split/partition.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/split/partition.pxd index 4299cf62e99..d1a2ddbaef4 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/split/partition.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/split/partition.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr from libcpp.string cimport string +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar @@ -13,8 +13,8 @@ cdef extern from "cudf/strings/split/partition.hpp" namespace \ cdef unique_ptr[table] partition( column_view input, - string_scalar delimiter) except + + string_scalar delimiter) except +libcudf_exception_handler cdef unique_ptr[table] rpartition( column_view input, - string_scalar delimiter) except + + string_scalar delimiter) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/split/split.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/split/split.pxd index a22a79fc7d7..34fb72a3b33 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/split/split.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/split/split.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr from libcpp.string cimport string +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar @@ -16,22 +16,22 @@ cdef extern from "cudf/strings/split/split.hpp" namespace \ cdef unique_ptr[table] split( column_view strings_column, string_scalar delimiter, - size_type maxsplit) except + + size_type maxsplit) except +libcudf_exception_handler cdef unique_ptr[table] rsplit( column_view strings_column, string_scalar delimiter, - size_type maxsplit) except + + size_type maxsplit) except +libcudf_exception_handler cdef unique_ptr[column] split_record( column_view strings, string_scalar delimiter, - size_type maxsplit) except + + size_type maxsplit) except +libcudf_exception_handler cdef unique_ptr[column] rsplit_record( column_view strings, string_scalar delimiter, - size_type maxsplit) except + + size_type maxsplit) except +libcudf_exception_handler cdef extern from "cudf/strings/split/split_re.hpp" namespace \ @@ -40,19 +40,19 @@ cdef extern from "cudf/strings/split/split_re.hpp" namespace \ cdef unique_ptr[table] split_re( const column_view& input, regex_program prog, - size_type maxsplit) except + + size_type maxsplit) except +libcudf_exception_handler cdef unique_ptr[table] rsplit_re( const column_view& input, regex_program prog, - size_type maxsplit) except + + size_type maxsplit) except +libcudf_exception_handler cdef unique_ptr[column] split_record_re( const column_view& input, regex_program prog, - size_type maxsplit) except + + size_type maxsplit) except +libcudf_exception_handler cdef unique_ptr[column] rsplit_record_re( const column_view& input, regex_program prog, - size_type maxsplit) except + + size_type maxsplit) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd 
b/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd index dd527a78e7f..41751ddff3c 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar @@ -12,4 +12,4 @@ cdef extern from "cudf/strings/strip.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] strip( column_view input, side_type side, - string_scalar to_strip) except + + string_scalar to_strip) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd index 576dae9387f..f573870583d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport numeric_scalar @@ -12,9 +12,9 @@ cdef extern from "cudf/strings/slice.hpp" namespace "cudf::strings" nogil: column_view source_strings, numeric_scalar[size_type] start, numeric_scalar[size_type] end, - numeric_scalar[size_type] step) except + + numeric_scalar[size_type] step) except +libcudf_exception_handler cdef unique_ptr[column] slice_strings( column_view source_strings, column_view starts, - column_view stops) except + + column_view stops) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/translate.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/translate.pxd index 9fd24f2987b..11b63d0ed30 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/translate.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/translate.pxd @@ -1,9 +1,9 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.pair cimport pair from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar @@ -14,7 +14,8 @@ cdef extern from "cudf/strings/translate.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] translate( column_view input, - vector[pair[char_utf8, char_utf8]] chars_table) except + + vector[pair[char_utf8, char_utf8]] chars_table + ) except +libcudf_exception_handler cpdef enum class filter_type(bool): KEEP @@ -24,4 +25,4 @@ cdef extern from "cudf/strings/translate.hpp" namespace "cudf::strings" nogil: column_view input, vector[pair[char_utf8, char_utf8]] characters_to_filter, filter_type keep_characters, - string_scalar replacement) except + + string_scalar replacement) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd index abc1bd43ad2..2fb49c2a830 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport size_type @@ -10,4 +10,4 @@ cdef extern from "cudf/strings/wrap.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] wrap( column_view input, - size_type width) except + + size_type width) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings_udf.pxd b/python/pylibcudf/pylibcudf/libcudf/strings_udf.pxd index 2eca043e451..a2654eaab16 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings_udf.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings_udf.pxd @@ -1,9 +1,9 @@ # Copyright (c) 2022-2024, NVIDIA CORPORATION. - from libc.stdint cimport uint8_t, uint16_t from libcpp.memory cimport unique_ptr from libcpp.string cimport string from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport size_type @@ -17,17 +17,19 @@ cdef extern from "cudf/strings/udf/udf_string.hpp" namespace \ cdef extern from "cudf/strings/udf/udf_apis.hpp" namespace \ "cudf::strings::udf" nogil: - cdef int get_cuda_build_version() except + - cdef unique_ptr[device_buffer] to_string_view_array(column_view) except + + cdef int get_cuda_build_version() except +libcudf_exception_handler + cdef unique_ptr[device_buffer] to_string_view_array( + column_view + ) except +libcudf_exception_handler cdef unique_ptr[column] column_from_udf_string_array( udf_string* strings, size_type size, - ) except + + ) except +libcudf_exception_handler cdef void free_udf_string_array( udf_string* strings, size_type size - ) except + + ) except +libcudf_exception_handler cdef extern from "cudf/strings/detail/char_tables.hpp" namespace 
\ "cudf::strings::detail" nogil: - cdef const uint8_t* get_character_flags_table() except + - cdef const uint16_t* get_character_cases_table() except + - cdef const void* get_special_case_mapping_table() except + + cdef const uint8_t* get_character_flags_table() except +libcudf_exception_handler + cdef const uint16_t* get_character_cases_table() except +libcudf_exception_handler + cdef const void* get_special_case_mapping_table() except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/table/table.pxd b/python/pylibcudf/pylibcudf/libcudf/table/table.pxd index 654c29b083a..b65644dd131 100644 --- a/python/pylibcudf/pylibcudf/libcudf/table/table.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/table/table.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.table.table_view cimport mutable_table_view, table_view from pylibcudf.libcudf.types cimport size_type @@ -9,10 +9,10 @@ from pylibcudf.libcudf.types cimport size_type cdef extern from "cudf/table/table.hpp" namespace "cudf" nogil: cdef cppclass table: - table(const table&) except + - table(table_view) except + - size_type num_columns() except + - size_type num_rows() except + - table_view view() except + - mutable_table_view mutable_view() except + - vector[unique_ptr[column]] release() except + + table(const table&) except +libcudf_exception_handler + table(table_view) except +libcudf_exception_handler + size_type num_columns() except +libcudf_exception_handler + size_type num_rows() except +libcudf_exception_handler + table_view view() except +libcudf_exception_handler + mutable_table_view mutable_view() except +libcudf_exception_handler + vector[unique_ptr[column]] release() except +libcudf_exception_handler diff --git 
a/python/pylibcudf/pylibcudf/libcudf/table/table_view.pxd b/python/pylibcudf/pylibcudf/libcudf/table/table_view.pxd index 3af2f6a6c2c..eacfa0420ef 100644 --- a/python/pylibcudf/pylibcudf/libcudf/table/table_view.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/table/table_view.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column_view cimport ( column_view, mutable_column_view, @@ -10,16 +10,22 @@ from pylibcudf.libcudf.types cimport size_type cdef extern from "cudf/table/table_view.hpp" namespace "cudf" nogil: cdef cppclass table_view: - table_view() except + - table_view(const vector[column_view]) except + - column_view column(size_type column_index) except + - size_type num_columns() except + - size_type num_rows() except + - table_view select(vector[size_type] column_indices) except + + table_view() except +libcudf_exception_handler + table_view(const vector[column_view]) except +libcudf_exception_handler + column_view column(size_type column_index) except +libcudf_exception_handler + size_type num_columns() except +libcudf_exception_handler + size_type num_rows() except +libcudf_exception_handler + table_view select( + vector[size_type] column_indices + ) except +libcudf_exception_handler cdef cppclass mutable_table_view: - mutable_table_view() except + - mutable_table_view(const vector[mutable_column_view]) except + - mutable_column_view column(size_type column_index) except + - size_type num_columns() except + - size_type num_rows() except + + mutable_table_view() except +libcudf_exception_handler + mutable_table_view( + const vector[mutable_column_view] + ) except +libcudf_exception_handler + mutable_column_view column( + size_type column_index + ) except +libcudf_exception_handler + size_type num_columns() except +libcudf_exception_handler + size_type num_rows() except +libcudf_exception_handler diff --git 
a/python/pylibcudf/pylibcudf/libcudf/transform.pxd b/python/pylibcudf/pylibcudf/libcudf/transform.pxd index 47d79083b66..78ee7b4b0e5 100644 --- a/python/pylibcudf/pylibcudf/libcudf/transform.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/transform.pxd @@ -1,9 +1,9 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.pair cimport pair from libcpp.string cimport string +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.expressions cimport expression @@ -17,38 +17,38 @@ from rmm.librmm.device_buffer cimport device_buffer cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: cdef pair[unique_ptr[device_buffer], size_type] bools_to_mask ( column_view input - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] mask_to_bools ( bitmask_type* bitmask, size_type begin_bit, size_type end_bit - ) except + + ) except +libcudf_exception_handler cdef pair[unique_ptr[device_buffer], size_type] nans_to_nulls( column_view input - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] compute_column( table_view table, expression expr - ) except + + ) except +libcudf_exception_handler cdef unique_ptr[column] transform( column_view input, string unary_udf, data_type output_type, bool is_ptx - ) except + + ) except +libcudf_exception_handler cdef pair[unique_ptr[table], unique_ptr[column]] encode( table_view input - ) except + + ) except +libcudf_exception_handler cdef pair[unique_ptr[column], table_view] one_hot_encode( column_view input_column, column_view categories - ) + ) except +libcudf_exception_handler cdef unique_ptr[column] compute_column( const table_view table, const expression& expr - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/transpose.pxd 
b/python/pylibcudf/pylibcudf/libcudf/transpose.pxd index 9c0e3c073b0..fde49afd99c 100644 --- a/python/pylibcudf/pylibcudf/libcudf/transpose.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/transpose.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - from libcpp.memory cimport unique_ptr from libcpp.pair cimport pair +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.table.table_view cimport table_view @@ -12,4 +12,4 @@ cdef extern from "cudf/transpose.hpp" namespace "cudf" nogil: table_view ] transpose( table_view input_table - ) except + + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/types.pxd b/python/pylibcudf/pylibcudf/libcudf/types.pxd index 60e293e5cdb..3281c230aa0 100644 --- a/python/pylibcudf/pylibcudf/libcudf/types.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/types.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - from libc.stdint cimport int32_t, uint32_t from libcpp cimport bool +from pylibcudf.exception_handler cimport libcudf_exception_handler cdef extern from "cudf/types.hpp" namespace "cudf" nogil: @@ -85,10 +85,10 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil: NUM_TYPE_IDS cdef cppclass data_type: - data_type() except + - data_type(const data_type&) except + - data_type(type_id id) except + - data_type(type_id id, int32_t scale) except + + data_type() except +libcudf_exception_handler + data_type(const data_type&) except +libcudf_exception_handler + data_type(type_id id) except +libcudf_exception_handler + data_type(type_id id, int32_t scale) except +libcudf_exception_handler type_id id() noexcept int32_t scale() noexcept bool operator==(const data_type&, const data_type&) noexcept @@ -100,4 +100,4 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil: MIDPOINT NEAREST - cdef size_type size_of(data_type t) except + + cdef size_type size_of(data_type t) except 
+libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/unary.pxd b/python/pylibcudf/pylibcudf/libcudf/unary.pxd index 887f8c7fca4..4666012623e 100644 --- a/python/pylibcudf/pylibcudf/libcudf/unary.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/unary.pxd @@ -1,8 +1,8 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. - from libc.stdint cimport int32_t from libcpp cimport bool from libcpp.memory cimport unique_ptr +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport data_type @@ -36,13 +36,21 @@ cdef extern from "cudf/unary.hpp" namespace "cudf" nogil: cdef extern unique_ptr[column] unary_operation( column_view input, - unary_operator op) except + + unary_operator op) except +libcudf_exception_handler - cdef extern unique_ptr[column] is_null(column_view input) except + - cdef extern unique_ptr[column] is_valid(column_view input) except + + cdef extern unique_ptr[column] is_null( + column_view input + ) except +libcudf_exception_handler + cdef extern unique_ptr[column] is_valid( + column_view input + ) except +libcudf_exception_handler cdef extern unique_ptr[column] cast( column_view input, - data_type out_type) except + + data_type out_type) except +libcudf_exception_handler cdef extern bool is_supported_cast(data_type from_, data_type to) noexcept - cdef extern unique_ptr[column] is_nan(column_view input) except + - cdef extern unique_ptr[column] is_not_nan(column_view input) except + + cdef extern unique_ptr[column] is_nan( + column_view input + ) except +libcudf_exception_handler + cdef extern unique_ptr[column] is_not_nan( + column_view input + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/utilities/span.pxd b/python/pylibcudf/pylibcudf/libcudf/utilities/span.pxd index 7e591e96373..8024ce146ae 100644 --- 
a/python/pylibcudf/pylibcudf/libcudf/utilities/span.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/utilities/span.pxd @@ -1,9 +1,9 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. - +# Copyright (c) 2021-2024, NVIDIA CORPORATION. from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler cdef extern from "cudf/utilities/span.hpp" namespace "cudf" nogil: cdef cppclass host_span[T]: - host_span() except + - host_span(vector[T]) except + + host_span() except +libcudf_exception_handler + host_span(vector[T]) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd b/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd index 5533530754e..93f13a7e11f 100644 --- a/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd @@ -1,7 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. - from libcpp cimport bool from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.types cimport data_type diff --git a/python/pylibcudf/pylibcudf/libcudf/utilities/type_dispatcher.pxd b/python/pylibcudf/pylibcudf/libcudf/utilities/type_dispatcher.pxd index fbeb6e9db90..c3e3232b5cc 100644 --- a/python/pylibcudf/pylibcudf/libcudf/utilities/type_dispatcher.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/utilities/type_dispatcher.pxd @@ -1,5 +1,5 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
- +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.types cimport type_id diff --git a/python/pylibcudf/pylibcudf/tests/test_column_factories.py b/python/pylibcudf/pylibcudf/tests/test_column_factories.py index e317362a76b..e1c8a8303ec 100644 --- a/python/pylibcudf/pylibcudf/tests/test_column_factories.py +++ b/python/pylibcudf/pylibcudf/tests/test_column_factories.py @@ -105,7 +105,7 @@ def test_make_empty_column_dtype(pa_type): plc_type = plc.interop.from_arrow(pa_col).type() if isinstance(pa_type, (pa.ListType, pa.StructType)): - with pytest.raises(ValueError): + with pytest.raises(TypeError): plc.column_factories.make_empty_column(plc_type) return @@ -119,7 +119,7 @@ def test_make_empty_column_typeid(pa_type): tid = plc.interop.from_arrow(pa_col).type().id() if isinstance(pa_type, (pa.ListType, pa.StructType)): - with pytest.raises(ValueError): + with pytest.raises(TypeError): plc.column_factories.make_empty_column(tid) return @@ -154,7 +154,7 @@ def test_make_numeric_column(numeric_pa_type, mask_state): ) def test_make_numeric_column_dtype_err(non_numeric_pa_type): plc_type = plc.interop.from_arrow(non_numeric_pa_type) - with pytest.raises(ValueError): + with pytest.raises(TypeError): plc.column_factories.make_numeric_column( plc_type, 3, plc.types.MaskState.UNALLOCATED ) @@ -183,7 +183,7 @@ def test_make_fixed_point_column(fixed_point_pa_type, mask_state): ) def test_make_fixed_point_column_dtype_err(non_fixed_point_pa_type): plc_type = plc.interop.from_arrow(non_fixed_point_pa_type) - with pytest.raises(ValueError): + with pytest.raises(TypeError): plc.column_factories.make_fixed_point_column( plc_type, 3, plc.types.MaskState.UNALLOCATED ) @@ -211,7 +211,7 @@ def test_make_timestamp_column(timestamp_pa_type, mask_state): ) def test_make_timestamp_column_dtype_err(non_timestamp_pa_type): plc_type = plc.interop.from_arrow(non_timestamp_pa_type) - with pytest.raises(ValueError): + with pytest.raises(TypeError): 
plc.column_factories.make_timestamp_column( plc_type, 3, plc.types.MaskState.UNALLOCATED ) @@ -239,7 +239,7 @@ def test_make_duration_column(duration_pa_type, mask_state): ) def test_make_duration_column_dtype_err(non_duration_pa_type): plc_type = plc.interop.from_arrow(non_duration_pa_type) - with pytest.raises(ValueError): + with pytest.raises(TypeError): plc.column_factories.make_duration_column( plc_type, 3, plc.types.MaskState.UNALLOCATED ) diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index e83db47830c..dc82eb363d0 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -19,12 +19,12 @@ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ "cuda-python>=11.7.1,<12.0a0,<=11.8.3", - "libcudf==24.12.*,>=0.0.0a0", + "libcudf==25.2.*,>=0.0.0a0", "nvtx>=0.2.1", "packaging", "pyarrow>=14.0.0,<19.0.0a0,!=17.0.0; platform_machine=='aarch64'", "pyarrow>=14.0.0,<19.0.0a0; platform_machine=='x86_64'", - "rmm==24.12.*,>=0.0.0a0", + "rmm==25.2.*,>=0.0.0a0", "typing_extensions>=4.0.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -113,10 +113,10 @@ matrix-entry = "cuda_suffixed=true" requires = [ "cmake>=3.26.4,!=3.30.0", "cython>=3.0.3", - "libcudf==24.12.*,>=0.0.0a0", - "librmm==24.12.*,>=0.0.0a0", + "libcudf==25.2.*,>=0.0.0a0", + "librmm==25.2.*,>=0.0.0a0", "ninja", - "rmm==24.12.*,>=0.0.0a0", + "rmm==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [tool.scikit-build]