From 8f31e436cd92cfe2f46bb61ee25ff7cc6bef4177 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 15 Feb 2024 05:30:26 -0600 Subject: [PATCH 1/3] Support CUDA 12.2 (#4088) * switches to CUDA 12.2.2 for building conda packages and wheels * adds new tests running against CUDA 12.2.2 ### Notes for Reviewers This is part of ongoing work to build and test packages against CUDA 12.2.2 across all of RAPIDS. For more details see: * https://github.com/rapidsai/build-planning/issues/7 * https://github.com/rapidsai/shared-workflows/pull/166 Planning a second round of PRs to revert these references back to a proper `branch-24.{nn}` release branch of `shared-workflows` once https://github.com/rapidsai/shared-workflows/pull/166 is merged. *(created with `rapids-reviser`)* Authors: - James Lamb (https://github.com/jameslamb) - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) - https://github.com/jakirkham Approvers: - Jake Awe (https://github.com/AyodeAwe) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/4088 --- .github/workflows/build.yaml | 32 +++++++------- .github/workflows/pr.yaml | 42 +++++++++---------- .github/workflows/test.yaml | 16 +++---- ci/build_docs.sh | 2 +- ..._64.yaml => all_cuda-122_arch-x86_64.yaml} | 4 +- conda/recipes/cugraph/meta.yaml | 10 ++++- conda/recipes/libcugraph/meta.yaml | 19 ++++++++- conda/recipes/pylibcugraph/meta.yaml | 11 ++++- cpp/tests/CMakeLists.txt | 9 +++- dependencies.yaml | 6 ++- docs/cugraph/source/conf.py | 2 +- fetch_rapids.cmake | 2 +- python/cugraph/CMakeLists.txt | 2 +- python/pylibcugraph/CMakeLists.txt | 2 +- 14 files changed, 102 insertions(+), 57 deletions(-) rename conda/environments/{all_cuda-120_arch-x86_64.yaml => all_cuda-122_arch-x86_64.yaml} (96%) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 5ecff1f8a75..92f70bb2aa7 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ 
-28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@test-cuda-12.2 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -38,7 +38,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@test-cuda-12.2 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -47,7 +47,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@test-cuda-12.2 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@test-cuda-12.2 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-pylibcugraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -83,7 +83,7 @@ jobs: wheel-publish-pylibcugraph: needs: wheel-build-pylibcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@test-cuda-12.2 with: build_type: ${{ inputs.build_type || 'branch' }} branch: 
${{ inputs.branch }} @@ -93,7 +93,7 @@ jobs: wheel-build-cugraph: needs: wheel-publish-pylibcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -106,7 +106,7 @@ jobs: wheel-publish-cugraph: needs: wheel-build-cugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@test-cuda-12.2 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -116,7 +116,7 @@ jobs: wheel-build-nx-cugraph: needs: wheel-publish-pylibcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -126,7 +126,7 @@ jobs: wheel-publish-nx-cugraph: needs: wheel-build-nx-cugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@test-cuda-12.2 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -136,7 +136,7 @@ jobs: wheel-build-cugraph-dgl: needs: wheel-publish-cugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -146,7 +146,7 @@ jobs: wheel-publish-cugraph-dgl: needs: wheel-build-cugraph-dgl secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 + uses: 
rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@test-cuda-12.2 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -156,7 +156,7 @@ jobs: wheel-build-cugraph-pyg: needs: wheel-publish-cugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -166,7 +166,7 @@ jobs: wheel-publish-cugraph-pyg: needs: wheel-build-cugraph-pyg secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@test-cuda-12.2 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -175,7 +175,7 @@ jobs: package-name: cugraph-pyg wheel-build-cugraph-equivariant: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -185,7 +185,7 @@ jobs: wheel-publish-cugraph-equivariant: needs: wheel-build-cugraph-equivariant secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@test-cuda-12.2 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 349d682a59e..36a53df14e1 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -33,41 +33,41 @@ jobs: - wheel-tests-cugraph-equivariant - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.04 + uses: 
rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@test-cuda-12.2 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@test-cuda-12.2 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@test-cuda-12.2 with: build_type: pull-request node_type: cpu32 conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@test-cuda-12.2 with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@test-cuda-12.2 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@test-cuda-12.2 with: build_type: pull-request conda-notebook-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@test-cuda-12.2 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -77,7 +77,7 @@ jobs: docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@test-cuda-12.2 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -87,7 +87,7 @@ jobs: 
wheel-build-pylibcugraph: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2 with: build_type: pull-request script: ci/build_wheel_pylibcugraph.sh @@ -98,14 +98,14 @@ jobs: wheel-tests-pylibcugraph: needs: wheel-build-pylibcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2 with: build_type: pull-request script: ci/test_wheel_pylibcugraph.sh wheel-build-cugraph: needs: wheel-tests-pylibcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2 with: build_type: pull-request script: ci/build_wheel_cugraph.sh @@ -115,35 +115,35 @@ jobs: wheel-tests-cugraph: needs: wheel-build-cugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2 with: build_type: pull-request script: ci/test_wheel_cugraph.sh wheel-build-nx-cugraph: needs: wheel-tests-pylibcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2 with: build_type: pull-request script: ci/build_wheel_nx-cugraph.sh wheel-tests-nx-cugraph: needs: wheel-build-nx-cugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2 with: build_type: pull-request script: ci/test_wheel_nx-cugraph.sh wheel-build-cugraph-dgl: needs: wheel-tests-cugraph secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2 with: build_type: pull-request script: ci/build_wheel_cugraph-dgl.sh wheel-tests-cugraph-dgl: needs: wheel-build-cugraph-dgl secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2 with: build_type: pull-request script: ci/test_wheel_cugraph-dgl.sh @@ -151,35 +151,35 @@ jobs: wheel-build-cugraph-pyg: needs: wheel-tests-cugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2 with: build_type: pull-request script: ci/build_wheel_cugraph-pyg.sh wheel-tests-cugraph-pyg: needs: wheel-build-cugraph-pyg secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2 with: build_type: pull-request script: ci/test_wheel_cugraph-pyg.sh matrix_filter: map(select(.ARCH == "amd64" and .CUDA_VER == "11.8.0")) wheel-build-cugraph-equivariant: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2 with: build_type: pull-request script: ci/build_wheel_cugraph-equivariant.sh wheel-tests-cugraph-equivariant: needs: wheel-build-cugraph-equivariant secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2 with: build_type: pull-request script: ci/test_wheel_cugraph-equivariant.sh matrix_filter: map(select(.ARCH == "amd64")) devcontainer: secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@test-cuda-12.2 with: node_type: cpu32 extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f6a04fcc776..7e81354d1b8 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@test-cuda-12.2 with: build_type: nightly branch: ${{ inputs.branch }} @@ -24,7 +24,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@test-cuda-12.2 with: build_type: nightly branch: ${{ inputs.branch }} @@ -32,7 +32,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-pylibcugraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2 with: build_type: nightly branch: ${{ inputs.branch }} @@ -41,7 +41,7 @@ jobs: script: ci/test_wheel_pylibcugraph.sh wheel-tests-cugraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2 with: build_type: nightly branch: ${{ inputs.branch }} @@ -50,7 +50,7 @@ jobs: script: ci/test_wheel_cugraph.sh wheel-tests-nx-cugraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2 with: build_type: nightly branch: ${{ inputs.branch }} @@ 
-59,7 +59,7 @@ jobs: script: ci/test_wheel_nx-cugraph.sh wheel-tests-cugraph-dgl: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2 with: build_type: nightly branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: matrix_filter: map(select(.ARCH == "amd64")) wheel-tests-cugraph-pyg: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2 with: build_type: nightly branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: matrix_filter: map(select(.ARCH == "amd64" and .CUDA_VER == "11.8.0")) wheel-tests-cugraph-equivariant: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 0ed2e69ae90..298a8b68791 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
set -euo pipefail diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml similarity index 96% rename from conda/environments/all_cuda-120_arch-x86_64.yaml rename to conda/environments/all_cuda-122_arch-x86_64.yaml index 03dade0ed1f..65734c96ff9 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-122_arch-x86_64.yaml @@ -18,7 +18,7 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-version=12.0 +- cuda-version=12.2 - cudf==24.4.* - cupy>=12.0.0 - cxx-compiler @@ -81,4 +81,4 @@ dependencies: - ucx-py==0.37.* - wget - wheel -name: all_cuda-120_arch-x86_64 +name: all_cuda-122_arch-x86_64 diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index b8e3072dd38..ed214e4eb6e 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. {% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} @@ -34,7 +34,12 @@ build: - SCCACHE_S3_USE_SSL - SCCACHE_S3_NO_CREDENTIALS ignore_run_exports_from: + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} + {% else %} - {{ compiler('cuda') }} + - cuda-cudart-dev + {% endif %} requirements: build: @@ -53,6 +58,8 @@ requirements: - cuda-version ={{ cuda_version }} {% if cuda_major == "11" %} - cudatoolkit + {% else %} + - cuda-cudart-dev {% endif %} - cudf ={{ minor_version }} - cython >=3.0.0 @@ -70,6 +77,7 @@ requirements: - cudatoolkit - cuda-python >=11.7.1,<12.0a0 {% else %} + - cuda-cudart - cuda-python >=12.0,<13.0a0 {% endif %} - cudf ={{ minor_version }} diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index 66f72e6b6b5..31514a3a21d 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. {% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} @@ -88,12 +88,16 @@ outputs: ignore_run_exports_from: {% if cuda_major == "11" %} - {{ compiler('cuda11') }} + {% else %} + - {{ compiler('cuda') }} + - cuda-cudart-dev {% endif %} requirements: build: - cmake {{ cmake_version }} host: - cuda-version ={{ cuda_version }} + - cuda-cudart-dev run: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} {% if cuda_major == "11" %} @@ -105,6 +109,7 @@ outputs: - libcusparse {{ cuda11_libcusparse_run_version }} {% else %} - cuda-profiler-api + - cuda-cudart - libcublas - libcurand - libcusolver @@ -131,17 +136,23 @@ outputs: ignore_run_exports_from: {% if cuda_major == "11" %} - {{ compiler('cuda11') }} + {% else %} + - {{ compiler('cuda') }} + - cuda-cudart-dev {% endif %} requirements: build: - cmake {{ cmake_version }} host: - cuda-version ={{ cuda_version }} + - cuda-cudart-dev run: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} - {{ pin_subpackage('libcugraph', exact=True) }} {% if cuda_major == "11" %} - cudatoolkit + {% else %} + - cuda-cudart {% endif %} - libcudf ={{ minor_version }} - librmm ={{ minor_version }} @@ -160,18 +171,24 @@ outputs: ignore_run_exports_from: {% if cuda_major == "11" %} - {{ compiler('cuda11') }} + {% else %} + - {{ compiler('cuda') }} + - cuda-cudart-dev {% endif %} requirements: build: - cmake {{ cmake_version }} host: - cuda-version ={{ cuda_version }} + - cuda-cudart-dev run: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} - {{ pin_subpackage('libcugraph_etl', exact=True) }} - {{ pin_subpackage('libcugraph', exact=True) }} {% if cuda_major == "11" %} - cudatoolkit + {% else %} + - cuda-cudart {% endif %} - gmock {{ gtest_version }} - gtest {{ gtest_version }} diff --git a/conda/recipes/pylibcugraph/meta.yaml b/conda/recipes/pylibcugraph/meta.yaml index 0f66f55ccaa..42b5bc625eb 100644 --- a/conda/recipes/pylibcugraph/meta.yaml +++ b/conda/recipes/pylibcugraph/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA 
CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. {% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} @@ -34,7 +34,12 @@ build: - SCCACHE_S3_USE_SSL - SCCACHE_S3_NO_CREDENTIALS ignore_run_exports_from: + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} + {% else %} - {{ compiler('cuda') }} + - cuda-cudart-dev + {% endif %} requirements: build: @@ -53,6 +58,8 @@ requirements: - cuda-version ={{ cuda_version }} {% if cuda_major == "11" %} - cudatoolkit + {% else %} + - cuda-cudart-dev {% endif %} - cython >=3.0.0 - libcugraph ={{ version }} @@ -64,6 +71,8 @@ requirements: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} {% if cuda_major == "11" %} - cudatoolkit + {% else %} + - cuda-cudart {% endif %} - libcugraph ={{ version }} - python diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 3df979fe5c2..1e5d0489b1f 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -737,7 +737,14 @@ ConfigureCTest(CAPI_RANDOM_WALKS_TEST c_api/sg_random_walks_test.c) ConfigureCTest(CAPI_TRIANGLE_COUNT_TEST c_api/triangle_count_test.c) ConfigureCTest(CAPI_LOUVAIN_TEST c_api/louvain_test.c) ConfigureCTest(CAPI_LEIDEN_TEST c_api/leiden_test.c) -ConfigureCTest(CAPI_LEGACY_SPECTRAL_TEST c_api/legacy_spectral_test.c) +############################################################################# +# Skipping due to CUDA 12.2 failure that traces back to RAFT # +# TODO: Uncomment this once the issue is fixed. 
# +# # +# xref: https://github.com/rapidsai/cugraph/issues/4173 # +# xref: https://github.com/rapidsai/raft/issues/2186 # +############################################################################# +# ConfigureCTest(CAPI_LEGACY_SPECTRAL_TEST c_api/legacy_spectral_test.c) ConfigureCTest(CAPI_LEGACY_ECG_TEST c_api/legacy_ecg_test.c) ConfigureCTest(CAPI_CORE_NUMBER_TEST c_api/core_number_test.c) ConfigureCTest(CAPI_SIMILARITY_TEST c_api/similarity_test.c) diff --git a/dependencies.yaml b/dependencies.yaml index 58354407bbc..1bfe5fde709 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -3,7 +3,7 @@ files: all: output: [conda] matrix: - cuda: ["11.8", "12.0"] + cuda: ["11.8", "12.2"] arch: [x86_64] includes: - checks @@ -320,6 +320,10 @@ dependencies: cuda: "12.0" packages: - cuda-version=12.0 + - matrix: + cuda: "12.2" + packages: + - cuda-version=12.2 cuda: specific: - output_types: [conda] diff --git a/docs/cugraph/source/conf.py b/docs/cugraph/source/conf.py index 141e14a04ab..719f94d4838 100644 --- a/docs/cugraph/source/conf.py +++ b/docs/cugraph/source/conf.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. # # pygdf documentation build configuration file, created by # sphinx-quickstart on Wed May 3 10:59:22 2017. diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake index 3f90238109a..596908d01cd 100644 --- a/fetch_rapids.cmake +++ b/fetch_rapids.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. 
You may obtain a copy of the License at diff --git a/python/cugraph/CMakeLists.txt b/python/cugraph/CMakeLists.txt index f3548192fe2..79510f9bf8d 100644 --- a/python/cugraph/CMakeLists.txt +++ b/python/cugraph/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at diff --git a/python/pylibcugraph/CMakeLists.txt b/python/pylibcugraph/CMakeLists.txt index 0901cce0ae9..cc76ab5ba2b 100644 --- a/python/pylibcugraph/CMakeLists.txt +++ b/python/pylibcugraph/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at From a0947995eb1556bf46957804bdbd02c3ee4cb0a9 Mon Sep 17 00:00:00 2001 From: Naim <110031745+naimnv@users.noreply.github.com> Date: Thu, 15 Feb 2024 14:47:30 +0100 Subject: [PATCH 2/3] Update SG notebook (#4169) Update SG notebook to handle a couple of cases where the current notebook would fail: unweighted graphs, and graphs that don't have a node with id 0. **Changes for SG notebook:** - For graphs without weights, run BFS instead of SSSP.
- If the input graph doesn't have a node with id 0, pick an existing node id as the seed for BFS and SSSP - simplify bookkeeping for run statistics **Changes for MG notebook:** - Call SSSP or BFS based on weighted attribute Authors: - Naim (https://github.com/naimnv) Approvers: - Brad Rees (https://github.com/BradReesWork) - Don Acosta (https://github.com/acostadon) URL: https://github.com/rapidsai/cugraph/pull/4169 --- ...e.ipynb => synth_release_single_gpu.ipynb} | 276 ++++++++---------- .../synth_release_single_node_multi_gpu.ipynb | 43 +-- 2 files changed, 154 insertions(+), 165 deletions(-) rename notebooks/cugraph_benchmarks/{synth_release.ipynb => synth_release_single_gpu.ipynb} (77%) diff --git a/notebooks/cugraph_benchmarks/synth_release.ipynb b/notebooks/cugraph_benchmarks/synth_release_single_gpu.ipynb similarity index 77% rename from notebooks/cugraph_benchmarks/synth_release.ipynb rename to notebooks/cugraph_benchmarks/synth_release_single_gpu.ipynb index 18979f3ecee..1acef5d558b 100644 --- a/notebooks/cugraph_benchmarks/synth_release.ipynb +++ b/notebooks/cugraph_benchmarks/synth_release_single_gpu.ipynb @@ -40,7 +40,8 @@ "| Author | Date | Update | cuGraph Version | Test Hardware |\n", "| --------------|------------|---------------------|-----------------|------------------------|\n", "| Don Acosta | 1/12/2023 | Created | 23.02 nightly | RTX A6000, CUDA 11.7 |\n", - "| Brad Rees | 1/27/2023 | Modified | 23.02 nightly | RTX A6000, CUDA 11.7 |\n" + "| Brad Rees | 1/27/2023 | Modified | 23.02 nightly | RTX A6000, CUDA 11.7 |\n", + "| Naim, Md | 2/12/2024 | Modified | 24.04 nightly | RTX A6000, CUDA 12.0 |\n" ] }, { @@ -124,12 +125,11 @@ "import gc\n", "import os\n", "from time import perf_counter\n", - "import numpy as np\n", - "import math\n", + "import pandas as pd\n", + "from collections import defaultdict\n", "\n", "# rapids\n", "import cugraph\n", - "import cudf\n", "\n", "# NetworkX libraries\n", "import networkx as nx\n", @@ -212,7 +212,7 @@ "\n",
"\n", "# Which dataset is to be used\n", - "data = data_full\n" + "data = data_quick\n" ] }, { @@ -518,16 +518,13 @@ "metadata": {}, "outputs": [], "source": [ - "def nx_bfs(_G):\n", - " seed = 0\n", + "def nx_bfs(_G, seed):\n", " t1 = perf_counter()\n", - " nb = nx.bfs_edges(_G, seed)\n", - " nb_list = list(nb) # gen -> list\n", + " _ = nx.bfs_edges(_G, seed)\n", " t2 = perf_counter() - t1\n", " return t2\n", "\n", - "def cu_bfs(_G):\n", - " seed = 0\n", + "def cu_bfs(_G, seed):\n", " t1 = perf_counter()\n", " _ = cugraph.bfs(_G, seed)\n", " t2 = perf_counter() - t1\n", @@ -547,17 +544,21 @@ "metadata": {}, "outputs": [], "source": [ - "def nx_sssp(_G):\n", - " seed = 0\n", + "def nx_sssp(_G, seed):\n", " t1 = perf_counter()\n", - " _ = nx.shortest_path(_G, seed)\n", + " if nx.is_weighted(_G):\n", + " _ = nx.shortest_path(_G, seed)\n", + " else:\n", + " _ = nx.bfs_edges(_G, seed)\n", " t2 = perf_counter() - t1\n", " return t2\n", "\n", - "def cu_sssp(_G):\n", - " seed = 0\n", + "def cu_sssp(_G, seed):\n", " t1 = perf_counter()\n", - " _ = cugraph.sssp(_G, seed)\n", + " if _G.weighted:\n", + " _ = cugraph.sssp(_G, seed)\n", + " else:\n", + " _ = cugraph.bfs(_G, seed)\n", " t2 = perf_counter() - t1\n", " return t2\n" ] @@ -571,6 +572,27 @@ "# Benchmark" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Placeholders to collect algorithm run statistics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cugraph_algo_run_times = defaultdict(defaultdict)\n", + "nx_algo_run_times = defaultdict(defaultdict)\n", + "cugraph_graph_creation_times = defaultdict()\n", + "nx_graph_creation_times = defaultdict()\n", + "perf_algos = defaultdict(defaultdict)\n", + "perf = defaultdict(defaultdict)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -587,79 +609,50 @@ "metadata": {}, "outputs": [], "source": [ - "# arrays to capture performance gains\n", - "names = []\n", - "algos = 
[]\n", - "graph_create_cu = []\n", - "graph_create_nx = []\n", - "\n", - "# Two dimension data [file, perf]\n", - "time_algo_nx = [] # NetworkX\n", - "time_algo_cu = [] # cuGraph\n", - "perf = []\n", - "perf_algo = []\n", "\n", - "algos.append(\" \")\n", - "\n", - "i = 0\n", - "for k,v in data.items():\n", - " # init all the 2-d arrays\n", - " time_algo_nx.append([])\n", - " time_algo_cu.append([])\n", - " perf.append([])\n", - " perf_algo.append([])\n", - "\n", - " # Saved the file Name\n", - " names.append(k)\n", + "for dataset, scale in data.items():\n", "\n", " # generate data\n", " print(\"------------------------------\")\n", - " print(f'Creating Graph of Scale = {v}')\n", - "\n", - " gdf = generate_data(v)\n", + " print(f'Creating Graph of Scale = {scale}')\n", + " \n", + " gdf = generate_data(scale)\n", " pdf = gdf.to_pandas()\n", - " print(f\"\\tdata in gdf {len(gdf)} and data in pandas {len(pdf)}\")\n", "\n", - " # create the graphs\n", + " print(f\"\\tdata in gdf {len(gdf)} and data in pandas {len(pdf)}\")\n", + " \n", + " # create cuGraph and NX graphs\n", " g_cu, tcu = create_cu_graph(gdf)\n", " g_nx, tnx = create_nx_graph(pdf)\n", - " graph_create_cu.append(tcu)\n", - " graph_create_nx.append(tnx)\n", + " cugraph_graph_creation_times[dataset] = tcu\n", + " nx_graph_creation_times[dataset] = tnx\n", " del gdf, pdf\n", "\n", " # prep\n", " deg = g_cu.degree()\n", " deg_max = deg['degree'].max()\n", - "\n", " alpha = 1 / deg_max\n", " num_nodes = g_cu.number_of_vertices()\n", - "\n", " del deg\n", " gc.collect()\n", "\n", - " #----- Algorithm order is same as defined at top ----\n", - "\n", " #-- Katz \n", - " print(\"\\tKatz \", end = '')\n", - " if i == 0: \n", - " algos.append(\"Katz\")\n", - "\n", + " algorithm = \"Katz\"\n", + " print(f\"\\t{algorithm} \", end = '')\n", " print(\"n.\", end='')\n", " tx = nx_katz(g_nx, alpha)\n", " print(\"c.\", end='')\n", " tc = cu_katz(g_cu, alpha)\n", " print(\"\")\n", - "\n", - " time_algo_nx[i].append(tx)\n", - 
" time_algo_cu[i].append(tc)\n", - " perf_algo[i].append ( (tx/tc) )\n", - " perf[i].append( (tx + tnx) / (tc + tcu) )\n", + " \n", + " nx_algo_run_times[dataset][algorithm] = tx\n", + " cugraph_algo_run_times[dataset][algorithm] = tc\n", + " perf_algos[dataset][algorithm] = tx/tc \n", + " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n", "\n", " #-- BC\n", - " print(\"\\tBC k=100 \", end='')\n", - " if i == 0:\n", - " algos.append(\"BC Estimate fixed\")\n", - "\n", + " algorithm = \"BC\"\n", + " print(f\"\\t{algorithm} \", end = '')\n", " k = 100\n", " if k > num_nodes:\n", " k = int(num_nodes)\n", @@ -668,80 +661,70 @@ " print(\"c.\", end='')\n", " tc = cu_bc(g_cu, k)\n", " print(\" \")\n", - "\n", - " time_algo_nx[i].append(tx)\n", - " time_algo_cu[i].append(tc)\n", - " perf_algo[i].append ( (tx/tc) )\n", - " perf[i].append( (tx + tnx) / (tc + tcu) )\n", + " nx_algo_run_times[dataset][algorithm] = tx\n", + " cugraph_algo_run_times[dataset][algorithm] = tc\n", + " perf_algos[dataset][algorithm] = tx/tc \n", + " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n", "\n", " #-- Louvain\n", - " print(\"\\tLouvain \", end='')\n", - " if i == 0:\n", - " algos.append(\"Louvain\")\n", - "\n", + " algorithm = \"Louvain\"\n", + " print(f\"\\t{algorithm} \", end = '')\n", " print(\"n.\", end='')\n", " tx = nx_louvain(g_nx)\n", " print(\"c.\", end='')\n", " tc = cu_louvain(g_cu)\n", " print(\" \")\n", "\n", - " time_algo_nx[i].append(tx)\n", - " time_algo_cu[i].append(tc)\n", - " perf_algo[i].append ( (tx/tc) )\n", - " perf[i].append( (tx + tnx) / (tc + tcu) )\n", + " nx_algo_run_times[dataset][algorithm] = tx\n", + " cugraph_algo_run_times[dataset][algorithm] = tc\n", + " perf_algos[dataset][algorithm] = tx/tc \n", + " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n", "\n", " #-- TC\n", - " print(\"\\tTC \", end='')\n", - " if i == 0:\n", - " algos.append(\"TC\")\n", - "\n", + " algorithm = \"TC\"\n", + " print(f\"\\t{algorithm} \", end = '')\n", " print(\"n.\", 
end='')\n", " tx = nx_tc(g_nx)\n", " print(\"c.\", end='')\n", " tc = cu_tc(g_cu)\n", " print(\" \")\n", - "\n", - " time_algo_nx[i].append(tx)\n", - " time_algo_cu[i].append(tc)\n", - " perf_algo[i].append ( (tx/tc) )\n", - " perf[i].append( (tx + tnx) / (tc + tcu) )\n", + " \n", + " nx_algo_run_times[dataset][algorithm] = tx\n", + " cugraph_algo_run_times[dataset][algorithm] = tc\n", + " perf_algos[dataset][algorithm] = tx/tc \n", + " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n", "\n", " #-- Core Number\n", - " print(\"\\tCore Number \", end='')\n", - " if i == 0:\n", - " algos.append(\"Core Number\")\n", - "\n", + " algorithm = \"Core Number\"\n", + " print(f\"\\t{algorithm} \", end = '')\n", " print(\"n.\", end='')\n", " tx = nx_core_num(g_nx)\n", " print(\"c.\", end='')\n", " tc = cu_core_num(g_cu)\n", " print(\" \")\n", "\n", - " time_algo_nx[i].append(tx)\n", - " time_algo_cu[i].append(tc)\n", - " perf_algo[i].append ( (tx/tc) )\n", - " perf[i].append( (tx + tnx) / (tc + tcu) )\n", + " nx_algo_run_times[dataset][algorithm] = tx\n", + " cugraph_algo_run_times[dataset][algorithm] = tc\n", + " perf_algos[dataset][algorithm] = tx/tc \n", + " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n", "\n", " #-- PageRank\n", - " print(\"\\tPageRank \", end='')\n", - " if i == 0:\n", - " algos.append(\"PageRank\")\n", - "\n", + " algorithm = \"PageRank\"\n", + " print(f\"\\t{algorithm} \", end = '')\n", " print(\"n.\", end='')\n", " tx = nx_pagerank(g_nx)\n", " print(\"c.\", end='')\n", " tc = cu_pagerank(g_cu)\n", " print(\" \")\n", "\n", - " time_algo_nx[i].append(tx)\n", - " time_algo_cu[i].append(tc)\n", - " perf_algo[i].append ( (tx/tc) )\n", - " perf[i].append( (tx + tnx) / (tc + tcu) )\n", + " nx_algo_run_times[dataset][algorithm] = tx\n", + " cugraph_algo_run_times[dataset][algorithm] = tc\n", + " perf_algos[dataset][algorithm] = tx/tc \n", + " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n", "\n", " #-- Jaccard\n", - " print(\"\\tJaccard \", 
end='')\n", - " if i == 0:\n", - " algos.append(\"Jaccard\")\n", + " algorithm = \"Jaccard\"\n", + " print(f\"\\t{algorithm} \", end = '')\n", "\n", " print(\"n.\", end='')\n", " tx = nx_jaccard(g_nx)\n", @@ -749,46 +732,44 @@ " tc = cu_jaccard(g_cu)\n", " print(\" \")\n", "\n", - " time_algo_nx[i].append(tx)\n", - " time_algo_cu[i].append(tc)\n", - " perf_algo[i].append ( (tx/tc) )\n", - " perf[i].append( (tx + tnx) / (tc + tcu) )\n", + " nx_algo_run_times[dataset][algorithm] = tx\n", + " cugraph_algo_run_times[dataset][algorithm] = tc\n", + " perf_algos[dataset][algorithm] = tx/tc \n", + " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n", "\n", - " #-- BFS\n", - " print(\"\\tBFS \", end='')\n", - " if i == 0:\n", - " algos.append(\"BFS\")\n", + " # Seed for BFS and SSSP\n", + " nx_seed = list(g_nx.nodes)[0]\n", + " cu_seed = g_cu.nodes().to_pandas().iloc[0]\n", "\n", + " #-- BFS\n", + " algorithm = \"BFS\"\n", + " print(f\"\\t{algorithm} \", end = '')\n", " print(\"n.\", end='')\n", - " tx = nx_bfs(g_nx)\n", + " tx = nx_bfs(g_nx, seed=nx_seed)\n", " print(\"c.\", end='')\n", - " tc = cu_bfs(g_cu)\n", + " tc = cu_bfs(g_cu, seed=cu_seed)\n", " print(\" \")\n", "\n", - " time_algo_nx[i].append(tx)\n", - " time_algo_cu[i].append(tc)\n", - " perf_algo[i].append ( (tx/tc) )\n", - " perf[i].append( (tx + tnx) / (tc + tcu) )\n", + " nx_algo_run_times[dataset][algorithm] = tx\n", + " cugraph_algo_run_times[dataset][algorithm] = tc\n", + " perf_algos[dataset][algorithm] = tx/tc \n", + " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n", "\n", " #-- SSSP\n", - " print(\"\\tSSSP \", end='')\n", - " if i == 0:\n", - " algos.append(\"SSP\")\n", - "\n", + " algorithm = \"SSSP\"\n", + " print(f\"\\t{algorithm} \", end = '')\n", " print(\"n.\", end='')\n", - " tx = nx_sssp(g_nx)\n", + " tx = nx_sssp(g_nx, seed=nx_seed)\n", + "\n", " print(\"c.\", end='')\n", - " tc = cu_sssp(g_cu)\n", + " tc = cu_sssp(g_cu, seed=cu_seed)\n", " print(\" \")\n", "\n", - " 
time_algo_nx[i].append(tx)\n", - " time_algo_cu[i].append(tc)\n", - " perf_algo[i].append ( (tx/tc) )\n", - " perf[i].append( (tx + tnx) / (tc + tcu) )\n", + " nx_algo_run_times[dataset][algorithm] = tx\n", + " cugraph_algo_run_times[dataset][algorithm] = tc\n", + " perf_algos[dataset][algorithm] = tx/tc \n", + " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n", "\n", - " # increament count\n", - " i = i + 1\n", - " \n", " del g_cu, g_nx\n", " gc.collect()\n" ] @@ -799,13 +780,11 @@ "metadata": {}, "outputs": [], "source": [ - "#Print results\n", - "print(algos)\n", - "\n", - "for i in range(num_datasets):\n", - " print(f\"{names[i]}\")\n", - " print(f\"{perf[i]}\")\n", - " print(f\"{perf_algo[i]}\")" + "# Speedup\n", + "print(\"\\n\\t------Speedup (cuGraph w.r.t. NX)------\\n\")\n", + "print(pd.DataFrame(perf))\n", + "print(\"\\n\\t------Speedup (cuGraph w.r.t. NX, excluding graph creation time)------\\n\")\n", + "print(pd.DataFrame(perf_algos))" ] }, { @@ -814,15 +793,16 @@ "metadata": {}, "outputs": [], "source": [ - "#Print results\n", - "print(\"\\n------------------------------\")\n", - "print(\"\\tAlgorithm Run times (NX then cuGraph)\\n\")\n", + "# Nx and cuGraph execution times for different algorithms\n", + "nx_and_cugraph_run_times = pd.DataFrame()\n", + "for dataset in cugraph_algo_run_times.keys():\n", + " temp_df = pd.DataFrame({'NX': nx_algo_run_times[dataset], 'cuGraph': cugraph_algo_run_times[dataset]})\n", + " columns = [(dataset, 'cuGraph'), (dataset, 'NX')]\n", + " temp_df.columns = pd.MultiIndex.from_tuples(columns)\n", + " nx_and_cugraph_run_times = pd.concat([temp_df, nx_and_cugraph_run_times], axis=1)\n", "\n", - "print(algos)\n", - "for i in range(num_datasets):\n", - " print(f\"{names[i]}\")\n", - " print(f\"{time_algo_nx[i]}\")\n", - " print(f\"{time_algo_cu[i]}\")" + "print(\"\\n\\t------cuGraph and NX execution times for different algorithms-----\\n\")\n", + "print(nx_and_cugraph_run_times)" ] }, { @@ -855,7 +835,7 @@ "name": 
"python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.15 | packaged by conda-forge | (main, Nov 22 2022, 15:55:03) \n[GCC 10.4.0]" + "version": "3.1.0" }, "vscode": { "interpreter": { diff --git a/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb b/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb index c44f475c441..dd09830160f 100644 --- a/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb +++ b/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb @@ -39,9 +39,9 @@ " \n", "| Author | Date | Update | cuGraph Version | Test Hardware |\n", "| --------------|------------|---------------------|-----------------|------------------------|\n", - "| Don Acosta | 1/12/2023 | Created | 23.02 nightly | RTX A6000, CUDA 11.7 |\n", - "| Brad Rees | 1/27/2023 | Modified | 23.02 nightly | RTX A6000, CUDA 11.7 |\n", - "| Naim, Md | 2/08/2024 | Modified for SNMG | 24.04 nightly | RTX A6000, CUDA 12.0 |\n" + "| Don Acosta | 1/12/2023 | Created | 23.02 nightly | 2xRTX A6000, CUDA 11.7 |\n", + "| Brad Rees | 1/27/2023 | Modified | 23.02 nightly | 2xRTX A6000, CUDA 11.7 |\n", + "| Naim, Md | 2/08/2024 | Modified for SNMG | 24.04 nightly | 2xRTX A6000, CUDA 12.0 |\n" ] }, { @@ -590,7 +590,10 @@ "source": [ "def nx_sssp(_G, seed):\n", " t1 = perf_counter()\n", - " _ = nx.shortest_path(_G, seed)\n", + " if nx.is_weighted(_G):\n", + " _ = nx.shortest_path(_G, seed)\n", + " else:\n", + " _ = nx.bfs_edges(_G, seed)\n", " t2 = perf_counter() - t1\n", " return t2\n", "\n", @@ -599,9 +602,17 @@ " t1 = perf_counter()\n", " # SSSP requires weighted graph\n", " if mg:\n", - " _ = cugraph.dask.bfs(_G, seed)\n", + " if _G.weighted: \n", + " _ = cugraph.dask.sssp(_G, seed)\n", + " else:\n", + " _ = cugraph.dask.bfs(_G, seed)\n", + "\n", " else:\n", - " _ = cugraph.bfs(_G, seed)\n", + " if _G.weighted:\n", + " _ = cugraph.ssp(_G, seed)\n", + " else:\n", + " _ = cugraph.bfs(_G, seed)\n", + 
"\n", " t2 = perf_counter() - t1\n", " return t2\n" ] @@ -646,14 +657,12 @@ "metadata": {}, "outputs": [], "source": [ - "\n", - "\n", - "nx_algo_run_times = defaultdict(defaultdict)\n", "cugraph_algo_run_times = defaultdict(defaultdict)\n", - "perf_algos = defaultdict(defaultdict)\n", - "perf = defaultdict(defaultdict)\n", + "nx_algo_run_times = defaultdict(defaultdict)\n", "cugraph_graph_creation_times = defaultdict()\n", - "nx_graph_creation_times = defaultdict()\n" + "nx_graph_creation_times = defaultdict()\n", + "perf_algos = defaultdict(defaultdict)\n", + "perf = defaultdict(defaultdict)" ] }, { @@ -811,9 +820,9 @@ " algorithm = \"BFS\"\n", " print(f\"\\t{algorithm} \", end = '')\n", " print(\"n.\", end='')\n", - " tx = nx_bfs(g_nx, nx_seed)\n", + " tx = nx_bfs(g_nx, seed=nx_seed)\n", " print(\"c.\", end='')\n", - " tc = cu_bfs(g_cu, seed = cu_seed, mg=True)\n", + " tc = cu_bfs(g_cu, seed=cu_seed, mg=True)\n", " print(\" \")\n", "\n", " nx_algo_run_times[dataset][algorithm] = tx\n", @@ -825,10 +834,10 @@ " algorithm = \"SSSP\"\n", " print(f\"\\t{algorithm} \", end = '')\n", " print(\"n.\", end='')\n", - " tx = nx_sssp(g_nx, nx_seed)\n", + " tx = nx_sssp(g_nx, seed=nx_seed)\n", "\n", " print(\"c.\", end='')\n", - " tc = cu_sssp(g_cu, seed = cu_seed, mg=True)\n", + " tc = cu_sssp(g_cu, seed=cu_seed, mg=True)\n", " print(\" \")\n", "\n", " nx_algo_run_times[dataset][algorithm] = tx\n", @@ -856,7 +865,7 @@ "print(\"\\n\\t------Speedup (cuGraph w.r.t. NX)------\\n\")\n", "print(pd.DataFrame(perf))\n", "print(\"\\n\\t------Speedup (cuGraph w.r.t. NX, excluding graph creation time)------\\n\")\n", - "print(pd.DataFrame(perf_algos))\n" + "print(pd.DataFrame(perf_algos))" ] }, { From f0388bcb65b27a5eee2cc9f9627109594b85c244 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 16 Feb 2024 17:50:16 -0800 Subject: [PATCH 3/3] Update cugraph for compatibility with the latest cuco (#4111) This PR updates cugraph to make it compatible with the latest cuco. 
Depends on https://github.com/rapidsai/rapids-cmake/pull/526 CMake changes will be reverted once https://github.com/rapidsai/rapids-cmake/pull/526 is merged. Authors: - Yunsong Wang (https://github.com/PointKernel) - Vyas Ramasubramani (https://github.com/vyasr) - Naim (https://github.com/naimnv) - Brad Rees (https://github.com/BradReesWork) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/4111 --- cpp/src/prims/key_store.cuh | 81 +++++++------- cpp/src/prims/kv_store.cuh | 205 +++++++++++++++++++----------------- 2 files changed, 147 insertions(+), 139 deletions(-) diff --git a/cpp/src/prims/key_store.cuh b/cpp/src/prims/key_store.cuh index 6d135b4e94e..907ca36ef4a 100644 --- a/cpp/src/prims/key_store.cuh +++ b/cpp/src/prims/key_store.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -35,6 +35,8 @@ namespace cugraph { namespace detail { +using cuco_storage_type = cuco::storage<1>; ///< cuco window storage type + template struct key_binary_search_contains_op_t { using key_type = typename thrust::iterator_traits::value_type; @@ -70,9 +72,8 @@ struct key_binary_search_store_device_view_t { template struct key_cuco_store_contains_device_view_t { - using key_type = typename ViewType::key_type; - using cuco_store_device_ref_type = - typename ViewType::cuco_store_type::ref_type; + using key_type = typename ViewType::key_type; + using cuco_store_device_ref_type = typename ViewType::cuco_set_type::ref_type; static_assert(!ViewType::binary_search); @@ -88,9 +89,8 @@ struct key_cuco_store_contains_device_view_t { template struct key_cuco_store_insert_device_view_t { - using key_type = typename ViewType::key_type; - using cuco_store_device_ref_type = - typename ViewType::cuco_store_type::ref_type; + using key_type = typename ViewType::key_type; + using cuco_store_device_ref_type = typename ViewType::cuco_set_type::ref_type; static_assert(!ViewType::binary_search); @@ -147,16 +147,17 @@ class key_cuco_store_view_t { static constexpr bool binary_search = false; - using cuco_store_type = cuco::experimental::static_set< - key_t, - cuco::experimental::extent, - cuda::thread_scope_device, - thrust::equal_to, - cuco::experimental::linear_probing<1, // CG size - cuco::murmurhash3_32>, - rmm::mr::stream_allocator_adaptor>>; + using cuco_set_type = + cuco::static_set, + cuda::thread_scope_device, + thrust::equal_to, + cuco::linear_probing<1, // CG size + cuco::murmurhash3_32>, + rmm::mr::stream_allocator_adaptor>, + cuco_storage_type>; - key_cuco_store_view_t(cuco_store_type const* store) : cuco_store_(store) {} + key_cuco_store_view_t(cuco_set_type const* store) : cuco_store_(store) {} template void contains(QueryKeyIterator key_first, @@ -167,17 +168,14 @@ class key_cuco_store_view_t { cuco_store_->contains(key_first, key_last, value_first, stream); } - auto 
cuco_store_contains_device_ref() const - { - return cuco_store_->ref(cuco::experimental::contains); - } + auto cuco_store_contains_device_ref() const { return cuco_store_->ref(cuco::contains); } - auto cuco_store_insert_device_ref() const { return cuco_store_->ref(cuco::experimental::insert); } + auto cuco_store_insert_device_ref() const { return cuco_store_->ref(cuco::insert); } key_t invalid_key() const { return cuco_store_->get_empty_key_sentinel(); } private: - cuco_store_type const* cuco_store_{}; + cuco_set_type const* cuco_store_{}; }; template @@ -240,14 +238,15 @@ class key_cuco_store_t { public: using key_type = key_t; - using cuco_store_type = cuco::experimental::static_set< - key_t, - cuco::experimental::extent, - cuda::thread_scope_device, - thrust::equal_to, - cuco::experimental::linear_probing<1, // CG size - cuco::murmurhash3_32>, - rmm::mr::stream_allocator_adaptor>>; + using cuco_set_type = + cuco::static_set, + cuda::thread_scope_device, + thrust::equal_to, + cuco::linear_probing<1, // CG size + cuco::murmurhash3_32>, + rmm::mr::stream_allocator_adaptor>, + cuco_storage_type>; key_cuco_store_t(rmm::cuda_stream_view stream) {} @@ -306,7 +305,7 @@ class key_cuco_store_t { return keys; } - cuco_store_type const* cuco_store_ptr() const { return cuco_store_.get(); } + cuco_set_type const* cuco_store_ptr() const { return cuco_store_.get(); } key_t invalid_key() const { return cuco_store_->empty_key_sentinel(); } @@ -324,17 +323,19 @@ class key_cuco_store_t { auto stream_adapter = rmm::mr::make_stream_allocator_adaptor( rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()), stream); - cuco_store_ = std::make_unique( - cuco_size, - cuco::sentinel::empty_key{invalid_key}, - thrust::equal_to{}, - cuco::experimental::linear_probing<1, // CG size - cuco::murmurhash3_32>{}, - stream_adapter, - stream.value()); + cuco_store_ = + std::make_unique(cuco_size, + cuco::sentinel::empty_key{invalid_key}, + thrust::equal_to{}, + 
cuco::linear_probing<1, // CG size + cuco::murmurhash3_32>{}, + cuco::thread_scope_device, + cuco_storage_type{}, + stream_adapter, + stream.value()); } - std::unique_ptr cuco_store_{nullptr}; + std::unique_ptr cuco_store_{nullptr}; size_t capacity_{0}; size_t size_{0}; // caching as cuco_store_->size() is expensive (this scans the entire slots to diff --git a/cpp/src/prims/kv_store.cuh b/cpp/src/prims/kv_store.cuh index f17441ad6ab..be4fde2fbff 100644 --- a/cpp/src/prims/kv_store.cuh +++ b/cpp/src/prims/kv_store.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -49,6 +49,8 @@ namespace cugraph { namespace detail { +using cuco_storage_type = cuco::storage<1>; ///< cuco window storage type + template struct kv_binary_search_find_op_t { using key_type = typename thrust::iterator_traits::value_type; @@ -86,18 +88,19 @@ struct kv_binary_search_contains_op_t { template struct kv_cuco_insert_and_increment_t { - using key_type = typename thrust::iterator_traits::value_type; - using cuco_store_type = cuco::experimental::static_map< - key_type, - size_t, - cuco::experimental::extent, - cuda::thread_scope_device, - thrust::equal_to, - cuco::experimental::linear_probing<1, // CG size - cuco::murmurhash3_32>, - rmm::mr::stream_allocator_adaptor>>; - - typename cuco_store_type::ref_type device_ref{}; + using key_type = typename thrust::iterator_traits::value_type; + using cuco_set_type = + cuco::static_map, + cuda::thread_scope_device, + thrust::equal_to, + cuco::linear_probing<1, // CG size + cuco::murmurhash3_32>, + rmm::mr::stream_allocator_adaptor>, + cuco_storage_type>; + + typename cuco_set_type::ref_type device_ref{}; KeyIterator key_first{}; size_t* counter{nullptr}; size_t invalid_idx{}; @@ -109,7 +112,7 @@ struct kv_cuco_insert_and_increment_t { if (inserted) { 
cuda::atomic_ref atomic_counter(*counter); auto idx = atomic_counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed); - using ref_type = typename cuco_store_type::ref_type; + using ref_type = typename cuco_set_type::ref_type; cuda::atomic_ref ref( (*iter).second); ref.store(idx, cuda::std::memory_order_relaxed); @@ -122,18 +125,19 @@ struct kv_cuco_insert_and_increment_t { template struct kv_cuco_insert_if_and_increment_t { - using key_type = typename thrust::iterator_traits::value_type; - using cuco_store_type = cuco::experimental::static_map< - key_type, - size_t, - cuco::experimental::extent, - cuda::thread_scope_device, - thrust::equal_to, - cuco::experimental::linear_probing<1, // CG size - cuco::murmurhash3_32>, - rmm::mr::stream_allocator_adaptor>>; - - typename cuco_store_type::ref_type device_ref{}; + using key_type = typename thrust::iterator_traits::value_type; + using cuco_set_type = + cuco::static_map, + cuda::thread_scope_device, + thrust::equal_to, + cuco::linear_probing<1, // CG size + cuco::murmurhash3_32>, + rmm::mr::stream_allocator_adaptor>, + cuco_storage_type>; + + typename cuco_set_type::ref_type device_ref{}; KeyIterator key_first{}; StencilIterator stencil_first{}; PredOp pred_op{}; @@ -149,7 +153,7 @@ struct kv_cuco_insert_if_and_increment_t { if (inserted) { cuda::atomic_ref atomic_counter(*counter); auto idx = atomic_counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed); - using ref_type = typename cuco_store_type::ref_type; + using ref_type = typename cuco_set_type::ref_type; cuda::atomic_ref ref( (*iter).second); ref.store(idx, cuda::std::memory_order_relaxed); @@ -162,23 +166,24 @@ struct kv_cuco_insert_if_and_increment_t { template struct kv_cuco_insert_and_assign_t { - using cuco_store_type = cuco::experimental::static_map< - key_t, - std::conditional_t, value_t, size_t>, - cuco::experimental::extent, - cuda::thread_scope_device, - thrust::equal_to, - cuco::experimental::linear_probing<1, // CG size - 
cuco::murmurhash3_32>, - rmm::mr::stream_allocator_adaptor>>; - - typename cuco_store_type::ref_type device_ref{}; + using cuco_set_type = + cuco::static_map, value_t, size_t>, + cuco::extent, + cuda::thread_scope_device, + thrust::equal_to, + cuco::linear_probing<1, // CG size + cuco::murmurhash3_32>, + rmm::mr::stream_allocator_adaptor>, + cuco_storage_type>; + + typename cuco_set_type::ref_type device_ref{}; __device__ void operator()(thrust::tuple pair) { auto [iter, inserted] = device_ref.insert_and_find(pair); if (!inserted) { - using ref_type = typename cuco_store_type::ref_type; + using ref_type = typename cuco_set_type::ref_type; cuda::atomic_ref ref( (*iter).second); ref.store(thrust::get<1>(pair), cuda::std::memory_order_relaxed); @@ -220,10 +225,9 @@ struct kv_binary_search_store_device_view_t { template struct kv_cuco_store_find_device_view_t { - using key_type = typename ViewType::key_type; - using value_type = typename ViewType::value_type; - using cuco_store_device_ref_type = - typename ViewType::cuco_store_type::ref_type; + using key_type = typename ViewType::key_type; + using value_type = typename ViewType::value_type; + using cuco_store_device_ref_type = typename ViewType::cuco_set_type::ref_type; static_assert(!ViewType::binary_search); @@ -336,25 +340,26 @@ class kv_cuco_store_view_t { static constexpr bool binary_search = false; - using cuco_store_type = cuco::experimental::static_map< - key_t, - std::conditional_t, value_type, size_t>, - cuco::experimental::extent, - cuda::thread_scope_device, - thrust::equal_to, - cuco::experimental::linear_probing<1, // CG size - cuco::murmurhash3_32>, - rmm::mr::stream_allocator_adaptor>>; + using cuco_set_type = + cuco::static_map, value_type, size_t>, + cuco::extent, + cuda::thread_scope_device, + thrust::equal_to, + cuco::linear_probing<1, // CG size + cuco::murmurhash3_32>, + rmm::mr::stream_allocator_adaptor>, + cuco_storage_type>; template - kv_cuco_store_view_t(cuco_store_type const* store, + 
kv_cuco_store_view_t(cuco_set_type const* store, std::enable_if_t, int32_t> = 0) : cuco_store_(store) { } template - kv_cuco_store_view_t(cuco_store_type const* store, + kv_cuco_store_view_t(cuco_set_type const* store, ValueIterator value_first, type invalid_value, std::enable_if_t, int32_t> = 0) @@ -392,7 +397,7 @@ class kv_cuco_store_view_t { cuco_store_->contains(key_first, key_last, value_first, stream.value()); } - auto cuco_store_find_device_ref() const { return cuco_store_->ref(cuco::experimental::find); } + auto cuco_store_find_device_ref() const { return cuco_store_->ref(cuco::find); } template std::enable_if_t, ValueIterator> store_value_first() const @@ -412,7 +417,7 @@ class kv_cuco_store_view_t { } private: - cuco_store_type const* cuco_store_{}; + cuco_set_type const* cuco_store_{}; std::conditional_t, ValueIterator, std::byte /* dummy */> store_value_first_{}; @@ -531,15 +536,16 @@ class kv_cuco_store_t { std::invoke_result_t), value_buffer_type&>; - using cuco_store_type = cuco::experimental::static_map< - key_t, - std::conditional_t, value_t, size_t>, - cuco::experimental::extent, - cuda::thread_scope_device, - thrust::equal_to, - cuco::experimental::linear_probing<1, // CG size - cuco::murmurhash3_32>, - rmm::mr::stream_allocator_adaptor>>; + using cuco_set_type = + cuco::static_map, value_t, size_t>, + cuco::extent, + cuda::thread_scope_device, + thrust::equal_to, + cuco::linear_probing<1, // CG size + cuco::murmurhash3_32>, + rmm::mr::stream_allocator_adaptor>, + cuco_storage_type>; kv_cuco_store_t(rmm::cuda_stream_view stream) {} @@ -588,7 +594,7 @@ class kv_cuco_store_t { // requires placing the atomic variable on managed memory and this adds additional // complication. 
rmm::device_scalar counter(old_store_value_size, stream); - auto mutable_device_ref = cuco_store_->ref(cuco::experimental::insert_and_find); + auto mutable_device_ref = cuco_store_->ref(cuco::insert_and_find); rmm::device_uvector store_value_offsets(num_keys, stream); thrust::tabulate( rmm::exec_policy(stream), @@ -629,7 +635,7 @@ class kv_cuco_store_t { // requires placing the atomic variable on managed memory and this adds additional // complication. rmm::device_scalar counter(old_store_value_size, stream); - auto mutable_device_ref = cuco_store_->ref(cuco::experimental::insert_and_find); + auto mutable_device_ref = cuco_store_->ref(cuco::insert_and_find); rmm::device_uvector store_value_offsets(num_keys, stream); thrust::tabulate(rmm::exec_policy(stream), store_value_offsets.begin(), @@ -665,8 +671,8 @@ class kv_cuco_store_t { if constexpr (std::is_arithmetic_v) { auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(key_first, value_first)); // FIXME: a temporary solution till insert_and_assign is added to - // cuco::experimental::static_map - auto mutable_device_ref = cuco_store_->ref(cuco::experimental::insert_and_find); + // cuco::static_map + auto mutable_device_ref = cuco_store_->ref(cuco::insert_and_find); thrust::for_each(rmm::exec_policy(stream), pair_first, pair_first + num_keys, @@ -679,7 +685,7 @@ class kv_cuco_store_t { // requires placing the atomic variable on managed memory and this adds additional // complication. 
rmm::device_scalar counter(old_store_value_size, stream); - auto mutable_device_ref = cuco_store_->ref(cuco::experimental::insert_and_find); + auto mutable_device_ref = cuco_store_->ref(cuco::insert_and_find); rmm::device_uvector store_value_offsets(num_keys, stream); thrust::tabulate( rmm::exec_policy(stream), @@ -731,20 +737,19 @@ class kv_cuco_store_t { })), stream); - thrust::for_each( - rmm::exec_policy(stream), - kv_indices.begin(), - kv_indices.end(), - [key_first, - value_first, - store_value_first = get_dataframe_buffer_begin(store_values_), - device_ref = cuco_store_->ref(cuco::experimental::find)] __device__(auto kv_idx) { - size_t store_value_offset{}; - auto found = device_ref.find(*(key_first + kv_idx)); - assert(found != device_ref.end()); - store_value_offset = (*found).second; - *(store_value_first + store_value_offset) = *(value_first + kv_idx); - }); + thrust::for_each(rmm::exec_policy(stream), + kv_indices.begin(), + kv_indices.end(), + [key_first, + value_first, + store_value_first = get_dataframe_buffer_begin(store_values_), + device_ref = cuco_store_->ref(cuco::find)] __device__(auto kv_idx) { + size_t store_value_offset{}; + auto found = device_ref.find(*(key_first + kv_idx)); + assert(found != device_ref.end()); + store_value_offset = (*found).second; + *(store_value_first + store_value_offset) = *(value_first + kv_idx); + }); } } @@ -783,7 +788,7 @@ class kv_cuco_store_t { return std::make_tuple(std::move(retrieved_keys), std::move(retrieved_values)); } - cuco_store_type const* cuco_store_ptr() const { return cuco_store_.get(); } + cuco_set_type const* cuco_store_ptr() const { return cuco_store_.get(); } template std::enable_if_t, const_value_iterator> store_value_first() const @@ -821,23 +826,25 @@ class kv_cuco_store_t { auto stream_adapter = rmm::mr::make_stream_allocator_adaptor( rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()), stream); if constexpr (std::is_arithmetic_v) { - cuco_store_ = std::make_unique( - 
cuco_size, - cuco::sentinel::empty_key{invalid_key}, - cuco::sentinel::empty_value{invalid_value}, - thrust::equal_to{}, - cuco::experimental::linear_probing<1, // CG size - cuco::murmurhash3_32>{}, - stream_adapter, - stream.value()); + cuco_store_ = + std::make_unique(cuco_size, + cuco::sentinel::empty_key{invalid_key}, + cuco::sentinel::empty_value{invalid_value}, + thrust::equal_to{}, + cuco::linear_probing<1, // CG size + cuco::murmurhash3_32>{}, + cuco::thread_scope_device, + cuco_storage_type{}, + stream_adapter, + stream.value()); } else { - cuco_store_ = std::make_unique( + cuco_store_ = std::make_unique( cuco_size, cuco::sentinel::empty_key{invalid_key}, cuco::sentinel::empty_value{std::numeric_limits::max()}, thrust::equal_to{}, - cuco::experimental::linear_probing<1, // CG size - cuco::murmurhash3_32>{}, + cuco::linear_probing<1, // CG size + cuco::murmurhash3_32>{}, stream_adapter, stream); store_values_ = allocate_dataframe_buffer(0, stream); @@ -845,7 +852,7 @@ class kv_cuco_store_t { } } - std::unique_ptr cuco_store_{nullptr}; + std::unique_ptr cuco_store_{nullptr}; std::conditional_t, decltype(allocate_dataframe_buffer(0, rmm::cuda_stream_view{})), std::byte /* dummy */> @@ -857,7 +864,7 @@ class kv_cuco_store_t { size_t size_{ 0}; // caching as cuco_store_->size() is expensive (this scans the entire slots to handle // user inserts through a device reference (and currently this is an upper bound (this - // will become exact once we fully switch to cuco::experimental::static_map and use the + // will become exact once we fully switch to cuco::static_map and use the // static_map class's insert_and_assign function; this function will be added soon) };