From 8f31e436cd92cfe2f46bb61ee25ff7cc6bef4177 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Thu, 15 Feb 2024 05:30:26 -0600
Subject: [PATCH 1/3] Support CUDA 12.2 (#4088)

* switches to CUDA 12.2.2 for building conda packages and wheels
* adds new tests running against CUDA 12.2.2

### Notes for Reviewers

This is part of ongoing work to build and test packages against CUDA 12.2.2 across all of RAPIDS.

For more details see:

* https://github.com/rapidsai/build-planning/issues/7
* https://github.com/rapidsai/shared-workflows/pull/166

Planning a second round of PRs to revert these references back to a proper `branch-24.{nn}` release branch of `shared-workflows` once https://github.com/rapidsai/shared-workflows/pull/166 is merged.

*(created with `rapids-reviser`)*

Authors:
  - James Lamb (https://github.com/jameslamb)
  - Bradley Dice (https://github.com/bdice)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - https://github.com/jakirkham

Approvers:
  - Jake Awe (https://github.com/AyodeAwe)
  - Chuck Hastings (https://github.com/ChuckHastings)

URL: https://github.com/rapidsai/cugraph/pull/4088
---
 .github/workflows/build.yaml                  | 32 +++++++-------
 .github/workflows/pr.yaml                     | 42 +++++++++----------
 .github/workflows/test.yaml                   | 16 +++----
 ci/build_docs.sh                              |  2 +-
 ..._64.yaml => all_cuda-122_arch-x86_64.yaml} |  4 +-
 conda/recipes/cugraph/meta.yaml               | 10 ++++-
 conda/recipes/libcugraph/meta.yaml            | 19 ++++++++-
 conda/recipes/pylibcugraph/meta.yaml          | 11 ++++-
 cpp/tests/CMakeLists.txt                      |  9 +++-
 dependencies.yaml                             |  6 ++-
 docs/cugraph/source/conf.py                   |  2 +-
 fetch_rapids.cmake                            |  2 +-
 python/cugraph/CMakeLists.txt                 |  2 +-
 python/pylibcugraph/CMakeLists.txt            |  2 +-
 14 files changed, 102 insertions(+), 57 deletions(-)
 rename conda/environments/{all_cuda-120_arch-x86_64.yaml => all_cuda-122_arch-x86_64.yaml} (96%)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 5ecff1f8a75..92f70bb2aa7 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -28,7 +28,7 @@ concurrency:
 jobs:
   cpp-build:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@test-cuda-12.2
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -38,7 +38,7 @@ jobs:
   python-build:
     needs: [cpp-build]
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@test-cuda-12.2
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -47,7 +47,7 @@ jobs:
   upload-conda:
     needs: [cpp-build, python-build]
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@test-cuda-12.2
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -57,7 +57,7 @@ jobs:
     if: github.ref_type == 'branch'
     needs: python-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@test-cuda-12.2
     with:
       arch: "amd64"
       branch: ${{ inputs.branch }}
@@ -69,7 +69,7 @@ jobs:
       sha: ${{ inputs.sha }}
   wheel-build-pylibcugraph:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -83,7 +83,7 @@ jobs:
   wheel-publish-pylibcugraph:
     needs: wheel-build-pylibcugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@test-cuda-12.2
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -93,7 +93,7 @@ jobs:
   wheel-build-cugraph:
     needs: wheel-publish-pylibcugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -106,7 +106,7 @@ jobs:
   wheel-publish-cugraph:
     needs: wheel-build-cugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@test-cuda-12.2
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -116,7 +116,7 @@ jobs:
   wheel-build-nx-cugraph:
     needs: wheel-publish-pylibcugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -126,7 +126,7 @@ jobs:
   wheel-publish-nx-cugraph:
     needs: wheel-build-nx-cugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@test-cuda-12.2
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -136,7 +136,7 @@ jobs:
   wheel-build-cugraph-dgl:
     needs: wheel-publish-cugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -146,7 +146,7 @@ jobs:
   wheel-publish-cugraph-dgl:
     needs: wheel-build-cugraph-dgl
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@test-cuda-12.2
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -156,7 +156,7 @@ jobs:
   wheel-build-cugraph-pyg:
     needs: wheel-publish-cugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -166,7 +166,7 @@ jobs:
   wheel-publish-cugraph-pyg:
     needs: wheel-build-cugraph-pyg
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@test-cuda-12.2
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -175,7 +175,7 @@ jobs:
       package-name: cugraph-pyg
   wheel-build-cugraph-equivariant:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -185,7 +185,7 @@ jobs:
   wheel-publish-cugraph-equivariant:
     needs: wheel-build-cugraph-equivariant
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@test-cuda-12.2
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 349d682a59e..36a53df14e1 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -33,41 +33,41 @@ jobs:
       - wheel-tests-cugraph-equivariant
       - devcontainer
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@test-cuda-12.2
   checks:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@test-cuda-12.2
     with:
       enable_check_generated_files: false
   conda-cpp-build:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@test-cuda-12.2
     with:
       build_type: pull-request
       node_type: cpu32
   conda-cpp-tests:
     needs: conda-cpp-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@test-cuda-12.2
     with:
       build_type: pull-request
   conda-python-build:
     needs: conda-cpp-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@test-cuda-12.2
     with:
       build_type: pull-request
   conda-python-tests:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@test-cuda-12.2
     with:
       build_type: pull-request
   conda-notebook-tests:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@test-cuda-12.2
     with:
       build_type: pull-request
       node_type: "gpu-v100-latest-1"
@@ -77,7 +77,7 @@ jobs:
   docs-build:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@test-cuda-12.2
     with:
       build_type: pull-request
       node_type: "gpu-v100-latest-1"
@@ -87,7 +87,7 @@ jobs:
   wheel-build-pylibcugraph:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2
     with:
       build_type: pull-request
       script: ci/build_wheel_pylibcugraph.sh
@@ -98,14 +98,14 @@ jobs:
   wheel-tests-pylibcugraph:
     needs: wheel-build-pylibcugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2
     with:
       build_type: pull-request
       script: ci/test_wheel_pylibcugraph.sh
   wheel-build-cugraph:
     needs: wheel-tests-pylibcugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2
     with:
       build_type: pull-request
       script: ci/build_wheel_cugraph.sh
@@ -115,35 +115,35 @@ jobs:
   wheel-tests-cugraph:
     needs: wheel-build-cugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2
     with:
       build_type: pull-request
       script: ci/test_wheel_cugraph.sh
   wheel-build-nx-cugraph:
     needs: wheel-tests-pylibcugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2
     with:
       build_type: pull-request
       script: ci/build_wheel_nx-cugraph.sh
   wheel-tests-nx-cugraph:
     needs: wheel-build-nx-cugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2
     with:
       build_type: pull-request
       script: ci/test_wheel_nx-cugraph.sh
   wheel-build-cugraph-dgl:
     needs: wheel-tests-cugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2
     with:
       build_type: pull-request
       script: ci/build_wheel_cugraph-dgl.sh
   wheel-tests-cugraph-dgl:
     needs: wheel-build-cugraph-dgl
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2
     with:
       build_type: pull-request
       script: ci/test_wheel_cugraph-dgl.sh
@@ -151,35 +151,35 @@ jobs:
   wheel-build-cugraph-pyg:
     needs: wheel-tests-cugraph
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2
     with:
       build_type: pull-request
       script: ci/build_wheel_cugraph-pyg.sh
   wheel-tests-cugraph-pyg:
     needs: wheel-build-cugraph-pyg
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2
     with:
       build_type: pull-request
       script: ci/test_wheel_cugraph-pyg.sh
       matrix_filter: map(select(.ARCH == "amd64" and .CUDA_VER == "11.8.0"))
   wheel-build-cugraph-equivariant:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2
     with:
       build_type: pull-request
       script: ci/build_wheel_cugraph-equivariant.sh
   wheel-tests-cugraph-equivariant:
     needs: wheel-build-cugraph-equivariant
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2
     with:
       build_type: pull-request
       script: ci/test_wheel_cugraph-equivariant.sh
       matrix_filter: map(select(.ARCH == "amd64"))
   devcontainer:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@test-cuda-12.2
     with:
       node_type: cpu32
       extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index f6a04fcc776..7e81354d1b8 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -16,7 +16,7 @@ on:
 jobs:
   conda-cpp-tests:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@test-cuda-12.2
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -24,7 +24,7 @@ jobs:
       sha: ${{ inputs.sha }}
   conda-python-tests:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@test-cuda-12.2
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -32,7 +32,7 @@ jobs:
       sha: ${{ inputs.sha }}
   wheel-tests-pylibcugraph:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -41,7 +41,7 @@ jobs:
       script: ci/test_wheel_pylibcugraph.sh
   wheel-tests-cugraph:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -50,7 +50,7 @@ jobs:
       script: ci/test_wheel_cugraph.sh
   wheel-tests-nx-cugraph:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -59,7 +59,7 @@ jobs:
       script: ci/test_wheel_nx-cugraph.sh
   wheel-tests-cugraph-dgl:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -69,7 +69,7 @@ jobs:
       matrix_filter: map(select(.ARCH == "amd64"))
   wheel-tests-cugraph-pyg:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -79,7 +79,7 @@ jobs:
       matrix_filter: map(select(.ARCH == "amd64" and .CUDA_VER == "11.8.0"))
   wheel-tests-cugraph-equivariant:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
diff --git a/ci/build_docs.sh b/ci/build_docs.sh
index 0ed2e69ae90..298a8b68791 100755
--- a/ci/build_docs.sh
+++ b/ci/build_docs.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 
 set -euo pipefail
 
diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml
similarity index 96%
rename from conda/environments/all_cuda-120_arch-x86_64.yaml
rename to conda/environments/all_cuda-122_arch-x86_64.yaml
index 03dade0ed1f..65734c96ff9 100644
--- a/conda/environments/all_cuda-120_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-122_arch-x86_64.yaml
@@ -18,7 +18,7 @@ dependencies:
 - cuda-nvcc
 - cuda-nvtx-dev
 - cuda-profiler-api
-- cuda-version=12.0
+- cuda-version=12.2
 - cudf==24.4.*
 - cupy>=12.0.0
 - cxx-compiler
@@ -81,4 +81,4 @@ dependencies:
 - ucx-py==0.37.*
 - wget
 - wheel
-name: all_cuda-120_arch-x86_64
+name: all_cuda-122_arch-x86_64
diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml
index b8e3072dd38..ed214e4eb6e 100644
--- a/conda/recipes/cugraph/meta.yaml
+++ b/conda/recipes/cugraph/meta.yaml
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2023, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
 
 {% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
 {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
@@ -34,7 +34,12 @@ build:
     - SCCACHE_S3_USE_SSL
     - SCCACHE_S3_NO_CREDENTIALS
   ignore_run_exports_from:
+    {% if cuda_major == "11" %}
+    - {{ compiler('cuda11') }}
+    {% else %}
     - {{ compiler('cuda') }}
+    - cuda-cudart-dev
+    {% endif %}
 
 requirements:
   build:
@@ -53,6 +58,8 @@ requirements:
     - cuda-version ={{ cuda_version }}
     {% if cuda_major == "11" %}
     - cudatoolkit
+    {% else %}
+    - cuda-cudart-dev
     {% endif %}
     - cudf ={{ minor_version }}
     - cython >=3.0.0
@@ -70,6 +77,7 @@ requirements:
     - cudatoolkit
     - cuda-python >=11.7.1,<12.0a0
     {% else %}
+    - cuda-cudart
     - cuda-python >=12.0,<13.0a0
     {% endif %}
     - cudf ={{ minor_version }}
diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml
index 66f72e6b6b5..31514a3a21d 100644
--- a/conda/recipes/libcugraph/meta.yaml
+++ b/conda/recipes/libcugraph/meta.yaml
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2023, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
 
 {% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
 {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
@@ -88,12 +88,16 @@ outputs:
       ignore_run_exports_from:
         {% if cuda_major == "11" %}
         - {{ compiler('cuda11') }}
+        {% else %}
+        - {{ compiler('cuda') }}
+        - cuda-cudart-dev
         {% endif %}
     requirements:
       build:
         - cmake {{ cmake_version }}
       host:
         - cuda-version ={{ cuda_version }}
+        - cuda-cudart-dev
       run:
         - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
         {% if cuda_major == "11" %}
@@ -105,6 +109,7 @@ outputs:
         - libcusparse {{ cuda11_libcusparse_run_version }}
         {% else %}
         - cuda-profiler-api
+        - cuda-cudart
         - libcublas
         - libcurand
         - libcusolver
@@ -131,17 +136,23 @@ outputs:
       ignore_run_exports_from:
         {% if cuda_major == "11" %}
         - {{ compiler('cuda11') }}
+        {% else %}
+        - {{ compiler('cuda') }}
+        - cuda-cudart-dev
         {% endif %}
     requirements:
       build:
         - cmake {{ cmake_version }}
       host:
         - cuda-version ={{ cuda_version }}
+        - cuda-cudart-dev
       run:
         - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
         - {{ pin_subpackage('libcugraph', exact=True) }}
         {% if cuda_major == "11" %}
         - cudatoolkit
+        {% else %}
+        - cuda-cudart
         {% endif %}
         - libcudf ={{ minor_version }}
         - librmm ={{ minor_version }}
@@ -160,18 +171,24 @@ outputs:
       ignore_run_exports_from:
         {% if cuda_major == "11" %}
         - {{ compiler('cuda11') }}
+        {% else %}
+        - {{ compiler('cuda') }}
+        - cuda-cudart-dev
         {% endif %}
     requirements:
       build:
         - cmake {{ cmake_version }}
       host:
         - cuda-version ={{ cuda_version }}
+        - cuda-cudart-dev
       run:
         - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
         - {{ pin_subpackage('libcugraph_etl', exact=True) }}
         - {{ pin_subpackage('libcugraph', exact=True) }}
         {% if cuda_major == "11" %}
         - cudatoolkit
+        {% else %}
+        - cuda-cudart
         {% endif %}
         - gmock {{ gtest_version }}
         - gtest {{ gtest_version }}
diff --git a/conda/recipes/pylibcugraph/meta.yaml b/conda/recipes/pylibcugraph/meta.yaml
index 0f66f55ccaa..42b5bc625eb 100644
--- a/conda/recipes/pylibcugraph/meta.yaml
+++ b/conda/recipes/pylibcugraph/meta.yaml
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 
 {% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
 {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
@@ -34,7 +34,12 @@ build:
     - SCCACHE_S3_USE_SSL
     - SCCACHE_S3_NO_CREDENTIALS
   ignore_run_exports_from:
+    {% if cuda_major == "11" %}
+    - {{ compiler('cuda11') }}
+    {% else %}
     - {{ compiler('cuda') }}
+    - cuda-cudart-dev
+    {% endif %}
 
 requirements:
   build:
@@ -53,6 +58,8 @@ requirements:
     - cuda-version ={{ cuda_version }}
     {% if cuda_major == "11" %}
     - cudatoolkit
+    {% else %}
+    - cuda-cudart-dev
     {% endif %}
     - cython >=3.0.0
     - libcugraph ={{ version }}
@@ -64,6 +71,8 @@ requirements:
     - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
     {% if cuda_major == "11" %}
     - cudatoolkit
+    {% else %}
+    - cuda-cudart
     {% endif %}
     - libcugraph ={{ version }}
     - python
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 3df979fe5c2..1e5d0489b1f 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -737,7 +737,14 @@ ConfigureCTest(CAPI_RANDOM_WALKS_TEST c_api/sg_random_walks_test.c)
 ConfigureCTest(CAPI_TRIANGLE_COUNT_TEST c_api/triangle_count_test.c)
 ConfigureCTest(CAPI_LOUVAIN_TEST c_api/louvain_test.c)
 ConfigureCTest(CAPI_LEIDEN_TEST c_api/leiden_test.c)
-ConfigureCTest(CAPI_LEGACY_SPECTRAL_TEST c_api/legacy_spectral_test.c)
+#############################################################################
+# Skipping due to CUDA 12.2 failure that traces back to RAFT                #
+# TODO: Uncomment this once the issue is fixed.                             #
+#                                                                           #
+# xref: https://github.com/rapidsai/cugraph/issues/4173                     #
+# xref: https://github.com/rapidsai/raft/issues/2186                        #
+#############################################################################
+# ConfigureCTest(CAPI_LEGACY_SPECTRAL_TEST c_api/legacy_spectral_test.c)
 ConfigureCTest(CAPI_LEGACY_ECG_TEST c_api/legacy_ecg_test.c)
 ConfigureCTest(CAPI_CORE_NUMBER_TEST c_api/core_number_test.c)
 ConfigureCTest(CAPI_SIMILARITY_TEST c_api/similarity_test.c)
diff --git a/dependencies.yaml b/dependencies.yaml
index 58354407bbc..1bfe5fde709 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -3,7 +3,7 @@ files:
   all:
     output: [conda]
     matrix:
-      cuda: ["11.8", "12.0"]
+      cuda: ["11.8", "12.2"]
       arch: [x86_64]
     includes:
       - checks
@@ -320,6 +320,10 @@ dependencies:
               cuda: "12.0"
             packages:
               - cuda-version=12.0
+          - matrix:
+              cuda: "12.2"
+            packages:
+              - cuda-version=12.2
   cuda:
     specific:
       - output_types: [conda]
diff --git a/docs/cugraph/source/conf.py b/docs/cugraph/source/conf.py
index 141e14a04ab..719f94d4838 100644
--- a/docs/cugraph/source/conf.py
+++ b/docs/cugraph/source/conf.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2023, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
 #
 # pygdf documentation build configuration file, created by
 # sphinx-quickstart on Wed May  3 10:59:22 2017.
diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake
index 3f90238109a..596908d01cd 100644
--- a/fetch_rapids.cmake
+++ b/fetch_rapids.cmake
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cugraph/CMakeLists.txt b/python/cugraph/CMakeLists.txt
index f3548192fe2..79510f9bf8d 100644
--- a/python/cugraph/CMakeLists.txt
+++ b/python/cugraph/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/pylibcugraph/CMakeLists.txt b/python/pylibcugraph/CMakeLists.txt
index 0901cce0ae9..cc76ab5ba2b 100644
--- a/python/pylibcugraph/CMakeLists.txt
+++ b/python/pylibcugraph/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at

From a0947995eb1556bf46957804bdbd02c3ee4cb0a9 Mon Sep 17 00:00:00 2001
From: Naim <110031745+naimnv@users.noreply.github.com>
Date: Thu, 15 Feb 2024 14:47:30 +0100
Subject: [PATCH 2/3] Update SG notebook (#4169)

Update SG notebook to handle a handle a couple of cases where the current notebook would fail for unweighted graph and  if the graph doesn't  have node id 0.

**Changes for SG notebook:**
- For graphs without weights, run bfs instead of sssp.
- For the input graph doesn't have node with id 0, peek an existing node id as seed for BFS and SSP
- simplify bookkeeping for run statistics

**Changes for MG notebook:**
-  Call SSSP or BFS based on weighted attribute

Authors:
  - Naim (https://github.com/naimnv)

Approvers:
  - Brad Rees (https://github.com/BradReesWork)
  - Don Acosta (https://github.com/acostadon)

URL: https://github.com/rapidsai/cugraph/pull/4169
---
 ...e.ipynb => synth_release_single_gpu.ipynb} | 276 ++++++++----------
 .../synth_release_single_node_multi_gpu.ipynb |  43 +--
 2 files changed, 154 insertions(+), 165 deletions(-)
 rename notebooks/cugraph_benchmarks/{synth_release.ipynb => synth_release_single_gpu.ipynb} (77%)

diff --git a/notebooks/cugraph_benchmarks/synth_release.ipynb b/notebooks/cugraph_benchmarks/synth_release_single_gpu.ipynb
similarity index 77%
rename from notebooks/cugraph_benchmarks/synth_release.ipynb
rename to notebooks/cugraph_benchmarks/synth_release_single_gpu.ipynb
index 18979f3ecee..1acef5d558b 100644
--- a/notebooks/cugraph_benchmarks/synth_release.ipynb
+++ b/notebooks/cugraph_benchmarks/synth_release_single_gpu.ipynb
@@ -40,7 +40,8 @@
     "| Author        |    Date    |  Update             | cuGraph Version |  Test Hardware         |\n",
     "| --------------|------------|---------------------|-----------------|------------------------|\n",
     "| Don Acosta    | 1/12/2023  | Created             | 23.02 nightly   | RTX A6000, CUDA 11.7   |\n",
-    "| Brad Rees     | 1/27/2023  | Modified            | 23.02 nightly   | RTX A6000, CUDA 11.7   |\n"
+    "| Brad Rees     | 1/27/2023  | Modified            | 23.02 nightly   | RTX A6000, CUDA 11.7   |\n",
+    "| Naim, Md      | 2/12/2024  | Modified            | 24.04 nightly   | RTX A6000, CUDA 12.0   |\n"
    ]
   },
   {
@@ -124,12 +125,11 @@
     "import gc\n",
     "import os\n",
     "from time import perf_counter\n",
-    "import numpy as np\n",
-    "import math\n",
+    "import pandas as pd\n",
+    "from collections import defaultdict\n",
     "\n",
     "# rapids\n",
     "import cugraph\n",
-    "import cudf\n",
     "\n",
     "# NetworkX libraries\n",
     "import networkx as nx\n",
@@ -212,7 +212,7 @@
     "\n",
     "\n",
     "# Which dataset is to be used\n",
-    "data = data_full\n"
+    "data = data_quick\n"
    ]
   },
   {
@@ -518,16 +518,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def nx_bfs(_G):\n",
-    "    seed = 0\n",
+    "def nx_bfs(_G, seed):\n",
     "    t1 = perf_counter()\n",
-    "    nb = nx.bfs_edges(_G, seed)\n",
-    "    nb_list = list(nb) # gen -> list\n",
+    "    _ = nx.bfs_edges(_G, seed)\n",
     "    t2 = perf_counter() - t1\n",
     "    return t2\n",
     "\n",
-    "def cu_bfs(_G):\n",
-    "    seed = 0\n",
+    "def cu_bfs(_G, seed):\n",
     "    t1 = perf_counter()\n",
     "    _ = cugraph.bfs(_G, seed)\n",
     "    t2 = perf_counter() - t1\n",
@@ -547,17 +544,21 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def nx_sssp(_G):\n",
-    "    seed = 0\n",
+    "def nx_sssp(_G, seed):\n",
     "    t1 = perf_counter()\n",
-    "    _ = nx.shortest_path(_G, seed)\n",
+    "    if nx.is_weighted(_G):\n",
+    "        _ = nx.shortest_path(_G, seed)\n",
+    "    else:\n",
+    "        _ = nx.bfs_edges(_G, seed)\n",
     "    t2 = perf_counter() - t1\n",
     "    return t2\n",
     "\n",
-    "def cu_sssp(_G):\n",
-    "    seed = 0\n",
+    "def cu_sssp(_G, seed):\n",
     "    t1 = perf_counter()\n",
-    "    _ = cugraph.sssp(_G, seed)\n",
+    "    if _G.weighted:\n",
+    "        _ = cugraph.sssp(_G, seed)\n",
+    "    else:\n",
+    "        _ = cugraph.bfs(_G, seed)\n",
     "    t2 = perf_counter() - t1\n",
     "    return t2\n"
    ]
@@ -571,6 +572,27 @@
     "# Benchmark"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Placeholders to collect algorithm run statistics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cugraph_algo_run_times = defaultdict(defaultdict)\n",
+    "nx_algo_run_times = defaultdict(defaultdict)\n",
+    "cugraph_graph_creation_times = defaultdict()\n",
+    "nx_graph_creation_times = defaultdict()\n",
+    "perf_algos = defaultdict(defaultdict)\n",
+    "perf = defaultdict(defaultdict)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -587,79 +609,50 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# arrays to capture performance gains\n",
-    "names = []\n",
-    "algos = []\n",
-    "graph_create_cu = []\n",
-    "graph_create_nx = []\n",
-    "\n",
-    "# Two dimension data [file, perf]\n",
-    "time_algo_nx = []          # NetworkX\n",
-    "time_algo_cu = []          # cuGraph\n",
-    "perf = []\n",
-    "perf_algo = []\n",
     "\n",
-    "algos.append(\"   \")\n",
-    "\n",
-    "i = 0\n",
-    "for k,v in data.items():\n",
-    "    # init all the 2-d arrays\n",
-    "    time_algo_nx.append([])\n",
-    "    time_algo_cu.append([])\n",
-    "    perf.append([])\n",
-    "    perf_algo.append([])\n",
-    "\n",
-    "    # Saved the file Name\n",
-    "    names.append(k)\n",
+    "for dataset, scale in data.items():\n",
     "\n",
     "    # generate data\n",
     "    print(\"------------------------------\")\n",
-    "    print(f'Creating Graph of Scale = {v}')\n",
-    "\n",
-    "    gdf = generate_data(v)\n",
+    "    print(f'Creating Graph of Scale = {scale}')\n",
+    "    \n",
+    "    gdf = generate_data(scale)\n",
     "    pdf = gdf.to_pandas()\n",
-    "    print(f\"\\tdata in gdf {len(gdf)} and data in pandas {len(pdf)}\")\n",
     "\n",
-    "    # create the graphs\n",
+    "    print(f\"\\tdata in gdf {len(gdf)} and data in pandas {len(pdf)}\")\n",
+    "    \n",
+    "    # create cuGraph and NX graphs\n",
     "    g_cu, tcu = create_cu_graph(gdf)\n",
     "    g_nx, tnx = create_nx_graph(pdf)\n",
-    "    graph_create_cu.append(tcu)\n",
-    "    graph_create_nx.append(tnx)\n",
+    "    cugraph_graph_creation_times[dataset] = tcu\n",
+    "    nx_graph_creation_times[dataset] = tnx\n",
     "    del gdf, pdf\n",
     "\n",
     "    # prep\n",
     "    deg = g_cu.degree()\n",
     "    deg_max = deg['degree'].max()\n",
-    "\n",
     "    alpha = 1 / deg_max\n",
     "    num_nodes = g_cu.number_of_vertices()\n",
-    "\n",
     "    del deg\n",
     "    gc.collect()\n",
     "\n",
-    "    #----- Algorithm order is same as defined at top ----\n",
-    "\n",
     "    #-- Katz \n",
-    "    print(\"\\tKatz  \", end = '')\n",
-    "    if i == 0: \n",
-    "        algos.append(\"Katz\")\n",
-    "\n",
+    "    algorithm = \"Katz\"\n",
+    "    print(f\"\\t{algorithm}  \", end = '')\n",
     "    print(\"n.\", end='')\n",
     "    tx = nx_katz(g_nx, alpha)\n",
     "    print(\"c.\", end='')\n",
     "    tc = cu_katz(g_cu, alpha)\n",
     "    print(\"\")\n",
-    "\n",
-    "    time_algo_nx[i].append(tx)\n",
-    "    time_algo_cu[i].append(tc)\n",
-    "    perf_algo[i].append ( (tx/tc) )\n",
-    "    perf[i].append( (tx + tnx) /  (tc + tcu) )\n",
+    "    \n",
+    "    nx_algo_run_times[dataset][algorithm] = tx\n",
+    "    cugraph_algo_run_times[dataset][algorithm] = tc\n",
+    "    perf_algos[dataset][algorithm] = tx/tc \n",
+    "    perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n",
     "\n",
     "    #-- BC\n",
-    "    print(\"\\tBC k=100  \", end='')\n",
-    "    if i == 0:\n",
-    "        algos.append(\"BC Estimate fixed\")\n",
-    "\n",
+    "    algorithm = \"BC\"\n",
+    "    print(f\"\\t{algorithm}  \", end = '')\n",
     "    k = 100\n",
     "    if k > num_nodes:\n",
     "        k = int(num_nodes)\n",
@@ -668,80 +661,70 @@
     "    print(\"c.\", end='')\n",
     "    tc = cu_bc(g_cu, k)\n",
     "    print(\" \")\n",
-    "\n",
-    "    time_algo_nx[i].append(tx)\n",
-    "    time_algo_cu[i].append(tc)\n",
-    "    perf_algo[i].append ( (tx/tc) )\n",
-    "    perf[i].append( (tx + tnx) /  (tc + tcu) )\n",
+    "    nx_algo_run_times[dataset][algorithm] = tx\n",
+    "    cugraph_algo_run_times[dataset][algorithm] = tc\n",
+    "    perf_algos[dataset][algorithm] = tx/tc \n",
+    "    perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n",
     "\n",
     "    #-- Louvain\n",
-    "    print(\"\\tLouvain  \", end='')\n",
-    "    if i == 0:\n",
-    "        algos.append(\"Louvain\")\n",
-    "\n",
+    "    algorithm = \"Louvain\"\n",
+    "    print(f\"\\t{algorithm}  \", end = '')\n",
     "    print(\"n.\", end='')\n",
     "    tx = nx_louvain(g_nx)\n",
     "    print(\"c.\", end='')\n",
     "    tc = cu_louvain(g_cu)\n",
     "    print(\" \")\n",
     "\n",
-    "    time_algo_nx[i].append(tx)\n",
-    "    time_algo_cu[i].append(tc)\n",
-    "    perf_algo[i].append ( (tx/tc) )\n",
-    "    perf[i].append( (tx + tnx) /  (tc + tcu) )\n",
+    "    nx_algo_run_times[dataset][algorithm] = tx\n",
+    "    cugraph_algo_run_times[dataset][algorithm] = tc\n",
+    "    perf_algos[dataset][algorithm] = tx/tc \n",
+    "    perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n",
     "\n",
     "    #-- TC\n",
-    "    print(\"\\tTC  \", end='')\n",
-    "    if i == 0:\n",
-    "        algos.append(\"TC\")\n",
-    "\n",
+    "    algorithm = \"TC\"\n",
+    "    print(f\"\\t{algorithm}  \", end = '')\n",
     "    print(\"n.\", end='')\n",
     "    tx = nx_tc(g_nx)\n",
     "    print(\"c.\", end='')\n",
     "    tc = cu_tc(g_cu)\n",
     "    print(\" \")\n",
-    "\n",
-    "    time_algo_nx[i].append(tx)\n",
-    "    time_algo_cu[i].append(tc)\n",
-    "    perf_algo[i].append ( (tx/tc) )\n",
-    "    perf[i].append( (tx + tnx) /  (tc + tcu) )\n",
+    "    \n",
+    "    nx_algo_run_times[dataset][algorithm] = tx\n",
+    "    cugraph_algo_run_times[dataset][algorithm] = tc\n",
+    "    perf_algos[dataset][algorithm] = tx/tc \n",
+    "    perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n",
     "\n",
     "    #-- Core Number\n",
-    "    print(\"\\tCore Number  \", end='')\n",
-    "    if i == 0:\n",
-    "        algos.append(\"Core Number\")\n",
-    "\n",
+    "    algorithm = \"Core Number\"\n",
+    "    print(f\"\\t{algorithm}  \", end = '')\n",
     "    print(\"n.\", end='')\n",
     "    tx = nx_core_num(g_nx)\n",
     "    print(\"c.\", end='')\n",
     "    tc = cu_core_num(g_cu)\n",
     "    print(\" \")\n",
     "\n",
-    "    time_algo_nx[i].append(tx)\n",
-    "    time_algo_cu[i].append(tc)\n",
-    "    perf_algo[i].append ( (tx/tc) )\n",
-    "    perf[i].append( (tx + tnx) /  (tc + tcu) )\n",
+    "    nx_algo_run_times[dataset][algorithm] = tx\n",
+    "    cugraph_algo_run_times[dataset][algorithm] = tc\n",
+    "    perf_algos[dataset][algorithm] = tx/tc \n",
+    "    perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n",
     "\n",
     "    #-- PageRank\n",
-    "    print(\"\\tPageRank  \", end='')\n",
-    "    if i == 0:\n",
-    "        algos.append(\"PageRank\")\n",
-    "\n",
+    "    algorithm = \"PageRank\"\n",
+    "    print(f\"\\t{algorithm}  \", end = '')\n",
     "    print(\"n.\", end='')\n",
     "    tx = nx_pagerank(g_nx)\n",
     "    print(\"c.\", end='')\n",
     "    tc = cu_pagerank(g_cu)\n",
     "    print(\" \")\n",
     "\n",
-    "    time_algo_nx[i].append(tx)\n",
-    "    time_algo_cu[i].append(tc)\n",
-    "    perf_algo[i].append ( (tx/tc) )\n",
-    "    perf[i].append( (tx + tnx) /  (tc + tcu) )\n",
+    "    nx_algo_run_times[dataset][algorithm] = tx\n",
+    "    cugraph_algo_run_times[dataset][algorithm] = tc\n",
+    "    perf_algos[dataset][algorithm] = tx/tc \n",
+    "    perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n",
     "\n",
     "    #-- Jaccard\n",
-    "    print(\"\\tJaccard  \", end='')\n",
-    "    if i == 0:\n",
-    "        algos.append(\"Jaccard\")\n",
+    "    algorithm = \"Jaccard\"\n",
+    "    print(f\"\\t{algorithm}  \", end = '')\n",
     "\n",
     "    print(\"n.\", end='')\n",
     "    tx = nx_jaccard(g_nx)\n",
@@ -749,46 +732,44 @@
     "    tc = cu_jaccard(g_cu)\n",
     "    print(\" \")\n",
     "\n",
-    "    time_algo_nx[i].append(tx)\n",
-    "    time_algo_cu[i].append(tc)\n",
-    "    perf_algo[i].append ( (tx/tc) )\n",
-    "    perf[i].append( (tx + tnx) /  (tc + tcu) )\n",
+    "    nx_algo_run_times[dataset][algorithm] = tx\n",
+    "    cugraph_algo_run_times[dataset][algorithm] = tc\n",
+    "    perf_algos[dataset][algorithm] = tx/tc \n",
+    "    perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n",
     "\n",
-    "    #-- BFS\n",
-    "    print(\"\\tBFS  \", end='')\n",
-    "    if i == 0:\n",
-    "        algos.append(\"BFS\")\n",
+    "    # Seed for BFS and SSSP\n",
+    "    nx_seed = list(g_nx.nodes)[0]\n",
+    "    cu_seed = g_cu.nodes().to_pandas().iloc[0]\n",
     "\n",
+    "    #-- BFS\n",
+    "    algorithm = \"BFS\"\n",
+    "    print(f\"\\t{algorithm}  \", end = '')\n",
     "    print(\"n.\", end='')\n",
-    "    tx = nx_bfs(g_nx)\n",
+    "    tx = nx_bfs(g_nx, seed=nx_seed)\n",
     "    print(\"c.\", end='')\n",
-    "    tc = cu_bfs(g_cu)\n",
+    "    tc = cu_bfs(g_cu, seed=cu_seed)\n",
     "    print(\" \")\n",
     "\n",
-    "    time_algo_nx[i].append(tx)\n",
-    "    time_algo_cu[i].append(tc)\n",
-    "    perf_algo[i].append ( (tx/tc) )\n",
-    "    perf[i].append( (tx + tnx) /  (tc + tcu) )\n",
+    "    nx_algo_run_times[dataset][algorithm] = tx\n",
+    "    cugraph_algo_run_times[dataset][algorithm] = tc\n",
+    "    perf_algos[dataset][algorithm] = tx/tc \n",
+    "    perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n",
     "\n",
     "    #-- SSSP\n",
-    "    print(\"\\tSSSP  \", end='')\n",
-    "    if i == 0:\n",
-    "        algos.append(\"SSP\")\n",
-    "\n",
+    "    algorithm = \"SSSP\"\n",
+    "    print(f\"\\t{algorithm}  \", end = '')\n",
     "    print(\"n.\", end='')\n",
-    "    tx = nx_sssp(g_nx)\n",
+    "    tx = nx_sssp(g_nx, seed=nx_seed)\n",
+    "\n",
     "    print(\"c.\", end='')\n",
-    "    tc = cu_sssp(g_cu)\n",
+    "    tc = cu_sssp(g_cu, seed=cu_seed)\n",
     "    print(\" \")\n",
     "\n",
-    "    time_algo_nx[i].append(tx)\n",
-    "    time_algo_cu[i].append(tc)\n",
-    "    perf_algo[i].append ( (tx/tc) )\n",
-    "    perf[i].append( (tx + tnx) /  (tc + tcu) )\n",
+    "    nx_algo_run_times[dataset][algorithm] = tx\n",
+    "    cugraph_algo_run_times[dataset][algorithm] = tc\n",
+    "    perf_algos[dataset][algorithm] = tx/tc \n",
+    "    perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n",
     "\n",
-    "    # increament count\n",
-    "    i = i + 1\n",
-    "    \n",
     "    del g_cu, g_nx\n",
     "    gc.collect()\n"
    ]
@@ -799,13 +780,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#Print results\n",
-    "print(algos)\n",
-    "\n",
-    "for i in range(num_datasets):\n",
-    "    print(f\"{names[i]}\")\n",
-    "    print(f\"{perf[i]}\")\n",
-    "    print(f\"{perf_algo[i]}\")"
+    "# Speedup\n",
+    "print(\"\\n\\t------Speedup (cuGraph w.r.t. NX)------\\n\")\n",
+    "print(pd.DataFrame(perf))\n",
+    "print(\"\\n\\t------Speedup (cuGraph w.r.t. NX, excluding graph creation time)------\\n\")\n",
+    "print(pd.DataFrame(perf_algos))"
    ]
   },
   {
@@ -814,15 +793,16 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#Print results\n",
-    "print(\"\\n------------------------------\")\n",
-    "print(\"\\tAlgorithm Run times  (NX then cuGraph)\\n\")\n",
+    "# Nx and cuGraph execution times for different algorithms\n",
+    "nx_and_cugraph_run_times = pd.DataFrame()\n",
+    "for dataset in cugraph_algo_run_times.keys():\n",
+    "    temp_df = pd.DataFrame({'NX': nx_algo_run_times[dataset], 'cuGraph': cugraph_algo_run_times[dataset]})\n",
+    "    columns = [(dataset, 'cuGraph'), (dataset, 'NX')]\n",
+    "    temp_df.columns = pd.MultiIndex.from_tuples(columns)\n",
+    "    nx_and_cugraph_run_times = pd.concat([temp_df, nx_and_cugraph_run_times], axis=1)\n",
     "\n",
-    "print(algos)\n",
-    "for i in range(num_datasets):\n",
-    "    print(f\"{names[i]}\")\n",
-    "    print(f\"{time_algo_nx[i]}\")\n",
-    "    print(f\"{time_algo_cu[i]}\")"
+    "print(\"\\n\\t------cuGraph and NX execution times for different algorithms-----\\n\")\n",
+    "print(nx_and_cugraph_run_times)"
    ]
   },
   {
@@ -855,7 +835,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.15 | packaged by conda-forge | (main, Nov 22 2022, 15:55:03) \n[GCC 10.4.0]"
+   "version": "3.1.0"
   },
   "vscode": {
    "interpreter": {
diff --git a/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb b/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb
index c44f475c441..dd09830160f 100644
--- a/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb
+++ b/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb
@@ -39,9 +39,9 @@
     "    \n",
     "| Author        |    Date    |  Update             | cuGraph Version |  Test Hardware         |\n",
     "| --------------|------------|---------------------|-----------------|------------------------|\n",
-    "| Don Acosta    | 1/12/2023  | Created             | 23.02 nightly   | RTX A6000, CUDA 11.7   |\n",
-    "| Brad Rees     | 1/27/2023  | Modified            | 23.02 nightly   | RTX A6000, CUDA 11.7   |\n",
-    "| Naim, Md      | 2/08/2024  | Modified for SNMG   | 24.04 nightly   | RTX A6000, CUDA 12.0   |\n"
+    "| Don Acosta    | 1/12/2023  | Created             | 23.02 nightly   | 2xRTX A6000, CUDA 11.7 |\n",
+    "| Brad Rees     | 1/27/2023  | Modified            | 23.02 nightly   | 2xRTX A6000, CUDA 11.7 |\n",
+    "| Naim, Md      | 2/08/2024  | Modified for SNMG   | 24.04 nightly   | 2xRTX A6000, CUDA 12.0 |\n"
    ]
   },
   {
@@ -590,7 +590,10 @@
    "source": [
     "def nx_sssp(_G, seed):\n",
     "    t1 = perf_counter()\n",
-    "    _ = nx.shortest_path(_G, seed)\n",
+    "    if nx.is_weighted(_G):\n",
+    "        _ = nx.shortest_path(_G, seed)\n",
+    "    else:\n",
+    "        _ = nx.bfs_edges(_G, seed)\n",
     "    t2 = perf_counter() - t1\n",
     "    return t2\n",
     "\n",
@@ -599,9 +602,17 @@
     "    t1 = perf_counter()\n",
     "    # SSSP requires weighted graph\n",
     "    if mg:\n",
-    "        _ = cugraph.dask.bfs(_G, seed)\n",
+    "        if _G.weighted: \n",
+    "            _ = cugraph.dask.sssp(_G, seed)\n",
+    "        else:\n",
+    "            _ = cugraph.dask.bfs(_G, seed)\n",
+    "\n",
     "    else:\n",
-    "        _ = cugraph.bfs(_G, seed)\n",
+    "        if _G.weighted:\n",
+    "            _ = cugraph.ssp(_G, seed)\n",
+    "        else:\n",
+    "            _ = cugraph.bfs(_G, seed)\n",
+    "\n",
     "    t2 = perf_counter() - t1\n",
     "    return t2\n"
    ]
@@ -646,14 +657,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "\n",
-    "\n",
-    "nx_algo_run_times = defaultdict(defaultdict)\n",
     "cugraph_algo_run_times = defaultdict(defaultdict)\n",
-    "perf_algos = defaultdict(defaultdict)\n",
-    "perf = defaultdict(defaultdict)\n",
+    "nx_algo_run_times = defaultdict(defaultdict)\n",
     "cugraph_graph_creation_times = defaultdict()\n",
-    "nx_graph_creation_times = defaultdict()\n"
+    "nx_graph_creation_times = defaultdict()\n",
+    "perf_algos = defaultdict(defaultdict)\n",
+    "perf = defaultdict(defaultdict)"
    ]
   },
   {
@@ -811,9 +820,9 @@
     "    algorithm = \"BFS\"\n",
     "    print(f\"\\t{algorithm}  \", end = '')\n",
     "    print(\"n.\", end='')\n",
-    "    tx = nx_bfs(g_nx, nx_seed)\n",
+    "    tx = nx_bfs(g_nx, seed=nx_seed)\n",
     "    print(\"c.\", end='')\n",
-    "    tc = cu_bfs(g_cu, seed = cu_seed, mg=True)\n",
+    "    tc = cu_bfs(g_cu, seed=cu_seed, mg=True)\n",
     "    print(\" \")\n",
     "\n",
     "    nx_algo_run_times[dataset][algorithm] = tx\n",
@@ -825,10 +834,10 @@
     "    algorithm = \"SSSP\"\n",
     "    print(f\"\\t{algorithm}  \", end = '')\n",
     "    print(\"n.\", end='')\n",
-    "    tx = nx_sssp(g_nx, nx_seed)\n",
+    "    tx = nx_sssp(g_nx, seed=nx_seed)\n",
     "\n",
     "    print(\"c.\", end='')\n",
-    "    tc = cu_sssp(g_cu, seed = cu_seed, mg=True)\n",
+    "    tc = cu_sssp(g_cu, seed=cu_seed, mg=True)\n",
     "    print(\" \")\n",
     "\n",
     "    nx_algo_run_times[dataset][algorithm] = tx\n",
@@ -856,7 +865,7 @@
     "print(\"\\n\\t------Speedup (cuGraph w.r.t. NX)------\\n\")\n",
     "print(pd.DataFrame(perf))\n",
     "print(\"\\n\\t------Speedup (cuGraph w.r.t. NX, excluding graph creation time)------\\n\")\n",
-    "print(pd.DataFrame(perf_algos))\n"
+    "print(pd.DataFrame(perf_algos))"
    ]
   },
   {

From f0388bcb65b27a5eee2cc9f9627109594b85c244 Mon Sep 17 00:00:00 2001
From: Yunsong Wang <yunsongw@nvidia.com>
Date: Fri, 16 Feb 2024 17:50:16 -0800
Subject: [PATCH 3/3] Update cugraph for compatibility with the latest cuco
 (#4111)

This PR updates cugraph to make it compatible with the latest cuco.

Depends on https://github.com/rapidsai/rapids-cmake/pull/526

CMake changes will be reverted once https://github.com/rapidsai/rapids-cmake/pull/526 is merged.

Authors:
  - Yunsong Wang (https://github.com/PointKernel)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Naim (https://github.com/naimnv)
  - Brad Rees (https://github.com/BradReesWork)

Approvers:
  - Seunghwa Kang (https://github.com/seunghwak)
  - Chuck Hastings (https://github.com/ChuckHastings)

URL: https://github.com/rapidsai/cugraph/pull/4111
---
 cpp/src/prims/key_store.cuh |  81 +++++++-------
 cpp/src/prims/kv_store.cuh  | 205 +++++++++++++++++++-----------------
 2 files changed, 147 insertions(+), 139 deletions(-)

diff --git a/cpp/src/prims/key_store.cuh b/cpp/src/prims/key_store.cuh
index 6d135b4e94e..907ca36ef4a 100644
--- a/cpp/src/prims/key_store.cuh
+++ b/cpp/src/prims/key_store.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -35,6 +35,8 @@ namespace cugraph {
 
 namespace detail {
 
+using cuco_storage_type = cuco::storage<1>;  ///< cuco window storage type
+
 template <typename KeyIterator>
 struct key_binary_search_contains_op_t {
   using key_type = typename thrust::iterator_traits<KeyIterator>::value_type;
@@ -70,9 +72,8 @@ struct key_binary_search_store_device_view_t {
 
 template <typename ViewType>
 struct key_cuco_store_contains_device_view_t {
-  using key_type = typename ViewType::key_type;
-  using cuco_store_device_ref_type =
-    typename ViewType::cuco_store_type::ref_type<cuco::experimental::contains_tag>;
+  using key_type                   = typename ViewType::key_type;
+  using cuco_store_device_ref_type = typename ViewType::cuco_set_type::ref_type<cuco::contains_tag>;
 
   static_assert(!ViewType::binary_search);
 
@@ -88,9 +89,8 @@ struct key_cuco_store_contains_device_view_t {
 
 template <typename ViewType>
 struct key_cuco_store_insert_device_view_t {
-  using key_type = typename ViewType::key_type;
-  using cuco_store_device_ref_type =
-    typename ViewType::cuco_store_type::ref_type<cuco::experimental::insert_tag>;
+  using key_type                   = typename ViewType::key_type;
+  using cuco_store_device_ref_type = typename ViewType::cuco_set_type::ref_type<cuco::insert_tag>;
 
   static_assert(!ViewType::binary_search);
 
@@ -147,16 +147,17 @@ class key_cuco_store_view_t {
 
   static constexpr bool binary_search = false;
 
-  using cuco_store_type = cuco::experimental::static_set<
-    key_t,
-    cuco::experimental::extent<std::size_t>,
-    cuda::thread_scope_device,
-    thrust::equal_to<key_t>,
-    cuco::experimental::linear_probing<1,  // CG size
-                                       cuco::murmurhash3_32<key_t>>,
-    rmm::mr::stream_allocator_adaptor<rmm::mr::polymorphic_allocator<std::byte>>>;
+  using cuco_set_type =
+    cuco::static_set<key_t,
+                     cuco::extent<std::size_t>,
+                     cuda::thread_scope_device,
+                     thrust::equal_to<key_t>,
+                     cuco::linear_probing<1,  // CG size
+                                          cuco::murmurhash3_32<key_t>>,
+                     rmm::mr::stream_allocator_adaptor<rmm::mr::polymorphic_allocator<std::byte>>,
+                     cuco_storage_type>;
 
-  key_cuco_store_view_t(cuco_store_type const* store) : cuco_store_(store) {}
+  key_cuco_store_view_t(cuco_set_type const* store) : cuco_store_(store) {}
 
   template <typename QueryKeyIterator, typename ResultValueIterator>
   void contains(QueryKeyIterator key_first,
@@ -167,17 +168,14 @@ class key_cuco_store_view_t {
     cuco_store_->contains(key_first, key_last, value_first, stream);
   }
 
-  auto cuco_store_contains_device_ref() const
-  {
-    return cuco_store_->ref(cuco::experimental::contains);
-  }
+  auto cuco_store_contains_device_ref() const { return cuco_store_->ref(cuco::contains); }
 
-  auto cuco_store_insert_device_ref() const { return cuco_store_->ref(cuco::experimental::insert); }
+  auto cuco_store_insert_device_ref() const { return cuco_store_->ref(cuco::insert); }
 
   key_t invalid_key() const { return cuco_store_->get_empty_key_sentinel(); }
 
  private:
-  cuco_store_type const* cuco_store_{};
+  cuco_set_type const* cuco_store_{};
 };
 
 template <typename key_t>
@@ -240,14 +238,15 @@ class key_cuco_store_t {
  public:
   using key_type = key_t;
 
-  using cuco_store_type = cuco::experimental::static_set<
-    key_t,
-    cuco::experimental::extent<std::size_t>,
-    cuda::thread_scope_device,
-    thrust::equal_to<key_t>,
-    cuco::experimental::linear_probing<1,  // CG size
-                                       cuco::murmurhash3_32<key_t>>,
-    rmm::mr::stream_allocator_adaptor<rmm::mr::polymorphic_allocator<std::byte>>>;
+  using cuco_set_type =
+    cuco::static_set<key_t,
+                     cuco::extent<std::size_t>,
+                     cuda::thread_scope_device,
+                     thrust::equal_to<key_t>,
+                     cuco::linear_probing<1,  // CG size
+                                          cuco::murmurhash3_32<key_t>>,
+                     rmm::mr::stream_allocator_adaptor<rmm::mr::polymorphic_allocator<std::byte>>,
+                     cuco_storage_type>;
 
   key_cuco_store_t(rmm::cuda_stream_view stream) {}
 
@@ -306,7 +305,7 @@ class key_cuco_store_t {
     return keys;
   }
 
-  cuco_store_type const* cuco_store_ptr() const { return cuco_store_.get(); }
+  cuco_set_type const* cuco_store_ptr() const { return cuco_store_.get(); }
 
   key_t invalid_key() const { return cuco_store_->empty_key_sentinel(); }
 
@@ -324,17 +323,19 @@ class key_cuco_store_t {
 
     auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(
       rmm::mr::polymorphic_allocator<std::byte>(rmm::mr::get_current_device_resource()), stream);
-    cuco_store_ = std::make_unique<cuco_store_type>(
-      cuco_size,
-      cuco::sentinel::empty_key<key_t>{invalid_key},
-      thrust::equal_to<key_t>{},
-      cuco::experimental::linear_probing<1,  // CG size
-                                         cuco::murmurhash3_32<key_t>>{},
-      stream_adapter,
-      stream.value());
+    cuco_store_ =
+      std::make_unique<cuco_set_type>(cuco_size,
+                                      cuco::sentinel::empty_key<key_t>{invalid_key},
+                                      thrust::equal_to<key_t>{},
+                                      cuco::linear_probing<1,  // CG size
+                                                           cuco::murmurhash3_32<key_t>>{},
+                                      cuco::thread_scope_device,
+                                      cuco_storage_type{},
+                                      stream_adapter,
+                                      stream.value());
   }
 
-  std::unique_ptr<cuco_store_type> cuco_store_{nullptr};
+  std::unique_ptr<cuco_set_type> cuco_store_{nullptr};
 
   size_t capacity_{0};
   size_t size_{0};  // caching as cuco_store_->size() is expensive (this scans the entire slots to
diff --git a/cpp/src/prims/kv_store.cuh b/cpp/src/prims/kv_store.cuh
index f17441ad6ab..be4fde2fbff 100644
--- a/cpp/src/prims/kv_store.cuh
+++ b/cpp/src/prims/kv_store.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -49,6 +49,8 @@ namespace cugraph {
 
 namespace detail {
 
+using cuco_storage_type = cuco::storage<1>;  ///< cuco window storage type
+
 template <typename KeyIterator, typename ValueIterator>
 struct kv_binary_search_find_op_t {
   using key_type   = typename thrust::iterator_traits<KeyIterator>::value_type;
@@ -86,18 +88,19 @@ struct kv_binary_search_contains_op_t {
 
 template <typename KeyIterator>
 struct kv_cuco_insert_and_increment_t {
-  using key_type        = typename thrust::iterator_traits<KeyIterator>::value_type;
-  using cuco_store_type = cuco::experimental::static_map<
-    key_type,
-    size_t,
-    cuco::experimental::extent<std::size_t>,
-    cuda::thread_scope_device,
-    thrust::equal_to<key_type>,
-    cuco::experimental::linear_probing<1,  // CG size
-                                       cuco::murmurhash3_32<key_t>>,
-    rmm::mr::stream_allocator_adaptor<rmm::mr::polymorphic_allocator<std::byte>>>;
-
-  typename cuco_store_type::ref_type<cuco::experimental::insert_and_find_tag> device_ref{};
+  using key_type = typename thrust::iterator_traits<KeyIterator>::value_type;
+  using cuco_set_type =
+    cuco::static_map<key_type,
+                     size_t,
+                     cuco::extent<std::size_t>,
+                     cuda::thread_scope_device,
+                     thrust::equal_to<key_type>,
+                     cuco::linear_probing<1,  // CG size
+                                          cuco::murmurhash3_32<key_t>>,
+                     rmm::mr::stream_allocator_adaptor<rmm::mr::polymorphic_allocator<std::byte>>,
+                     cuco_storage_type>;
+
+  typename cuco_set_type::ref_type<cuco::insert_and_find_tag> device_ref{};
   KeyIterator key_first{};
   size_t* counter{nullptr};
   size_t invalid_idx{};
@@ -109,7 +112,7 @@ struct kv_cuco_insert_and_increment_t {
     if (inserted) {
       cuda::atomic_ref<size_t, cuda::thread_scope_device> atomic_counter(*counter);
       auto idx       = atomic_counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed);
-      using ref_type = typename cuco_store_type::ref_type<cuco::experimental::insert_and_find_tag>;
+      using ref_type = typename cuco_set_type::ref_type<cuco::insert_and_find_tag>;
       cuda::atomic_ref<typename ref_type::mapped_type, cuda::thread_scope_device> ref(
         (*iter).second);
       ref.store(idx, cuda::std::memory_order_relaxed);
@@ -122,18 +125,19 @@ struct kv_cuco_insert_and_increment_t {
 
 template <typename KeyIterator, typename StencilIterator, typename PredOp>
 struct kv_cuco_insert_if_and_increment_t {
-  using key_type        = typename thrust::iterator_traits<KeyIterator>::value_type;
-  using cuco_store_type = cuco::experimental::static_map<
-    key_type,
-    size_t,
-    cuco::experimental::extent<std::size_t>,
-    cuda::thread_scope_device,
-    thrust::equal_to<key_type>,
-    cuco::experimental::linear_probing<1,  // CG size
-                                       cuco::murmurhash3_32<key_t>>,
-    rmm::mr::stream_allocator_adaptor<rmm::mr::polymorphic_allocator<std::byte>>>;
-
-  typename cuco_store_type::ref_type<cuco::experimental::insert_and_find_tag> device_ref{};
+  using key_type = typename thrust::iterator_traits<KeyIterator>::value_type;
+  using cuco_set_type =
+    cuco::static_map<key_type,
+                     size_t,
+                     cuco::extent<std::size_t>,
+                     cuda::thread_scope_device,
+                     thrust::equal_to<key_type>,
+                     cuco::linear_probing<1,  // CG size
+                                          cuco::murmurhash3_32<key_t>>,
+                     rmm::mr::stream_allocator_adaptor<rmm::mr::polymorphic_allocator<std::byte>>,
+                     cuco_storage_type>;
+
+  typename cuco_set_type::ref_type<cuco::insert_and_find_tag> device_ref{};
   KeyIterator key_first{};
   StencilIterator stencil_first{};
   PredOp pred_op{};
@@ -149,7 +153,7 @@ struct kv_cuco_insert_if_and_increment_t {
     if (inserted) {
       cuda::atomic_ref<size_t, cuda::thread_scope_device> atomic_counter(*counter);
       auto idx       = atomic_counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed);
-      using ref_type = typename cuco_store_type::ref_type<cuco::experimental::insert_and_find_tag>;
+      using ref_type = typename cuco_set_type::ref_type<cuco::insert_and_find_tag>;
       cuda::atomic_ref<typename ref_type::mapped_type, cuda::thread_scope_device> ref(
         (*iter).second);
       ref.store(idx, cuda::std::memory_order_relaxed);
@@ -162,23 +166,24 @@ struct kv_cuco_insert_if_and_increment_t {
 
 template <typename key_t, typename value_t>
 struct kv_cuco_insert_and_assign_t {
-  using cuco_store_type = cuco::experimental::static_map<
-    key_t,
-    std::conditional_t<std::is_arithmetic_v<value_t>, value_t, size_t>,
-    cuco::experimental::extent<std::size_t>,
-    cuda::thread_scope_device,
-    thrust::equal_to<key_t>,
-    cuco::experimental::linear_probing<1,  // CG size
-                                       cuco::murmurhash3_32<key_t>>,
-    rmm::mr::stream_allocator_adaptor<rmm::mr::polymorphic_allocator<std::byte>>>;
-
-  typename cuco_store_type::ref_type<cuco::experimental::insert_and_find_tag> device_ref{};
+  using cuco_set_type =
+    cuco::static_map<key_t,
+                     std::conditional_t<std::is_arithmetic_v<value_t>, value_t, size_t>,
+                     cuco::extent<std::size_t>,
+                     cuda::thread_scope_device,
+                     thrust::equal_to<key_t>,
+                     cuco::linear_probing<1,  // CG size
+                                          cuco::murmurhash3_32<key_t>>,
+                     rmm::mr::stream_allocator_adaptor<rmm::mr::polymorphic_allocator<std::byte>>,
+                     cuco_storage_type>;
+
+  typename cuco_set_type::ref_type<cuco::insert_and_find_tag> device_ref{};
 
   __device__ void operator()(thrust::tuple<key_t, value_t> pair)
   {
     auto [iter, inserted] = device_ref.insert_and_find(pair);
     if (!inserted) {
-      using ref_type = typename cuco_store_type::ref_type<cuco::experimental::insert_and_find_tag>;
+      using ref_type = typename cuco_set_type::ref_type<cuco::insert_and_find_tag>;
       cuda::atomic_ref<typename ref_type::mapped_type, cuda::thread_scope_device> ref(
         (*iter).second);
       ref.store(thrust::get<1>(pair), cuda::std::memory_order_relaxed);
@@ -220,10 +225,9 @@ struct kv_binary_search_store_device_view_t {
 
 template <typename ViewType>
 struct kv_cuco_store_find_device_view_t {
-  using key_type   = typename ViewType::key_type;
-  using value_type = typename ViewType::value_type;
-  using cuco_store_device_ref_type =
-    typename ViewType::cuco_store_type::ref_type<cuco::experimental::find_tag>;
+  using key_type                   = typename ViewType::key_type;
+  using value_type                 = typename ViewType::value_type;
+  using cuco_store_device_ref_type = typename ViewType::cuco_set_type::ref_type<cuco::find_tag>;
 
   static_assert(!ViewType::binary_search);
 
@@ -336,25 +340,26 @@ class kv_cuco_store_view_t {
 
   static constexpr bool binary_search = false;
 
-  using cuco_store_type = cuco::experimental::static_map<
-    key_t,
-    std::conditional_t<std::is_arithmetic_v<value_type>, value_type, size_t>,
-    cuco::experimental::extent<std::size_t>,
-    cuda::thread_scope_device,
-    thrust::equal_to<key_t>,
-    cuco::experimental::linear_probing<1,  // CG size
-                                       cuco::murmurhash3_32<key_t>>,
-    rmm::mr::stream_allocator_adaptor<rmm::mr::polymorphic_allocator<std::byte>>>;
+  using cuco_set_type =
+    cuco::static_map<key_t,
+                     std::conditional_t<std::is_arithmetic_v<value_type>, value_type, size_t>,
+                     cuco::extent<std::size_t>,
+                     cuda::thread_scope_device,
+                     thrust::equal_to<key_t>,
+                     cuco::linear_probing<1,  // CG size
+                                          cuco::murmurhash3_32<key_t>>,
+                     rmm::mr::stream_allocator_adaptor<rmm::mr::polymorphic_allocator<std::byte>>,
+                     cuco_storage_type>;
 
   template <typename type = value_type>
-  kv_cuco_store_view_t(cuco_store_type const* store,
+  kv_cuco_store_view_t(cuco_set_type const* store,
                        std::enable_if_t<std::is_arithmetic_v<type>, int32_t> = 0)
     : cuco_store_(store)
   {
   }
 
   template <typename type = value_type>
-  kv_cuco_store_view_t(cuco_store_type const* store,
+  kv_cuco_store_view_t(cuco_set_type const* store,
                        ValueIterator value_first,
                        type invalid_value,
                        std::enable_if_t<!std::is_arithmetic_v<type>, int32_t> = 0)
@@ -392,7 +397,7 @@ class kv_cuco_store_view_t {
     cuco_store_->contains(key_first, key_last, value_first, stream.value());
   }
 
-  auto cuco_store_find_device_ref() const { return cuco_store_->ref(cuco::experimental::find); }
+  auto cuco_store_find_device_ref() const { return cuco_store_->ref(cuco::find); }
 
   template <typename type = value_type>
   std::enable_if_t<!std::is_arithmetic_v<type>, ValueIterator> store_value_first() const
@@ -412,7 +417,7 @@ class kv_cuco_store_view_t {
   }
 
  private:
-  cuco_store_type const* cuco_store_{};
+  cuco_set_type const* cuco_store_{};
   std::conditional_t<std::is_arithmetic_v<value_type>, ValueIterator, std::byte /* dummy */>
     store_value_first_{};
 
@@ -531,15 +536,16 @@ class kv_cuco_store_t {
     std::invoke_result_t<decltype(get_dataframe_buffer_cbegin<value_buffer_type>),
                          value_buffer_type&>;
 
-  using cuco_store_type = cuco::experimental::static_map<
-    key_t,
-    std::conditional_t<std::is_arithmetic_v<value_t>, value_t, size_t>,
-    cuco::experimental::extent<std::size_t>,
-    cuda::thread_scope_device,
-    thrust::equal_to<key_t>,
-    cuco::experimental::linear_probing<1,  // CG size
-                                       cuco::murmurhash3_32<key_t>>,
-    rmm::mr::stream_allocator_adaptor<rmm::mr::polymorphic_allocator<std::byte>>>;
+  using cuco_set_type =
+    cuco::static_map<key_t,
+                     std::conditional_t<std::is_arithmetic_v<value_t>, value_t, size_t>,
+                     cuco::extent<std::size_t>,
+                     cuda::thread_scope_device,
+                     thrust::equal_to<key_t>,
+                     cuco::linear_probing<1,  // CG size
+                                          cuco::murmurhash3_32<key_t>>,
+                     rmm::mr::stream_allocator_adaptor<rmm::mr::polymorphic_allocator<std::byte>>,
+                     cuco_storage_type>;
 
   kv_cuco_store_t(rmm::cuda_stream_view stream) {}
 
@@ -588,7 +594,7 @@ class kv_cuco_store_t {
       // requires placing the atomic variable on managed memory and this adds additional
       // complication.
       rmm::device_scalar<size_t> counter(old_store_value_size, stream);
-      auto mutable_device_ref = cuco_store_->ref(cuco::experimental::insert_and_find);
+      auto mutable_device_ref = cuco_store_->ref(cuco::insert_and_find);
       rmm::device_uvector<size_t> store_value_offsets(num_keys, stream);
       thrust::tabulate(
         rmm::exec_policy(stream),
@@ -629,7 +635,7 @@ class kv_cuco_store_t {
       // requires placing the atomic variable on managed memory and this adds additional
       // complication.
       rmm::device_scalar<size_t> counter(old_store_value_size, stream);
-      auto mutable_device_ref = cuco_store_->ref(cuco::experimental::insert_and_find);
+      auto mutable_device_ref = cuco_store_->ref(cuco::insert_and_find);
       rmm::device_uvector<size_t> store_value_offsets(num_keys, stream);
       thrust::tabulate(rmm::exec_policy(stream),
                        store_value_offsets.begin(),
@@ -665,8 +671,8 @@ class kv_cuco_store_t {
     if constexpr (std::is_arithmetic_v<value_t>) {
       auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(key_first, value_first));
       // FIXME: a temporary solution till insert_and_assign is added to
-      // cuco::experimental::static_map
-      auto mutable_device_ref = cuco_store_->ref(cuco::experimental::insert_and_find);
+      // cuco::static_map
+      auto mutable_device_ref = cuco_store_->ref(cuco::insert_and_find);
       thrust::for_each(rmm::exec_policy(stream),
                        pair_first,
                        pair_first + num_keys,
@@ -679,7 +685,7 @@ class kv_cuco_store_t {
       // requires placing the atomic variable on managed memory and this adds additional
       // complication.
       rmm::device_scalar<size_t> counter(old_store_value_size, stream);
-      auto mutable_device_ref = cuco_store_->ref(cuco::experimental::insert_and_find);
+      auto mutable_device_ref = cuco_store_->ref(cuco::insert_and_find);
       rmm::device_uvector<size_t> store_value_offsets(num_keys, stream);
       thrust::tabulate(
         rmm::exec_policy(stream),
@@ -731,20 +737,19 @@ class kv_cuco_store_t {
                                                         })),
                         stream);
 
-      thrust::for_each(
-        rmm::exec_policy(stream),
-        kv_indices.begin(),
-        kv_indices.end(),
-        [key_first,
-         value_first,
-         store_value_first = get_dataframe_buffer_begin(store_values_),
-         device_ref        = cuco_store_->ref(cuco::experimental::find)] __device__(auto kv_idx) {
-          size_t store_value_offset{};
-          auto found = device_ref.find(*(key_first + kv_idx));
-          assert(found != device_ref.end());
-          store_value_offset                        = (*found).second;
-          *(store_value_first + store_value_offset) = *(value_first + kv_idx);
-        });
+      thrust::for_each(rmm::exec_policy(stream),
+                       kv_indices.begin(),
+                       kv_indices.end(),
+                       [key_first,
+                        value_first,
+                        store_value_first = get_dataframe_buffer_begin(store_values_),
+                        device_ref        = cuco_store_->ref(cuco::find)] __device__(auto kv_idx) {
+                         size_t store_value_offset{};
+                         auto found = device_ref.find(*(key_first + kv_idx));
+                         assert(found != device_ref.end());
+                         store_value_offset                        = (*found).second;
+                         *(store_value_first + store_value_offset) = *(value_first + kv_idx);
+                       });
     }
   }
 
@@ -783,7 +788,7 @@ class kv_cuco_store_t {
     return std::make_tuple(std::move(retrieved_keys), std::move(retrieved_values));
   }
 
-  cuco_store_type const* cuco_store_ptr() const { return cuco_store_.get(); }
+  cuco_set_type const* cuco_store_ptr() const { return cuco_store_.get(); }
 
   template <typename type = value_t>
   std::enable_if_t<!std::is_arithmetic_v<type>, const_value_iterator> store_value_first() const
@@ -821,23 +826,25 @@ class kv_cuco_store_t {
     auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(
       rmm::mr::polymorphic_allocator<std::byte>(rmm::mr::get_current_device_resource()), stream);
     if constexpr (std::is_arithmetic_v<value_t>) {
-      cuco_store_ = std::make_unique<cuco_store_type>(
-        cuco_size,
-        cuco::sentinel::empty_key<key_t>{invalid_key},
-        cuco::sentinel::empty_value<value_t>{invalid_value},
-        thrust::equal_to<key_t>{},
-        cuco::experimental::linear_probing<1,  // CG size
-                                           cuco::murmurhash3_32<key_t>>{},
-        stream_adapter,
-        stream.value());
+      cuco_store_ =
+        std::make_unique<cuco_set_type>(cuco_size,
+                                        cuco::sentinel::empty_key<key_t>{invalid_key},
+                                        cuco::sentinel::empty_value<value_t>{invalid_value},
+                                        thrust::equal_to<key_t>{},
+                                        cuco::linear_probing<1,  // CG size
+                                                             cuco::murmurhash3_32<key_t>>{},
+                                        cuco::thread_scope_device,
+                                        cuco_storage_type{},
+                                        stream_adapter,
+                                        stream.value());
     } else {
-      cuco_store_ = std::make_unique<cuco_store_type>(
+      cuco_store_ = std::make_unique<cuco_set_type>(
         cuco_size,
         cuco::sentinel::empty_key<key_t>{invalid_key},
         cuco::sentinel::empty_value<size_t>{std::numeric_limits<size_t>::max()},
         thrust::equal_to<key_t>{},
-        cuco::experimental::linear_probing<1,  // CG size
-                                           cuco::murmurhash3_32<key_t>>{},
+        cuco::linear_probing<1,  // CG size
+                             cuco::murmurhash3_32<key_t>>{},
         stream_adapter,
         stream);
       store_values_ = allocate_dataframe_buffer<value_t>(0, stream);
@@ -845,7 +852,7 @@ class kv_cuco_store_t {
     }
   }
 
-  std::unique_ptr<cuco_store_type> cuco_store_{nullptr};
+  std::unique_ptr<cuco_set_type> cuco_store_{nullptr};
   std::conditional_t<!std::is_arithmetic_v<value_t>,
                      decltype(allocate_dataframe_buffer<value_t>(0, rmm::cuda_stream_view{})),
                      std::byte /* dummy */>
@@ -857,7 +864,7 @@ class kv_cuco_store_t {
   size_t size_{
     0};  // caching as cuco_store_->size() is expensive (this scans the entire slots to handle
          // user inserts through a device reference (and currently this is an upper bound (this
-         // will become exact once we fully switch to cuco::experimental::static_map and use the
+         // will become exact once we fully switch to cuco::static_map and use the
          // static_map class's insert_and_assign function; this function will be added soon)
 };