Merge remote-tracking branch 'upstream/branch-24.06' into branch-24.0…

…6-nxcg_ego_benchmark
rapidsai · May 29, 2024 · c592180 · c592180
2 parents 980f486 + 04e8000
commit c592180
Show file tree

Hide file tree

Showing 89 changed files with 2,092 additions and 842 deletions.
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
@@ -7,6 +7,11 @@ FROM ${BASE} as pip-base
 
 ENV DEFAULT_VIRTUAL_ENV=rapids
 
+RUN apt update -y \
+ && DEBIAN_FRONTEND=noninteractive apt install -y \
+    libblas-dev liblapack-dev \
+ && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/*;
+
 FROM ${BASE} as conda-base
 
 ENV DEFAULT_CONDA_ENV=rapids

diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json
@@ -11,7 +11,7 @@
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-conda"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-conda"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {

diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json
@@ -5,19 +5,16 @@
     "args": {
       "CUDA": "11.8",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:24.06-cpp-cuda11.8-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:24.06-cpp-cuda11.8-ucx1.15.0-openmpi-ubuntu22.04"
     }
   },
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-pip"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-pip"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
-    "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": {
-      "version": "1.15.0"
-    },
     "ghcr.io/rapidsai/devcontainers/features/cuda:24.6": {
       "version": "11.8",
       "installcuBLAS": true,
@@ -28,7 +25,6 @@
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {}
   },
   "overrideFeatureInstallOrder": [
-    "ghcr.io/rapidsai/devcontainers/features/ucx",
     "ghcr.io/rapidsai/devcontainers/features/cuda",
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
   ],

diff --git a/.devcontainer/cuda12.2-conda/devcontainer.json b/.devcontainer/cuda12.2-conda/devcontainer.json
@@ -11,7 +11,7 @@
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-conda"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-conda"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {

diff --git a/.devcontainer/cuda12.2-pip/devcontainer.json b/.devcontainer/cuda12.2-pip/devcontainer.json
@@ -5,19 +5,16 @@
     "args": {
       "CUDA": "12.2",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:24.06-cpp-cuda12.2-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:24.06-cpp-cuda12.2-ucx1.15.0-openmpi-ubuntu22.04"
     }
   },
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-pip"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-pip"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
-    "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": {
-      "version": "1.15.0"
-    },
     "ghcr.io/rapidsai/devcontainers/features/cuda:24.6": {
       "version": "12.2",
       "installcuBLAS": true,
@@ -28,7 +25,6 @@
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {}
   },
   "overrideFeatureInstallOrder": [
-    "ghcr.io/rapidsai/devcontainers/features/ucx",
     "ghcr.io/rapidsai/devcontainers/features/cuda",
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
   ],

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
@@ -196,5 +196,5 @@ jobs:
       extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY
       build_command: |
         sccache -z;
-        build-all --verbose -j$(nproc --ignore=1);
+        build-all --verbose -j$(nproc --ignore=1) -DBUILD_CUGRAPH_MG_TESTS=ON;
         sccache -s;
diff --git a/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py b/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py
@@ -344,7 +344,7 @@ def generate_rmat_dataset(
     del label_df
     gc.collect()
 
-    dask_label_df = dask_cudf.from_dask_dataframe(dask_label_df)
+    dask_label_df = dask_label_df.to_backend("cudf")
 
     node_offsets = {"paper": 0}
     edge_offsets = {("paper", "cites", "paper"): 0}

diff --git a/ci/test_python.sh b/ci/test_python.sh
@@ -3,10 +3,6 @@
 
 set -euo pipefail
 
-# TODO: Enable dask query planning (by default) once some bugs are fixed.
-# xref: https://github.com/rapidsai/cudf/issues/15027
-export DASK_DATAFRAME__QUERY_PLANNING=False
-
 # Support invoking test_python.sh outside the script directory
 cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../
 

diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh
@@ -3,10 +3,6 @@
 
 set -eoxu pipefail
 
-# TODO: Enable dask query planning (by default) once some bugs are fixed.
-# xref: https://github.com/rapidsai/cudf/issues/15027
-export DASK_DATAFRAME__QUERY_PLANNING=False
-
 package_name=$1
 package_dir=$2
 

diff --git a/ci/test_wheel_cugraph-dgl.sh b/ci/test_wheel_cugraph-dgl.sh
@@ -32,8 +32,18 @@ fi
 PYTORCH_URL="https://download.pytorch.org/whl/cu${PYTORCH_CUDA_VER}"
 DGL_URL="https://data.dgl.ai/wheels/cu${PYTORCH_CUDA_VER}/repo.html"
 
+# Starting from 2.2, PyTorch wheels depend on nvidia-nccl-cuxx>=2.19 wheel and
+# dynamically link to NCCL. RAPIDS CUDA 11 CI images have an older NCCL version that
+# might shadow the newer NCCL required by PyTorch during import (when importing
+# `cupy` before `torch`).
+if [[ "${NCCL_VERSION}" < "2.19" ]]; then
+  PYTORCH_VER="2.1.0"
+else
+  PYTORCH_VER="2.3.0"
+fi
+
 rapids-logger "Installing PyTorch and DGL"
-rapids-retry python -m pip install torch --index-url ${PYTORCH_URL}
+rapids-retry python -m pip install "torch==${PYTORCH_VER}" --index-url ${PYTORCH_URL}
 rapids-retry python -m pip install dgl==2.0.0 --find-links ${DGL_URL}
 
 python -m pytest python/cugraph-dgl/tests
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -92,14 +92,14 @@ set(CUGRAPH_CXX_FLAGS "")
 set(CUGRAPH_CUDA_FLAGS "")
 
 if(CMAKE_COMPILER_IS_GNUCXX)
-    list(APPEND CUGRAPH_CXX_FLAGS -Werror -Wno-error=deprecated-declarations)
+    list(APPEND CUGRAPH_CXX_FLAGS -Werror -Wno-error=deprecated-declarations -Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS)
 endif(CMAKE_COMPILER_IS_GNUCXX)
 
 
 message("-- Building for GPU_ARCHS = ${CMAKE_CUDA_ARCHITECTURES}")
 
 list(APPEND CUGRAPH_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr)
-list(APPEND CUGRAPH_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas=--disable-warnings)
+list(APPEND CUGRAPH_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS -Xptxas=--disable-warnings)
 list(APPEND CUGRAPH_CUDA_FLAGS -Xcompiler=-Wall,-Wno-error=sign-compare,-Wno-error=unused-but-set-variable)
 list(APPEND CUGRAPH_CUDA_FLAGS -Xfatbin=-compress-all)
 
@@ -180,6 +180,7 @@ set(CUGRAPH_SOURCES
     src/community/detail/refine_sg.cu
     src/community/detail/refine_mg.cu
     src/community/edge_triangle_count_sg.cu
+    src/community/edge_triangle_count_mg.cu
     src/community/detail/maximal_independent_moves_sg.cu
     src/community/detail/maximal_independent_moves_mg.cu
     src/detail/utility_wrappers.cu

diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp
@@ -2007,6 +2007,24 @@ void triangle_count(raft::handle_t const& handle,
                     raft::device_span<edge_t> counts,
                     bool do_expensive_check = false);
 
+/*
+ * @brief Compute edge triangle counts.
+ *
+ * Compute edge triangle counts for the entire set of edges.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam multi_gpu Flag indicating whether to target single-GPU (false) or multi-GPU (true).
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param graph_view Graph view object.
+ *
+ * @return edge_property_t containing the edge triangle count
+ */
+template <typename vertex_t, typename edge_t, bool multi_gpu>
+edge_property_t<graph_view_t<vertex_t, edge_t, false, multi_gpu>, edge_t> edge_triangle_count(
+  raft::handle_t const& handle, graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view);
+
 /*
  * @brief Compute K-Truss.
  *

diff --git a/cpp/include/cugraph/utilities/device_functors.cuh b/cpp/include/cugraph/utilities/device_functors.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -78,13 +78,14 @@ struct indirection_t {
 
 template <typename index_t, typename Iterator>
 struct indirection_if_idx_valid_t {
+  using value_type = typename thrust::iterator_traits<Iterator>::value_type;
   Iterator first{};
   index_t invalid_idx{};
-  typename thrust::iterator_traits<Iterator>::value_type invalid_value{};
+  value_type invalid_value{};
 
-  __device__ typename thrust::iterator_traits<Iterator>::value_type operator()(index_t i) const
+  __device__ value_type operator()(index_t i) const
   {
-    return (i != invalid_idx) ? *(first + i) : invalid_value;
+    return (i != invalid_idx) ? static_cast<value_type>(*(first + i)) : invalid_value;
   }
 };
 

diff --git a/cpp/include/cugraph/utilities/mask_utils.cuh b/cpp/include/cugraph/utilities/mask_utils.cuh
@@ -20,6 +20,7 @@
 
 #include <raft/core/handle.hpp>
 
+#include <cuda/functional>
 #include <thrust/copy.h>
 #include <thrust/functional.h>
 #include <thrust/iterator/counting_iterator.h>
@@ -160,13 +161,13 @@ size_t count_set_bits(raft::handle_t const& handle, MaskIterator mask_first, siz
     handle.get_thrust_policy(),
     thrust::make_counting_iterator(size_t{0}),
     thrust::make_counting_iterator(packed_bool_size(num_bits)),
-    [mask_first, num_bits] __device__(size_t i) {
+    cuda::proclaim_return_type<size_t>([mask_first, num_bits] __device__(size_t i) -> size_t {
       auto word = *(mask_first + i);
       if ((i + 1) * packed_bools_per_word() > num_bits) {
         word &= packed_bool_partial_mask(num_bits % packed_bools_per_word());
       }
       return static_cast<size_t>(__popc(word));
-    },
+    }),
     size_t{0},
     thrust::plus<size_t>{});
 }

diff --git a/cpp/src/community/detail/common_methods.cuh b/cpp/src/community/detail/common_methods.cuh
@@ -29,6 +29,7 @@
 #include <cugraph/detail/utility_wrappers.hpp>
 #include <cugraph/graph_functions.hpp>
 
+#include <cuda/functional>
 #include <thrust/binary_search.h>
 #include <thrust/execution_policy.h>
 #include <thrust/functional.h>
@@ -178,7 +179,7 @@ weight_t compute_modularity(
     handle.get_thrust_policy(),
     cluster_weights.begin(),
     cluster_weights.end(),
-    [] __device__(weight_t p) { return p * p; },
+    cuda::proclaim_return_type<weight_t>([] __device__(weight_t p) -> weight_t { return p * p; }),
     weight_t{0},
     thrust::plus<weight_t>());