Commit 0ea029d

Merge branch 'branch-24.10' into nxcg-benchmarking

nv-rliu authored Aug 30, 2024
2 parents 5a59f9d + d5154d5
Showing 27 changed files with 965 additions and 314 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -17,7 +17,7 @@ repos:
     hooks:
       - id: black
        language_version: python3
-        args: [--target-version=py39]
+        args: [--target-version=py310]
        files: ^(python/.*|benchmarks/.*)$
        exclude: ^python/nx-cugraph/
  - repo: https://github.com/PyCQA/flake8
@@ -42,7 +42,7 @@ repos:
        types_or: [c, c++, cuda]
        args: ["-fallback-style=none", "-style=file", "-i"]
  - repo: https://github.com/rapidsai/pre-commit-hooks
-    rev: v0.3.1
+    rev: v0.4.0
    hooks:
      - id: verify-copyright
        files: |
@@ -266,7 +266,7 @@ def uns_func(*args, **kwargs):
 @pytest.mark.managedmem_off
 @pytest.mark.poolallocator_on
 @pytest.mark.parametrize("batch_size", params.batch_sizes.values())
-@pytest.mark.parametrize("fanout", [params.fanout_10_25, params.fanout_5_10_15])
+@pytest.mark.parametrize("fanout", [params.fanout_10_25])
 @pytest.mark.parametrize(
     "with_replacement", [False], ids=lambda v: f"with_replacement={v}"
 )
@@ -287,6 +287,8 @@ def bench_cugraph_uniform_neighbor_sample(
         start_list=uns_args["start_list"],
         fanout_vals=uns_args["fanout"],
         with_replacement=uns_args["with_replacement"],
+        use_legacy_names=False,
+        with_edge_properties=True,
     )
     """
     dtmap = {"int32": 32 // 8, "int64": 64 // 8}
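For context on the two keyword arguments added above, here is a minimal, hedged sketch of a standalone call. It is not taken from the repository: the toy edge list is invented, and the flag comments reflect my reading of the cuGraph 24.x Python API.

```python
# Hedged sketch: build a tiny graph and sample with the two new flags.
import cudf
import cugraph

edges = cudf.DataFrame({"src": [0, 1, 2, 3], "dst": [1, 2, 3, 0]})
G = cugraph.Graph(directed=True)
G.from_cudf_edgelist(edges, source="src", destination="dst")

result = cugraph.uniform_neighbor_sample(
    G,
    start_list=cudf.Series([0, 1], dtype="int64"),
    fanout_vals=[10, 25],        # same shape as params.fanout_10_25 above
    with_replacement=False,
    use_legacy_names=False,      # opt in to the newer 'majors'/'minors' column names
    with_edge_properties=True,   # also return per-edge id/type/weight/hop columns
)
print(result.head())
```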
2 changes: 2 additions & 0 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -42,6 +42,7 @@ dependencies:
 - numpy>=1.23,<2.0a0
 - numpydoc
 - nvcc_linux-64=11.8
+- ogb
 - openmpi
 - packaging>=21
 - pandas
@@ -74,6 +75,7 @@ dependencies:
 - sphinxcontrib-websupport
 - thriftpy2!=0.5.0,!=0.5.1
 - torchdata
+- torchmetrics
 - ucx-proc=*=gpu
 - ucx-py==0.40.*,>=0.0.0a0
 - wget
2 changes: 2 additions & 0 deletions conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -47,6 +47,7 @@ dependencies:
 - numba>=0.57
 - numpy>=1.23,<2.0a0
 - numpydoc
+- ogb
 - openmpi
 - packaging>=21
 - pandas
@@ -79,6 +80,7 @@ dependencies:
 - sphinxcontrib-websupport
 - thriftpy2!=0.5.0,!=0.5.1
 - torchdata
+- torchmetrics
 - ucx-proc=*=gpu
 - ucx-py==0.40.*,>=0.0.0a0
 - wget
4 changes: 1 addition & 3 deletions conda/recipes/nx-cugraph/meta.yaml
@@ -14,9 +14,7 @@ source:

 build:
   number: {{ GIT_DESCRIBE_NUMBER }}
-build:
-  number: {{ GIT_DESCRIBE_NUMBER }}
-  string: py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+  string: py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}

 requirements:
   host:
10 changes: 5 additions & 5 deletions cpp/examples/developers/graph_operations/graph_operations.cu
@@ -131,7 +131,7 @@ create_graph(raft::handle_t const& handle,
   //

   if (multi_gpu) {
-    std::tie(d_edge_srcs, d_edge_dsts, d_edge_wgts, std::ignore, std::ignore) =
+    std::tie(d_edge_srcs, d_edge_dsts, d_edge_wgts, std::ignore, std::ignore, std::ignore) =
       cugraph::shuffle_external_edges<vertex_t, vertex_t, weight_t, int32_t>(handle,
                                                                              std::move(d_edge_srcs),
                                                                              std::move(d_edge_dsts),
@@ -215,10 +215,10 @@ void perform_example_graph_operations(
       graph_view);

   cugraph::update_edge_src_property(
-    handle, graph_view, vertex_weights.begin(), src_vertex_weights_cache);
+    handle, graph_view, vertex_weights.begin(), src_vertex_weights_cache.mutable_view());

   cugraph::update_edge_dst_property(
-    handle, graph_view, vertex_weights.begin(), dst_vertex_weights_cache);
+    handle, graph_view, vertex_weights.begin(), dst_vertex_weights_cache.mutable_view());

   rmm::device_uvector<result_t> weighted_averages(
     size_of_the_vertex_partition_assigned_to_this_process, handle.get_stream());
@@ -259,10 +259,10 @@ void perform_example_graph_operations(
       graph_view);

   cugraph::update_edge_src_property(
-    handle, graph_view, vertex_weights.begin(), src_vertex_weights_cache);
+    handle, graph_view, vertex_weights.begin(), src_vertex_weights_cache.mutable_view());

   cugraph::update_edge_dst_property(
-    handle, graph_view, vertex_weights.begin(), dst_vertex_weights_cache);
+    handle, graph_view, vertex_weights.begin(), dst_vertex_weights_cache.mutable_view());

   rmm::device_uvector<result_t> weighted_averages(
     size_of_the_vertex_partition_assigned_to_this_process, handle.get_stream());
@@ -127,7 +127,7 @@ create_graph(raft::handle_t const& handle,
   //

   if (multi_gpu) {
-    std::tie(d_edge_srcs, d_edge_dsts, d_edge_wgts, std::ignore, std::ignore) =
+    std::tie(d_edge_srcs, d_edge_dsts, d_edge_wgts, std::ignore, std::ignore, std::ignore) =
       cugraph::shuffle_external_edges<vertex_t, vertex_t, weight_t, int32_t>(handle,
                                                                              std::move(d_edge_srcs),
                                                                              std::move(d_edge_dsts),
15 changes: 12 additions & 3 deletions cpp/examples/users/multi_gpu_application/mg_graph_algorithms.cpp
@@ -123,7 +123,7 @@ create_graph(raft::handle_t const& handle,
   //

   if (multi_gpu) {
-    std::tie(d_edge_srcs, d_edge_dsts, d_edge_wgts, std::ignore, std::ignore) =
+    std::tie(d_edge_srcs, d_edge_dsts, d_edge_wgts, std::ignore, std::ignore, std::ignore) =
       cugraph::shuffle_external_edges<vertex_t, vertex_t, weight_t, int32_t>(handle,
                                                                              std::move(d_edge_srcs),
                                                                              std::move(d_edge_dsts),
@@ -248,9 +248,8 @@ void run_graph_algorithms(
     std::cout);
 }

-int main(int argc, char** argv)
+void run_tests()
 {
-  initialize_mpi_and_set_device(argc, argv);
   std::unique_ptr<raft::handle_t> handle = initialize_mg_handle();

   //
@@ -279,6 +278,7 @@ int main(int argc, char** argv)
     std::move(std::make_optional(edge_wgts)),
     renumber,
     is_symmetric);
+
   // Non-owning view of the graph object
   auto graph_view = graph.view();

@@ -292,5 +292,14 @@ int main(int argc, char** argv)
   run_graph_algorithms<vertex_t, edge_t, weight_t, store_transposed, multi_gpu>(
     *handle, graph_view, edge_weight_view);

+  handle.release();
+}
+
+int main(int argc, char** argv)
+{
+  initialize_mpi_and_set_device(argc, argv);
+
+  run_tests();
+
   RAFT_MPI_TRY(MPI_Finalize());
 }
3 changes: 2 additions & 1 deletion cpp/include/cugraph/graph_functions.hpp
@@ -1178,7 +1178,8 @@ std::tuple<rmm::device_uvector<vertex_t>,
            rmm::device_uvector<vertex_t>,
            std::optional<rmm::device_uvector<weight_t>>,
            std::optional<rmm::device_uvector<edge_t>>,
-           std::optional<rmm::device_uvector<edge_type_t>>>
+           std::optional<rmm::device_uvector<edge_type_t>>,
+           std::vector<size_t>>
 shuffle_external_edges(raft::handle_t const& handle,
                        rmm::device_uvector<vertex_t>&& edge_srcs,
                        rmm::device_uvector<vertex_t>&& edge_dsts,
2 changes: 1 addition & 1 deletion cpp/tests/CMakeLists.txt
@@ -490,7 +490,7 @@ ConfigureTest(SAMPLING_POST_PROCESSING_TEST sampling/sampling_post_processing_te

 ###################################################################################################
 # - NEGATIVE SAMPLING tests --------------------------------------------------------------------
-ConfigureTest(NEGATIVE_SAMPLING_TEST sampling/negative_sampling.cpp)
+ConfigureTest(NEGATIVE_SAMPLING_TEST sampling/negative_sampling.cpp PERCENT 100)

 ###################################################################################################
 # - Renumber tests --------------------------------------------------------------------------------
2 changes: 2 additions & 0 deletions dependencies.yaml
@@ -713,6 +713,8 @@ dependencies:
       - &pytorch_unsuffixed pytorch>=2.0,<2.2.0a0
       - torchdata
       - pydantic
+      - ogb
+      - torchmetrics

   specific:
     - output_types: [requirements]
5 changes: 2 additions & 3 deletions docs/cugraph/source/installation/source_build.md
@@ -12,8 +12,7 @@ __Compilers:__
 * `nvcc` version 11.5+

 __CUDA:__
-* CUDA 11.2+
-* NVIDIA driver 470.42.01 or newer
+* CUDA 11.8+
 * NVIDIA GPU, Volta architecture or later, with [compute capability](https://developer.nvidia.com/cuda-gpus) 7.0+

 Further details and download links for these prerequisites are available on the
@@ -178,7 +177,7 @@ Run either the C++ or the Python tests with datasets
 make test
 ```

-Note: This conda installation only applies to Linux and Python versions 3.8/3.11.
+Note: This conda installation only applies to Linux and Python versions 3.10 and 3.11.

 ### (OPTIONAL) Set environment variable on activation
5 changes: 2 additions & 3 deletions docs/cugraph/source/tutorials/cugraph_notebooks.md
@@ -55,10 +55,9 @@ Running the example in these notebooks requires:
   * Download via Docker, Conda (See [__Getting Started__](https://rapids.ai/start.html))

 * cuGraph is dependent on the latest version of cuDF. Please install all components of RAPIDS
-* Python 3.8+
-* A system with an NVIDIA GPU: Pascal architecture or better
+* Python 3.10+
+* A system with an NVIDIA GPU: Volta architecture or newer
 * CUDA 11.4+
-* NVIDIA driver 450.51+

 ## Copyright
5 changes: 2 additions & 3 deletions docs/cugraph/source/wholegraph/installation/source_build.md
@@ -16,8 +16,7 @@ __Compiler__:

 __CUDA__:
 * CUDA 11.8+
-* NVIDIA driver 450.80.02+
-* Pascal architecture or better
+* Volta architecture or better

 You can obtain CUDA from [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads).
@@ -177,7 +176,7 @@ Run either the C++ or the Python tests with datasets
 ```

-Note: This conda installation only applies to Linux and Python versions 3.8/3.10.
+Note: This conda installation only applies to Linux and Python versions 3.10 and 3.11.

 ## Creating documentation
5 changes: 2 additions & 3 deletions notebooks/README.md
@@ -56,10 +56,9 @@ Running the example in these notebooks requires:
   * Download via Docker, Conda (See [__Getting Started__](https://rapids.ai/start.html))

 * cuGraph is dependent on the latest version of cuDF. Please install all components of RAPIDS
-* Python 3.8+
-* A system with an NVIDIA GPU: Pascal architecture or better
+* Python 3.10+
+* A system with an NVIDIA GPU: Volta architecture or newer
 * CUDA 11.4+
-* NVIDIA driver 450.51+

 ### QuickStart
4 changes: 4 additions & 0 deletions python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py
@@ -140,6 +140,10 @@ def __init__(
         self.__graph = graph
         self.__device = device

+    @property
+    def _batch_size(self):
+        return self.__batch_size
+
     @property
     def dataset(
         self,
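As an aside on why this property is useful: double-underscore attributes are name-mangled per class, so code outside the dataloader cannot read `__batch_size` directly. A self-contained sketch of the pattern (class name and value here are hypothetical, not from the repo):

```python
# Minimal illustration of the name mangling the new property works around.
class Loader:
    def __init__(self, batch_size: int):
        self.__batch_size = batch_size  # stored as _Loader__batch_size

    @property
    def _batch_size(self) -> int:
        return self.__batch_size


loader = Loader(1024)
print(loader._batch_size)      # 1024, via the property
# loader.__batch_size          # would raise AttributeError due to mangling
```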
@@ -194,7 +194,7 @@ def sample(

         if g.is_homogeneous:
             indices = torch.concat(list(indices))
-            ds.sample_from_nodes(indices, batch_size=batch_size)
+            ds.sample_from_nodes(indices.long(), batch_size=batch_size)
             return HomogeneousSampleReader(
                 ds.get_reader(), self.output_format, self.edge_dir
             )
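The `.long()` call is a dtype normalization: seed tensors coming from DGL can arrive as int32, while the downstream sampler presumably expects 64-bit vertex ids. A small hedged illustration (the int64 requirement is my assumption, not stated in the diff):

```python
# Seeds concatenated from per-type index tensors may be int32; .long()
# converts them to int64 before they reach sample_from_nodes.
import torch

parts = [torch.tensor([0, 1], dtype=torch.int32), torch.tensor([2], dtype=torch.int32)]
indices = torch.concat(list(parts))   # mirrors the code above
print(indices.dtype)                  # torch.int32
print(indices.long().dtype)           # torch.int64
```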
20 changes: 14 additions & 6 deletions python/cugraph-dgl/cugraph_dgl/graph.py
@@ -29,6 +29,7 @@
     HeteroNodeDataView,
     HeteroEdgeView,
     HeteroEdgeDataView,
+    EmbeddingView,
 )


@@ -567,8 +568,8 @@ def _has_n_emb(self, ntype: str, emb_name: str) -> bool:
         return (ntype, emb_name) in self.__ndata_storage

     def _get_n_emb(
-        self, ntype: str, emb_name: str, u: Union[str, TensorType]
-    ) -> "torch.Tensor":
+        self, ntype: Union[str, None], emb_name: str, u: Union[str, TensorType]
+    ) -> Union["torch.Tensor", "EmbeddingView"]:
         """
         Gets the embedding of a single node type.
         Unlike DGL, this function takes the string node
@@ -583,11 +584,11 @@ def _get_n_emb(
         u: Union[str, TensorType]
             Nodes to get the representation of, or ALL
             to get the representation of all nodes of
-            the given type.
+            the given type (returns embedding view).

         Returns
         -------
-        torch.Tensor
+        Union[torch.Tensor, cugraph_dgl.view.EmbeddingView]
             The embedding of the given edge type with the given embedding name.
         """
@@ -598,9 +599,14 @@ def _get_n_emb(
             raise ValueError("Must provide the node type for a heterogeneous graph")

         if dgl.base.is_all(u):
-            u = torch.arange(self.num_nodes(ntype), dtype=self.idtype, device="cpu")
+            return EmbeddingView(
+                self.__ndata_storage[ntype, emb_name], self.num_nodes(ntype)
+            )

         try:
+            print(
+                u,
+            )
             return self.__ndata_storage[ntype, emb_name].fetch(
                 _cast_to_torch_tensor(u), "cuda"
             )
@@ -644,7 +650,9 @@ def _get_e_emb(
         etype = self.to_canonical_etype(etype)

         if dgl.base.is_all(u):
-            u = torch.arange(self.num_edges(etype), dtype=self.idtype, device="cpu")
+            return EmbeddingView(
+                self.__edata_storage[etype, emb_name], self.num_edges(etype)
+            )

         try:
             return self.__edata_storage[etype, emb_name].fetch(
36 changes: 36 additions & 0 deletions python/cugraph-dgl/cugraph_dgl/view.py
@@ -12,6 +12,8 @@
 # limitations under the License.


+import warnings
+
 from collections import defaultdict
 from collections.abc import MutableMapping
 from typing import Union, Dict, List, Tuple
@@ -20,11 +22,45 @@

 import cugraph_dgl
 from cugraph_dgl.typing import TensorType
+from cugraph_dgl.utils.cugraph_conversion_utils import _cast_to_torch_tensor

 torch = import_optional("torch")
 dgl = import_optional("dgl")


+class EmbeddingView:
+    def __init__(self, storage: "dgl.storages.base.FeatureStorage", ld: int):
+        self.__ld = ld
+        self.__storage = storage
+
+    def __getitem__(self, u: TensorType) -> "torch.Tensor":
+        u = _cast_to_torch_tensor(u)
+        try:
+            return self.__storage.fetch(
+                u,
+                "cuda",
+            )
+        except RuntimeError as ex:
+            warnings.warn(
+                "Got error accessing data, trying again with index on device: "
+                + str(ex)
+            )
+            return self.__storage.fetch(
+                u.cuda(),
+                "cuda",
+            )
+
+    @property
+    def shape(self) -> "torch.Size":
+        try:
+            f = self.__storage.fetch(torch.tensor([0]), "cpu")
+        except RuntimeError:
+            f = self.__storage.fetch(torch.tensor([0], device="cuda"), "cuda")
+        sz = [s for s in f.shape]
+        sz[0] = self.__ld
+        return torch.Size(tuple(sz))
+
+
 class HeteroEdgeDataView(MutableMapping):
     """
     Duck-typed version of DGL's HeteroEdgeDataView.
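To make the new class concrete, a hedged usage sketch follows. Nothing here is from the repository: the graph `g`, the `'feat'` embedding, and the access through the private `_get_n_emb` are all assumed for illustration. The point is that `dgl.ALL` now yields a lazy view rather than materializing every row up front.

```python
# Hypothetical sketch of EmbeddingView's lazy behavior.
import dgl
import torch
import cugraph_dgl

g = cugraph_dgl.Graph()                    # assumed: nodes and a 'feat' embedding attached
emb = g._get_n_emb(None, "feat", dgl.ALL)  # EmbeddingView; no rows fetched yet
print(emb.shape)                           # (num_nodes, dim), probed from a single row
rows = emb[torch.tensor([0, 1, 2])]        # fetches only these rows to the GPU
```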