From 27f8ce1d8c1e4a24e3b15e12fea7a43e5f7566b8 Mon Sep 17 00:00:00 2001 From: Alex Barghi <105237337+alexbarghi-nv@users.noreply.github.com> Date: Mon, 21 Oct 2024 15:40:20 -0400 Subject: [PATCH 1/5] [FEA] Support Edge ID Lookup in PyLibcuGraph (#4687) Support Edge ID lookup in `pylibcugraph`. Also fixes some bugs in the C API (i.e. lookup table not being cleaned up correctly, container being incorrectly dereferenced as graph). Verified in rapidsai/cugraph-gnn#50 Authors: - Alex Barghi (https://github.com/alexbarghi-nv) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Seunghwa Kang (https://github.com/seunghwak) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4687 --- cpp/include/cugraph_c/lookup_src_dst.h | 8 ++ cpp/src/c_api/lookup_src_dst.cpp | 45 ++++--- .../pylibcugraph/pylibcugraph/CMakeLists.txt | 1 + python/pylibcugraph/pylibcugraph/__init__.py | 2 + .../_cugraph_c/lookup_src_dst.pxd | 2 + .../pylibcugraph/edge_id_lookup_table.pxd | 34 ++++++ .../pylibcugraph/edge_id_lookup_table.pyx | 114 ++++++++++++++++++ .../internal_types/CMakeLists.txt | 1 + .../internal_types/edge_id_lookup_result.pxd | 30 +++++ .../internal_types/edge_id_lookup_result.pyx | 63 ++++++++++ .../pylibcugraph/tests/test_lookup_table.py | 80 ++++++++++++ 11 files changed, 364 insertions(+), 16 deletions(-) create mode 100644 python/pylibcugraph/pylibcugraph/edge_id_lookup_table.pxd create mode 100644 python/pylibcugraph/pylibcugraph/edge_id_lookup_table.pyx create mode 100644 python/pylibcugraph/pylibcugraph/internal_types/edge_id_lookup_result.pxd create mode 100644 python/pylibcugraph/pylibcugraph/internal_types/edge_id_lookup_result.pyx create mode 100644 python/pylibcugraph/pylibcugraph/tests/test_lookup_table.py diff --git a/cpp/include/cugraph_c/lookup_src_dst.h b/cpp/include/cugraph_c/lookup_src_dst.h index f4d63572e82..64051743981 100644 --- a/cpp/include/cugraph_c/lookup_src_dst.h +++ 
b/cpp/include/cugraph_c/lookup_src_dst.h @@ -136,6 +136,14 @@ cugraph_type_erased_device_array_view_t* cugraph_lookup_result_get_dsts( */ void cugraph_lookup_result_free(cugraph_lookup_result_t* result); +/** + * @ingroup samplingC + * @brief Free a sampling lookup map + * + * @param [in] container The sampling lookup map (a.k.a. container). + */ +void cugraph_lookup_container_free(cugraph_lookup_container_t* container); + #ifdef __cplusplus } #endif diff --git a/cpp/src/c_api/lookup_src_dst.cpp b/cpp/src/c_api/lookup_src_dst.cpp index 1be2137ef2f..3b87791ac50 100644 --- a/cpp/src/c_api/lookup_src_dst.cpp +++ b/cpp/src/c_api/lookup_src_dst.cpp @@ -307,23 +307,26 @@ extern "C" cugraph_error_code_t cugraph_lookup_endpoints_from_edge_ids_and_types { CAPI_EXPECTS( reinterpret_cast(graph)->vertex_type_ == - reinterpret_cast(lookup_container)->vertex_type_, + reinterpret_cast(lookup_container) + ->vertex_type_, CUGRAPH_INVALID_INPUT, "vertex type of graph and lookup_container must match", *error); CAPI_EXPECTS( reinterpret_cast(graph)->edge_type_ == - reinterpret_cast(lookup_container)->edge_type_, + reinterpret_cast(lookup_container) + ->edge_type_, CUGRAPH_INVALID_INPUT, "edge type of graph and lookup_container must match", *error); - CAPI_EXPECTS(reinterpret_cast(graph)->edge_type_id_type_ == - reinterpret_cast(lookup_container) - ->edge_type_id_type_, - CUGRAPH_INVALID_INPUT, - "edge type id type of graph and lookup_container must match", - *error); + CAPI_EXPECTS( + reinterpret_cast(graph)->edge_type_id_type_ == + reinterpret_cast(lookup_container) + ->edge_type_id_type_, + CUGRAPH_INVALID_INPUT, + "edge type id type of graph and lookup_container must match", + *error); lookup_using_edge_ids_and_types_functor functor( handle, graph, lookup_container, edge_ids_to_lookup, edge_types_to_lookup); @@ -341,23 +344,26 @@ extern "C" cugraph_error_code_t cugraph_lookup_endpoints_from_edge_ids_and_singl { CAPI_EXPECTS( reinterpret_cast(graph)->vertex_type_ == - 
reinterpret_cast(lookup_container)->vertex_type_, + reinterpret_cast(lookup_container) + ->vertex_type_, CUGRAPH_INVALID_INPUT, "vertex type of graph and lookup_container must match", *error); CAPI_EXPECTS( reinterpret_cast(graph)->edge_type_ == - reinterpret_cast(lookup_container)->edge_type_, + reinterpret_cast(lookup_container) + ->edge_type_, CUGRAPH_INVALID_INPUT, "edge type of graph and lookup_container must match", *error); - CAPI_EXPECTS(reinterpret_cast(graph)->edge_type_id_type_ == - reinterpret_cast(lookup_container) - ->edge_type_id_type_, - CUGRAPH_INVALID_INPUT, - "edge type id type of graph and lookup_container must match", - *error); + CAPI_EXPECTS( + reinterpret_cast(graph)->edge_type_id_type_ == + reinterpret_cast(lookup_container) + ->edge_type_id_type_, + CUGRAPH_INVALID_INPUT, + "edge type id type of graph and lookup_container must match", + *error); lookup_using_edge_ids_of_single_type_functor functor( handle, graph, lookup_container, edge_ids_to_lookup, edge_type_to_lookup); @@ -387,3 +393,10 @@ extern "C" void cugraph_lookup_result_free(cugraph_lookup_result_t* result) delete internal_pointer->dsts_; delete internal_pointer; } + +extern "C" void cugraph_lookup_container_free(cugraph_lookup_container_t* container) +{ + auto internal_ptr = reinterpret_cast(container); + // The graph should presumably own the other structures. 
+ delete internal_ptr; +} diff --git a/python/pylibcugraph/pylibcugraph/CMakeLists.txt b/python/pylibcugraph/pylibcugraph/CMakeLists.txt index 9f1b9924336..3a53c7d16c3 100644 --- a/python/pylibcugraph/pylibcugraph/CMakeLists.txt +++ b/python/pylibcugraph/pylibcugraph/CMakeLists.txt @@ -65,6 +65,7 @@ set(cython_sources all_pairs_sorensen_coefficients.pyx all_pairs_overlap_coefficients.pyx all_pairs_cosine_coefficients.pyx + edge_id_lookup_table.pyx ) set(linked_libraries cugraph::cugraph;cugraph::cugraph_c) diff --git a/python/pylibcugraph/pylibcugraph/__init__.py b/python/pylibcugraph/pylibcugraph/__init__.py index 26fa3f64ddd..9c04a528fd8 100644 --- a/python/pylibcugraph/pylibcugraph/__init__.py +++ b/python/pylibcugraph/pylibcugraph/__init__.py @@ -21,6 +21,8 @@ from pylibcugraph.graph_properties import GraphProperties +from pylibcugraph.edge_id_lookup_table import EdgeIdLookupTable + from pylibcugraph.eigenvector_centrality import eigenvector_centrality from pylibcugraph.katz_centrality import katz_centrality diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/lookup_src_dst.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/lookup_src_dst.pxd index 710ca7d113b..e8a2bbf47ae 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/lookup_src_dst.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/lookup_src_dst.pxd @@ -70,3 +70,5 @@ cdef extern from "cugraph_c/lookup_src_dst.h": const cugraph_lookup_result_t* result) cdef void cugraph_lookup_result_free(cugraph_lookup_result_t* result) + + cdef void cugraph_lookup_container_free(cugraph_lookup_container_t* container) diff --git a/python/pylibcugraph/pylibcugraph/edge_id_lookup_table.pxd b/python/pylibcugraph/pylibcugraph/edge_id_lookup_table.pxd new file mode 100644 index 00000000000..9bbd19963a7 --- /dev/null +++ b/python/pylibcugraph/pylibcugraph/edge_id_lookup_table.pxd @@ -0,0 +1,34 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Have cython use python 3 syntax +# cython: language_level = 3 + +from pylibcugraph._cugraph_c.error cimport ( + cugraph_error_code_t, + cugraph_error_t, +) +from pylibcugraph._cugraph_c.lookup_src_dst cimport ( + cugraph_lookup_container_t, +) +from pylibcugraph.resource_handle cimport ( + ResourceHandle, +) +from pylibcugraph.graphs cimport ( + _GPUGraph, +) + +cdef class EdgeIdLookupTable: + cdef ResourceHandle handle, + cdef _GPUGraph graph, + cdef cugraph_lookup_container_t* lookup_container_c_ptr diff --git a/python/pylibcugraph/pylibcugraph/edge_id_lookup_table.pyx b/python/pylibcugraph/pylibcugraph/edge_id_lookup_table.pyx new file mode 100644 index 00000000000..49ccdbdd168 --- /dev/null +++ b/python/pylibcugraph/pylibcugraph/edge_id_lookup_table.pyx @@ -0,0 +1,114 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Have cython use python 3 syntax +# cython: language_level = 3 + +from pylibcugraph._cugraph_c.resource_handle cimport ( + cugraph_resource_handle_t, +) +from pylibcugraph._cugraph_c.error cimport ( + cugraph_error_code_t, + cugraph_error_t, +) +from pylibcugraph._cugraph_c.array cimport ( + cugraph_type_erased_device_array_view_t, + cugraph_type_erased_device_array_view_create, + cugraph_type_erased_device_array_view_free, + cugraph_type_erased_host_array_view_t, + cugraph_type_erased_host_array_view_create, + cugraph_type_erased_host_array_view_free, +) +from pylibcugraph._cugraph_c.graph cimport ( + cugraph_graph_t, +) +from pylibcugraph._cugraph_c.lookup_src_dst cimport ( + cugraph_lookup_container_t, + cugraph_build_edge_id_and_type_to_src_dst_lookup_map, + cugraph_lookup_container_free, + cugraph_lookup_endpoints_from_edge_ids_and_single_type, + cugraph_lookup_result_t, +) +from pylibcugraph.utils cimport ( + assert_success, + assert_CAI_type, + assert_AI_type, + get_c_type_from_numpy_type, + create_cugraph_type_erased_device_array_view_from_py_obj +) +from pylibcugraph.resource_handle cimport ( + ResourceHandle, +) +from pylibcugraph.graphs cimport ( + _GPUGraph, +) +from pylibcugraph.internal_types.edge_id_lookup_result cimport ( + EdgeIdLookupResult, +) + +cdef class EdgeIdLookupTable: + def __cinit__(self, ResourceHandle resource_handle, _GPUGraph graph): + self.handle = resource_handle + self.graph = graph + + cdef cugraph_error_code_t error_code + cdef cugraph_error_t* error_ptr + + error_code = cugraph_build_edge_id_and_type_to_src_dst_lookup_map( + self.handle.c_resource_handle_ptr, + self.graph.c_graph_ptr, + &self.lookup_container_c_ptr, + &error_ptr, + ) + + assert_success(error_code, error_ptr, "cugraph_build_edge_id_and_type_to_src_dst_lookup_map") + + def __dealloc__(self): + if self.lookup_container_c_ptr is not NULL: + cugraph_lookup_container_free(self.lookup_container_c_ptr) + + def lookup_vertex_ids( + self, + edge_ids, + int edge_type 
+ ): + """ + For a single edge type, finds the source and destination vertex ids corresponding + to the provided edge ids. + """ + + cdef cugraph_error_code_t error_code + cdef cugraph_error_t* error_ptr + cdef cugraph_lookup_result_t* result_ptr + + cdef cugraph_type_erased_device_array_view_t* edge_ids_c_ptr + edge_ids_c_ptr = create_cugraph_type_erased_device_array_view_from_py_obj(edge_ids) + + error_code = cugraph_lookup_endpoints_from_edge_ids_and_single_type( + self.handle.c_resource_handle_ptr, + self.graph.c_graph_ptr, + self.lookup_container_c_ptr, + edge_ids_c_ptr, + edge_type, + &result_ptr, + &error_ptr, + ) + + assert_success(error_code, error_ptr, "cugraph_lookup_endpoints_from_edge_ids_and_single_type") + + lr = EdgeIdLookupResult() + lr.set_ptr((result_ptr)) + return { + 'sources': lr.get_sources(), + 'destinations': lr.get_destinations(), + } diff --git a/python/pylibcugraph/pylibcugraph/internal_types/CMakeLists.txt b/python/pylibcugraph/pylibcugraph/internal_types/CMakeLists.txt index 22f07939db0..1b0d6ec71a4 100644 --- a/python/pylibcugraph/pylibcugraph/internal_types/CMakeLists.txt +++ b/python/pylibcugraph/pylibcugraph/internal_types/CMakeLists.txt @@ -15,6 +15,7 @@ set(cython_sources sampling_result.pyx coo.pyx + edge_id_lookup_result.pyx ) set(linked_libraries cugraph::cugraph;cugraph::cugraph_c) diff --git a/python/pylibcugraph/pylibcugraph/internal_types/edge_id_lookup_result.pxd b/python/pylibcugraph/pylibcugraph/internal_types/edge_id_lookup_result.pxd new file mode 100644 index 00000000000..68dd2362a00 --- /dev/null +++ b/python/pylibcugraph/pylibcugraph/internal_types/edge_id_lookup_result.pxd @@ -0,0 +1,30 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Have cython use python 3 syntax +# cython: language_level = 3 + + +from pylibcugraph._cugraph_c.lookup_src_dst cimport ( + cugraph_lookup_result_t +) +from pylibcugraph._cugraph_c.array cimport ( + cugraph_type_erased_device_array_view_t, +) + +cdef class EdgeIdLookupResult: + cdef cugraph_lookup_result_t* result_c_ptr + + cdef get_array(self, cugraph_type_erased_device_array_view_t* ptr) + + cdef set_ptr(self, cugraph_lookup_result_t* ptr) diff --git a/python/pylibcugraph/pylibcugraph/internal_types/edge_id_lookup_result.pyx b/python/pylibcugraph/pylibcugraph/internal_types/edge_id_lookup_result.pyx new file mode 100644 index 00000000000..5f7165ce988 --- /dev/null +++ b/python/pylibcugraph/pylibcugraph/internal_types/edge_id_lookup_result.pyx @@ -0,0 +1,63 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Have cython use python 3 syntax +# cython: language_level = 3 + +from pylibcugraph._cugraph_c.lookup_src_dst cimport ( + cugraph_lookup_result_t, + cugraph_lookup_result_free, + cugraph_lookup_result_get_dsts, + cugraph_lookup_result_get_srcs, +) +from pylibcugraph._cugraph_c.array cimport ( + cugraph_type_erased_device_array_view_t, +) +from pylibcugraph.utils cimport ( + create_cupy_array_view_for_device_ptr, +) + +cdef class EdgeIdLookupResult: + def __cinit__(self): + """ + Sets this object as the owner of the given pointer. + """ + self.result_c_ptr = NULL + + cdef set_ptr(self, cugraph_lookup_result_t* ptr): + self.result_c_ptr = ptr + + def __dealloc__(self): + if self.result_c_ptr is not NULL: + cugraph_lookup_result_free(self.result_c_ptr) + + cdef get_array(self, cugraph_type_erased_device_array_view_t* ptr): + if ptr is NULL: + return None + + return create_cupy_array_view_for_device_ptr( + ptr, + self, + ) + + def get_sources(self): + if self.result_c_ptr is NULL: + return None + cdef cugraph_type_erased_device_array_view_t* ptr = cugraph_lookup_result_get_srcs(self.result_c_ptr) + return self.get_array(ptr) + + def get_destinations(self): + if self.result_c_ptr is NULL: + return None + cdef cugraph_type_erased_device_array_view_t* ptr = cugraph_lookup_result_get_dsts(self.result_c_ptr) + return self.get_array(ptr) diff --git a/python/pylibcugraph/pylibcugraph/tests/test_lookup_table.py b/python/pylibcugraph/pylibcugraph/tests/test_lookup_table.py new file mode 100644 index 00000000000..2910a5f8d4d --- /dev/null +++ b/python/pylibcugraph/pylibcugraph/tests/test_lookup_table.py @@ -0,0 +1,80 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import cupy + +from pylibcugraph import ( + SGGraph, + ResourceHandle, + GraphProperties, + EdgeIdLookupTable, +) + + +# ============================================================================= +# Pytest fixtures +# ============================================================================= +# fixtures used in this test module are defined in conftest.py + + +# ============================================================================= +# Tests +# ============================================================================= + + +def test_lookup_table(): + # Vertex id array + vtcs = cupy.arange(6, dtype="int64") + + # Edge ids are unique per edge type and start from 0 + # Each edge type has the same src/dst vertex type here, + # just as it would in a GNN application. 
+ srcs = cupy.array([0, 1, 5, 4, 3, 2, 2, 0, 5, 4, 4, 5]) + dsts = cupy.array([1, 5, 0, 3, 2, 1, 3, 3, 2, 3, 1, 4]) + etps = cupy.array([0, 2, 6, 7, 4, 3, 4, 1, 7, 7, 6, 8], dtype="int32") + eids = cupy.array([0, 0, 0, 0, 0, 0, 1, 0, 1, 2, 1, 0]) + + wgts = cupy.ones((len(srcs),), dtype="float32") + + graph = SGGraph( + resource_handle=ResourceHandle(), + graph_properties=GraphProperties(is_symmetric=False, is_multigraph=True), + src_or_offset_array=srcs, + dst_or_index_array=dsts, + vertices_array=vtcs, + weight_array=wgts, + edge_id_array=eids, + edge_type_array=etps, + store_transposed=False, + renumber=False, + do_expensive_check=True, + ) + + table = EdgeIdLookupTable(ResourceHandle(), graph) + + assert table is not None + + found_edges = table.lookup_vertex_ids(cupy.array([0, 1, 2, 3, 4]), 7) + assert (found_edges["sources"] == cupy.array([4, 5, 4, -1, -1])).all() + assert (found_edges["destinations"] == cupy.array([3, 2, 3, -1, -1])).all() + + found_edges = table.lookup_vertex_ids(cupy.array([0]), 5) + assert (found_edges["sources"] == cupy.array([-1])).all() + assert (found_edges["destinations"] == cupy.array([-1])).all() + + found_edges = table.lookup_vertex_ids(cupy.array([3, 1, 0, 5]), 6) + assert (found_edges["sources"] == cupy.array([-1, 4, 5, -1])).all() + assert (found_edges["destinations"] == cupy.array([-1, 1, 0, -1])).all() + + # call __dealloc__() + del table From a9192a8575ee7e79d0b41d59382884cc7dfeabc4 Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Tue, 22 Oct 2024 15:55:02 +0100 Subject: [PATCH 2/5] Symmetrize edgelist when creating a CSR graph (#4716) This PR allows the edge list to be symmetrized when creating a graph from a CSR representation. 
closes #4693 Authors: - Joseph Nke (https://github.com/jnke2016) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4716 --- cpp/src/c_api/graph_sg.cpp | 2 +- .../cugraph/tests/structure/test_graph.py | 32 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/cpp/src/c_api/graph_sg.cpp b/cpp/src/c_api/graph_sg.cpp index e7ebbc2d319..e57d6b5bb14 100644 --- a/cpp/src/c_api/graph_sg.cpp +++ b/cpp/src/c_api/graph_sg.cpp @@ -761,7 +761,7 @@ cugraph_error_code_t cugraph_graph_create_sg_from_csr( p_edge_ids, p_edge_type_ids, renumber, - FALSE, // symmetrize + symmetrize, do_expensive_check); try { diff --git a/python/cugraph/cugraph/tests/structure/test_graph.py b/python/cugraph/cugraph/tests/structure/test_graph.py index 48a0b257b12..b3e517100e1 100644 --- a/python/cugraph/cugraph/tests/structure/test_graph.py +++ b/python/cugraph/cugraph/tests/structure/test_graph.py @@ -26,6 +26,7 @@ from cudf.testing import assert_series_equal from cudf.testing.testing import assert_frame_equal from cugraph.structure.symmetrize import symmetrize +from cugraph.datasets import karate_asymmetric # MG import dask_cudf @@ -204,6 +205,37 @@ def test_add_adj_list_to_edge_list(graph_file): compare_series(destinations_cu, destinations_exp) +@pytest.mark.sg +def test_create_undirected_graph_from_asymmetric_adj_list(): + # karate_asymmetric.get_path() + Mnx = utils.read_csv_for_nx(karate_asymmetric.get_path()) + N = max(max(Mnx["0"]), max(Mnx["1"])) + 1 + Mcsr = scipy.sparse.csr_matrix((Mnx.weight, (Mnx["0"], Mnx["1"])), shape=(N, N)) + + offsets = cudf.Series(Mcsr.indptr) + indices = cudf.Series(Mcsr.indices) + + G = cugraph.Graph(directed=False) + + with pytest.raises(Exception): + # If an undirected graph is created with 'symmetrize' set to False, the + # edgelist provided by the user must be symmetric. 
+ G.from_cudf_adjlist(offsets, indices, None, symmetrize=False) + + G = cugraph.Graph(directed=False) + G.from_cudf_adjlist(offsets, indices, None, symmetrize=True) + + # FIXME: Since we have no mechanism to access the symmetrized edgelist + # from the graph_view_t, assert that the edgelist size is unchanged. Once + # exposing 'decompress_to_edgelist', ensure that + # G.number_of_edges() == 2 * karate_asymmetric.get_edgelist()? + assert G.number_of_edges() == len(karate_asymmetric.get_edgelist()) + + # FIXME: Once 'decompress_to_edgelist' is exposed to the + # python API, ensure that the derived edgelist is symmetric + # if symmetrize = True. + + # Test @pytest.mark.sg @pytest.mark.parametrize("graph_file", utils.DATASETS) From 201ff7cdb9a0e894b0e41cdaf25b50b61bf83cf9 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Tue, 22 Oct 2024 12:00:13 -0500 Subject: [PATCH 3/5] Updates README with new dataset, removes mention of script no longer used (#4736) This is a doc-only PR which updates the README for nx-cugraph pytest-based benchmarks: * Adds description of a dataset that's being used * Removes mention of script no longer used --- benchmarks/nx-cugraph/pytest-based/README.md | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/benchmarks/nx-cugraph/pytest-based/README.md b/benchmarks/nx-cugraph/pytest-based/README.md index 5d2406bfcd5..414a22171a0 100644 --- a/benchmarks/nx-cugraph/pytest-based/README.md +++ b/benchmarks/nx-cugraph/pytest-based/README.md @@ -10,6 +10,7 @@ Our current benchmarks provide the following datasets: | -------- | ------- | ------- | ------- | | netscience | 1,461 | 5,484 | Yes | | email-Eu-core | 1,005 | 25,571 | Yes | +| amazon0302 | 262,111 | 1,234,877 | Yes | | cit-Patents | 3,774,768 | 16,518,948 | Yes | | hollywood | 1,139,905 | 57,515,616 | No | | soc-LiveJournal1 | 4,847,571 | 68,993,773 | Yes | @@ -39,16 +40,8 @@ NOTE: ./run-main-benchmarks.sh ``` -#### 2. 
`get_graph_bench_dataset.py` -This script downloads the specified dataset using `cugraph.datasets`. - -**Usage:** - ```bash - python get_graph_bench_dataset.py [dataset] - ``` - -#### 3. `create_results_summary_page.py` -This script is designed to be run after `run-gap-benchmarks.sh` in order to generate an HTML page displaying a results table comparing default NetworkX to nx-cugraph. The script also provides information about the current system, so it should be run on the machine on which benchmarks were run. +#### 2. `create_results_summary_page.py` +This script is designed to be run after `run-main-benchmarks.sh` in order to generate an HTML page displaying a results table comparing default NetworkX to nx-cugraph. The script also provides information about the current system, so it should be run on the machine on which benchmarks were run. **Usage:** ```bash From 7390ae2f8e25ec65a222c5a868942bb67615624d Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Wed, 23 Oct 2024 13:16:09 -0400 Subject: [PATCH 4/5] Address Leiden clustering generating too many clusters (#4730) Our implementation of Leiden was generating too many clusters. This was not obvious in smaller graphs, but as the graphs get larger the problem became more noticeable. The Leiden loop was terminating if the modularity stopped improving. But the Leiden algorithm as defined in the paper allows the refinement phase to reduce modularity in order to improve the quality of the clusters. The convergence criteria defined in the paper was based on making no changes on the iteration rather than strictly monitoring modularity change. Updating this criteria results in the Leiden algorithm running more iterations and converging on better answers. 
Closes #4529 Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Naim (https://github.com/naimnv) - Joseph Nke (https://github.com/jnke2016) - Seunghwa Kang (https://github.com/seunghwak) URL: https://github.com/rapidsai/cugraph/pull/4730 --- cpp/src/community/detail/refine.hpp | 3 +-- cpp/src/community/detail/refine_impl.cuh | 9 +++++-- cpp/src/community/detail/refine_mg_v32_e32.cu | 6 ++--- cpp/src/community/detail/refine_mg_v64_e64.cu | 6 ++--- cpp/src/community/detail/refine_sg_v32_e32.cu | 6 ++--- cpp/src/community/detail/refine_sg_v64_e64.cu | 6 ++--- cpp/src/community/leiden_impl.cuh | 26 ++++++++++++------- 7 files changed, 33 insertions(+), 29 deletions(-) diff --git a/cpp/src/community/detail/refine.hpp b/cpp/src/community/detail/refine.hpp index a60efee887f..429e0e9e6c2 100644 --- a/cpp/src/community/detail/refine.hpp +++ b/cpp/src/community/detail/refine.hpp @@ -46,8 +46,7 @@ refine_clustering( rmm::device_uvector&& next_clusters_v, edge_src_property_t const& src_vertex_weights_cache, edge_src_property_t const& src_clusters_cache, - edge_dst_property_t const& dst_clusters_cache, - bool up_down); + edge_dst_property_t const& dst_clusters_cache); } } // namespace cugraph diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh index 272e3d71f83..62b66ed5f41 100644 --- a/cpp/src/community/detail/refine_impl.cuh +++ b/cpp/src/community/detail/refine_impl.cuh @@ -150,8 +150,7 @@ refine_clustering( edge_src_property_t const& src_louvain_assignment_cache, edge_dst_property_t const& - dst_louvain_assignment_cache, - bool up_down) + dst_louvain_assignment_cache) { const weight_t POSITIVE_GAIN = 1e-6; using vertex_t = typename GraphViewType::vertex_type; @@ -230,6 +229,7 @@ refine_clustering( cugraph::reduce_op::plus{}, weighted_cut_of_vertices_to_louvain.begin()); + // FIXME: Consider using bit mask logic here. 
Would reduce memory by 8x rmm::device_uvector singleton_and_connected_flags( graph_view.local_vertex_partition_range_size(), handle.get_stream()); @@ -297,6 +297,11 @@ refine_clustering( edge_dst_property_t dst_leiden_assignment_cache(handle); edge_src_property_t src_singleton_and_connected_flag_cache(handle); + // FIXME: Why is kvstore used here? Can't this be accomplished by + // a direct lookup in louvain_assignment_of_vertices using + // leiden - graph_view.local_vertex_partition_range_first() as the + // index? + // Changing this would save memory and time kv_store_t leiden_to_louvain_map( leiden_assignment.begin(), leiden_assignment.end(), diff --git a/cpp/src/community/detail/refine_mg_v32_e32.cu b/cpp/src/community/detail/refine_mg_v32_e32.cu index d27260c1337..ce46a48ed5c 100644 --- a/cpp/src/community/detail/refine_mg_v32_e32.cu +++ b/cpp/src/community/detail/refine_mg_v32_e32.cu @@ -37,8 +37,7 @@ refine_clustering( edge_src_property_t, int32_t> const& src_clusters_cache, edge_dst_property_t, int32_t> const& - dst_clusters_cache, - bool up_down); + dst_clusters_cache); template std::tuple, std::pair, rmm::device_uvector>> @@ -59,8 +58,7 @@ refine_clustering( edge_src_property_t, int32_t> const& src_clusters_cache, edge_dst_property_t, int32_t> const& - dst_clusters_cache, - bool up_down); + dst_clusters_cache); } // namespace detail } // namespace cugraph diff --git a/cpp/src/community/detail/refine_mg_v64_e64.cu b/cpp/src/community/detail/refine_mg_v64_e64.cu index 1a2ed665b8a..d870f30cd3c 100644 --- a/cpp/src/community/detail/refine_mg_v64_e64.cu +++ b/cpp/src/community/detail/refine_mg_v64_e64.cu @@ -37,8 +37,7 @@ refine_clustering( edge_src_property_t, int64_t> const& src_clusters_cache, edge_dst_property_t, int64_t> const& - dst_clusters_cache, - bool up_down); + dst_clusters_cache); template std::tuple, std::pair, rmm::device_uvector>> @@ -59,8 +58,7 @@ refine_clustering( edge_src_property_t, int64_t> const& src_clusters_cache, edge_dst_property_t, 
int64_t> const& - dst_clusters_cache, - bool up_down); + dst_clusters_cache); } // namespace detail } // namespace cugraph diff --git a/cpp/src/community/detail/refine_sg_v32_e32.cu b/cpp/src/community/detail/refine_sg_v32_e32.cu index ac0ede8225d..803a37474d4 100644 --- a/cpp/src/community/detail/refine_sg_v32_e32.cu +++ b/cpp/src/community/detail/refine_sg_v32_e32.cu @@ -37,8 +37,7 @@ refine_clustering( edge_src_property_t, int32_t> const& src_clusters_cache, edge_dst_property_t, int32_t> const& - dst_clusters_cache, - bool up_down); + dst_clusters_cache); template std::tuple, std::pair, rmm::device_uvector>> @@ -59,8 +58,7 @@ refine_clustering( edge_src_property_t, int32_t> const& src_clusters_cache, edge_dst_property_t, int32_t> const& - dst_clusters_cache, - bool up_down); + dst_clusters_cache); } // namespace detail } // namespace cugraph diff --git a/cpp/src/community/detail/refine_sg_v64_e64.cu b/cpp/src/community/detail/refine_sg_v64_e64.cu index 97ed43b3de0..7b8bc435bc3 100644 --- a/cpp/src/community/detail/refine_sg_v64_e64.cu +++ b/cpp/src/community/detail/refine_sg_v64_e64.cu @@ -37,8 +37,7 @@ refine_clustering( edge_src_property_t, int64_t> const& src_clusters_cache, edge_dst_property_t, int64_t> const& - dst_clusters_cache, - bool up_down); + dst_clusters_cache); template std::tuple, std::pair, rmm::device_uvector>> @@ -59,8 +58,7 @@ refine_clustering( edge_src_property_t, int64_t> const& src_clusters_cache, edge_dst_property_t, int64_t> const& - dst_clusters_cache, - bool up_down); + dst_clusters_cache); } // namespace detail } // namespace cugraph diff --git a/cpp/src/community/leiden_impl.cuh b/cpp/src/community/leiden_impl.cuh index da790a5dd66..c3600ff12e0 100644 --- a/cpp/src/community/leiden_impl.cuh +++ b/cpp/src/community/leiden_impl.cuh @@ -102,7 +102,8 @@ std::pair>, weight_t> leiden( HighResTimer hr_timer{}; #endif - weight_t best_modularity = weight_t{-1.0}; + weight_t final_Q{-1}; + weight_t total_edge_weight = 
compute_total_edge_weight(handle, current_graph_view, *current_edge_weight_view); @@ -368,9 +369,6 @@ std::pair>, weight_t> leiden( detail::timer_stop(handle, hr_timer); #endif - bool terminate = (cur_Q <= best_modularity); - if (!terminate) { best_modularity = cur_Q; } - #ifdef TIMING detail::timer_start(handle, hr_timer, "contract graph"); #endif @@ -386,8 +384,7 @@ std::pair>, weight_t> leiden( auto nr_unique_louvain_clusters = remove_duplicates(handle, copied_louvain_partition); - terminate = - terminate || (nr_unique_louvain_clusters == current_graph_view.number_of_vertices()); + bool terminate = (nr_unique_louvain_clusters == current_graph_view.number_of_vertices()); rmm::device_uvector refined_leiden_partition(0, handle.get_stream()); std::pair, rmm::device_uvector> leiden_to_louvain_map{ @@ -426,11 +423,19 @@ std::pair>, weight_t> leiden( std::move(louvain_assignment_for_vertices), src_vertex_weights_cache, src_louvain_assignment_cache, - dst_louvain_assignment_cache, - up_down); + dst_louvain_assignment_cache); } // Clear buffer and contract the graph + final_Q = detail::compute_modularity(handle, + current_graph_view, + current_edge_weight_view, + src_louvain_assignment_cache, + dst_louvain_assignment_cache, + louvain_assignment_for_vertices, + cluster_weights, + total_edge_weight, + resolution); cluster_keys.resize(0, handle.get_stream()); cluster_weights.resize(0, handle.get_stream()); @@ -445,6 +450,9 @@ std::pair>, weight_t> leiden( dst_louvain_assignment_cache.clear(handle); if (!terminate) { + src_louvain_assignment_cache.clear(handle); + dst_louvain_assignment_cache.clear(handle); + auto nr_unique_leiden = static_cast(leiden_to_louvain_map.first.size()); if (graph_view_t::is_multi_gpu) { nr_unique_leiden = host_scalar_allreduce( @@ -586,7 +594,7 @@ std::pair>, weight_t> leiden( detail::timer_display(handle, hr_timer, std::cout); #endif - return std::make_pair(std::move(dendrogram), best_modularity); + return std::make_pair(std::move(dendrogram), 
final_Q); } template From f917ae4ad200258f1afb7d2f70ee200828b88479 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 23 Oct 2024 14:49:59 -0500 Subject: [PATCH 5/5] re-run all CI when files in .github/workflows change (#4723) Removes some exclusions left behind in #4634, to ensure that all CI is re-run when files in `.github/workflows` are changed. Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Kyle Edwards (https://github.com/KyleFromNVIDIA) URL: https://github.com/rapidsai/cugraph/pull/4723 --- .github/workflows/pr.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index b0a1308237e..0a33f5488a6 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -55,16 +55,12 @@ jobs: - '!notebooks/**' - '!python/**' - '!readme_pages/**' - # TODO: Remove this before merging - - '!.github/**' test_notebooks: - '**' - '!.devcontainers/**' - '!CONTRIBUTING.md' - '!README.md' - '!docs/**' - # TODO: Remove this before merging - - '!.github/**' test_python: - '**' - '!.devcontainers/**' @@ -73,8 +69,6 @@ jobs: - '!docs/**' - '!img/**' - '!notebooks/**' - # TODO: Remove this before merging - - '!.github/**' checks: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.12