From a54188d9f2368f5b3f863316a62a6bc3c44cb7f9 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Thu, 19 Sep 2024 14:24:25 -0500 Subject: [PATCH 1/2] Passes drop_multi_edges to PLC SGGraph ctor and adds test using pagerank to ensure Graphs w/out multi edges are created and that MultiGraphs are still created correctly. --- python/nx-cugraph/nx_cugraph/classes/graph.py | 9 +++++ .../nx_cugraph/tests/test_pagerank.py | 36 +++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 python/nx-cugraph/nx_cugraph/tests/test_pagerank.py diff --git a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py index 7425eacb2b4..7c01365c0ac 100644 --- a/python/nx-cugraph/nx_cugraph/classes/graph.py +++ b/python/nx-cugraph/nx_cugraph/classes/graph.py @@ -689,6 +689,14 @@ def _get_plc_graph( src_indices = src_indices.astype(index_dtype) dst_indices = dst_indices.astype(index_dtype) + # This sets drop_multi_edges=True for non-multigraph input, which means + # the data in self.src_indices and self.dst_indices may not be + # identical to that contained in the returned plc.SGGraph (the returned + # SGGraph may have fewer edges since duplicates are dropped). Ideally + # self.src_indices and self.dst_indices would be updated to have + # duplicate edges removed for non-multigraph instances, but that + # requires additional code which would be redundant and likely not as + # performant as the code in PLC. 
return plc.SGGraph( resource_handle=plc.ResourceHandle(), graph_properties=plc.GraphProperties( @@ -702,6 +710,7 @@ def _get_plc_graph( renumber=False, do_expensive_check=False, vertices_array=self._node_ids, + drop_multi_edges=not self.is_multigraph(), ) def _sort_edge_indices(self, primary="src"): diff --git a/python/nx-cugraph/nx_cugraph/tests/test_pagerank.py b/python/nx-cugraph/nx_cugraph/tests/test_pagerank.py new file mode 100644 index 00000000000..4aab6807dfe --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/test_pagerank.py @@ -0,0 +1,36 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import networkx as nx +import pandas as pd +from pytest import approx + + +def test_pagerank_multigraph(): + """ + Ensures correct differences between pagerank results for Graphs + vs. 
MultiGraphs generated using from_pandas_edgelist() + """ + df = pd.DataFrame({"source": [0, 1, 1, 1, 1, 1, 1, 2], + "target": [1, 2, 2, 2, 2, 2, 2, 3]}) + expected_pr_for_G = nx.pagerank(nx.from_pandas_edgelist(df)) + expected_pr_for_MultiG = nx.pagerank(nx.from_pandas_edgelist( + df, create_using=nx.MultiGraph)) + + G = nx.from_pandas_edgelist(df, backend="cugraph") + actual_pr_for_G = nx.pagerank(G, backend="cugraph") + + MultiG = nx.from_pandas_edgelist(df, create_using=nx.MultiGraph, backend="cugraph") + actual_pr_for_MultiG = nx.pagerank(MultiG, backend="cugraph") + + assert actual_pr_for_G == approx(expected_pr_for_G) + assert actual_pr_for_MultiG == approx(expected_pr_for_MultiG) From 8797907c651f4fd8b9385b22610f62db3acc9346 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Thu, 19 Sep 2024 14:27:45 -0500 Subject: [PATCH 2/2] Formatting --- python/nx-cugraph/nx_cugraph/tests/test_pagerank.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/nx-cugraph/nx_cugraph/tests/test_pagerank.py b/python/nx-cugraph/nx_cugraph/tests/test_pagerank.py index 4aab6807dfe..0b437df2d2f 100644 --- a/python/nx-cugraph/nx_cugraph/tests/test_pagerank.py +++ b/python/nx-cugraph/nx_cugraph/tests/test_pagerank.py @@ -23,8 +23,8 @@ def test_pagerank_multigraph(): df = pd.DataFrame({"source": [0, 1, 1, 1, 1, 1, 1, 2], "target": [1, 2, 2, 2, 2, 2, 2, 3]}) expected_pr_for_G = nx.pagerank(nx.from_pandas_edgelist(df)) - expected_pr_for_MultiG = nx.pagerank(nx.from_pandas_edgelist( - df, create_using=nx.MultiGraph)) + expected_pr_for_MultiG = nx.pagerank( + nx.from_pandas_edgelist(df, create_using=nx.MultiGraph)) G = nx.from_pandas_edgelist(df, backend="cugraph") actual_pr_for_G = nx.pagerank(G, backend="cugraph")