Skip to content

Commit

Permalink
Merge pull request #3899 from rapidsai/branch-23.10
Browse files Browse the repository at this point in the history
Forward-merge branch-23.10 to branch-23.12
  • Loading branch information
GPUtester authored Sep 29, 2023
2 parents 0e7df55 + 6e5e066 commit b42d9f8
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 20 deletions.
91 changes: 73 additions & 18 deletions python/cugraph/cugraph/community/louvain.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Union, Tuple
from cugraph.structure import Graph
from cugraph.utilities import (
is_nx_graph_type,
ensure_cugraph_obj_for_nx,
df_score_to_dictionary,
)
Expand All @@ -21,9 +24,26 @@
from pylibcugraph import louvain as pylibcugraph_louvain
from pylibcugraph import ResourceHandle

from cugraph.utilities.utils import import_optional

# FIXME: the networkx.Graph type used in type annotations is specified
# using a string literal to avoid depending on and importing networkx.
# Instead, networkx is imported optionally, which may cause a problem
# for a type checker if run in an environment where networkx is not installed.
networkx = import_optional("networkx")

VERTEX_COL_NAME = "vertex"
CLUSTER_ID_COL_NAME = "partition"


# FIXME: max_level should default to 100 once max_iter is removed
def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
def louvain(
G: Union[Graph, "networkx.Graph"],
max_level: Union[int, None] = None,
max_iter: Union[int, None] = None,
resolution: float = 1.0,
threshold: float = 1e-7,
) -> Tuple[Union[cudf.DataFrame, dict], float]:
"""
Compute the modularity optimizing partition of the input graph using the
Louvain method
Expand All @@ -48,6 +68,9 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
than the specified number of levels. No error occurs when the
algorithm terminates early in this manner.
If max_level > 500, it will be set to 500 and a warning is emitted
in order to prevent excessive runtime.
max_iter : integer, optional (default=None)
This parameter is deprecated in favor of max_level. Previously
it was used to control the maximum number of levels of the Louvain
Expand All @@ -68,18 +91,21 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
Returns
-------
parts : cudf.DataFrame
GPU data frame of size V containing two columns the vertex id and the
partition id it is assigned to.
result: cudf.DataFrame or dict
If input graph G is of type cugraph.Graph, a GPU dataframe
with two columns.
result[VERTEX_COL_NAME] : cudf.Series
Contains the vertex identifiers
result[CLUSTER_ID_COL_NAME] : cudf.Series
Contains the partition assigned to the vertices
df['vertex'] : cudf.Series
Contains the vertex identifiers
df['partition'] : cudf.Series
Contains the partition assigned to the vertices
If input graph G is of type networkx.Graph, a dict
Dictionary of vertices and their partition ids.
modularity_score : float
a floating point number containing the global modularity score of the
partitioning.
A floating point number containing the global modularity score
of the partitioning.
Examples
--------
Expand All @@ -89,6 +115,17 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
"""

# FIXME: Once the graph construction calls support isolated vertices through
# the C API (the C++ interface already supports this) then there will be
# no need to compute isolated vertices here.

isolated_vertices = list()
if is_nx_graph_type(type(G)):
isolated_vertices = [v for v in range(G.number_of_nodes()) if G.degree[v] == 0]
else:
# FIXME: Gather isolated vertices of G
pass

G, isNx = ensure_cugraph_obj_for_nx(G)

if G.is_directed():
Expand All @@ -112,7 +149,12 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
if max_level is None:
max_level = 100

vertex, partition, mod_score = pylibcugraph_louvain(
if max_level > 500:
w_msg = "max_level is set too high, clamping it down to 500."
warnings.warn(w_msg)
max_level = 500

vertex, partition, modularity_score = pylibcugraph_louvain(
resource_handle=ResourceHandle(),
graph=G._plc_graph,
max_level=max_level,
Expand All @@ -121,14 +163,27 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
do_expensive_check=False,
)

df = cudf.DataFrame()
df["vertex"] = vertex
df["partition"] = partition
result = cudf.DataFrame()
result[VERTEX_COL_NAME] = vertex
result[CLUSTER_ID_COL_NAME] = partition

if len(isolated_vertices) > 0:
unique_cids = result[CLUSTER_ID_COL_NAME].unique()
max_cluster_id = -1 if len(result) == 0 else unique_cids.max()

isolated_vtx_and_cids = cudf.DataFrame()
isolated_vtx_and_cids[VERTEX_COL_NAME] = isolated_vertices
isolated_vtx_and_cids[CLUSTER_ID_COL_NAME] = [
(max_cluster_id + i + 1) for i in range(len(isolated_vertices))
]
result = cudf.concat(
[result, isolated_vtx_and_cids], ignore_index=True, sort=False
)

if G.renumbered:
df = G.unrenumber(df, "vertex")
if G.renumbered and len(G.input_df) > 0:
result = G.unrenumber(result, VERTEX_COL_NAME)

if isNx is True:
df = df_score_to_dictionary(df, "partition")
result = df_score_to_dictionary(result, CLUSTER_ID_COL_NAME)

return df, mod_score
return result, modularity_score
16 changes: 16 additions & 0 deletions python/cugraph/cugraph/tests/community/test_louvain.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,19 @@ def test_louvain_csr_graph(is_weighted):

assert len(parition_diffs) == 0
assert mod_csr == mod_coo


@pytest.mark.sg
def test_louvain_nx_graph_with_isolated_nodes():
    """
    Check that louvain() on a NetworkX graph reports every node, including
    isolated ones, and assigns the expected number of distinct cluster ids.
    """
    # Case 1: five nodes and no edges. Every node is isolated, so cluster
    # ids are expected to be unique (one distinct id per node).
    nx_graph = nx.Graph()
    nx_graph.add_nodes_from(range(5))
    partitions, _ = cugraph.louvain(nx_graph)
    assert set(partitions.keys()) == set(nx_graph.nodes)
    assert len(set(partitions.values())) == nx_graph.number_of_nodes()

    # Case 2: same five nodes plus the edge (1, 2), leaving three isolated
    # nodes. One fewer distinct cluster id than nodes is expected.
    nx_graph.add_edge(1, 2)
    partitions, _ = cugraph.louvain(nx_graph)
    assert set(partitions.keys()) == set(nx_graph.nodes)
    assert len(set(partitions.values())) == nx_graph.number_of_nodes() - 1
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/utilities/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,8 +364,8 @@ def is_matrix_type(m):
return is_cp_matrix_type(m) or is_sp_matrix_type(m)


def is_nx_graph_type(g):
return g in __nx_graph_types
def is_nx_graph_type(graph_type):
    """
    Return True if graph_type is a member of the module-level
    ``__nx_graph_types`` collection, False otherwise.

    Callers are expected to pass a type object such as ``type(G)``,
    not a graph instance.
    """
    is_member = graph_type in __nx_graph_types
    return is_member


def is_cugraph_graph_type(g):
Expand Down

0 comments on commit b42d9f8

Please sign in to comment.