Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds logic to handle isolated vertices at python layer #3886

Merged
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 39 additions & 8 deletions python/cugraph/cugraph/community/louvain.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# limitations under the License.

from cugraph.utilities import (
is_nx_graph_type,
ensure_cugraph_obj_for_nx,
df_score_to_dictionary,
)
Expand All @@ -21,6 +22,9 @@
from pylibcugraph import louvain as pylibcugraph_louvain
from pylibcugraph import ResourceHandle

VERTEX_COL_NAME = "vertex"
CLUSTER_ID_COL_NAME = "partition"


# FIXME: max_level should default to 100 once max_iter is removed
def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
Expand Down Expand Up @@ -72,9 +76,9 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
GPU data frame of size V containing two columns: the vertex id and the
partition id it is assigned to.

naimnv marked this conversation as resolved.
Show resolved Hide resolved
df['vertex'] : cudf.Series
result_df[VERTEX_COL_NAME] : cudf.Series
Contains the vertex identifiers
df['partition'] : cudf.Series
result_df[CLUSTER_ID_COL_NAME] : cudf.Series
Contains the partition assigned to the vertices

modularity_score : float
Expand All @@ -89,6 +93,17 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):

"""

# FIXME: Once the graph construction calls support isolated vertices through
# the C API (the C++ interface already supports this), there will be
# no need to compute isolated vertices here.

isolated_vertices = list()
naimnv marked this conversation as resolved.
Show resolved Hide resolved
if is_nx_graph_type(type(G)):
isolated_vertices = [v for v in range(G.number_of_nodes()) if G.degree[v] == 0]
naimnv marked this conversation as resolved.
Show resolved Hide resolved
else:
# FIXME: Gather isolated vertices of G
pass

G, isNx = ensure_cugraph_obj_for_nx(G)

if G.is_directed():
Expand All @@ -112,6 +127,9 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
if max_level is None:
max_level = 100

if max_level > 1000:
max_level = 1000
naimnv marked this conversation as resolved.
Show resolved Hide resolved

vertex, partition, mod_score = pylibcugraph_louvain(
resource_handle=ResourceHandle(),
graph=G._plc_graph,
Expand All @@ -121,14 +139,27 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
do_expensive_check=False,
)

df = cudf.DataFrame()
df["vertex"] = vertex
df["partition"] = partition
result_df = cudf.DataFrame()
result_df[VERTEX_COL_NAME] = vertex
result_df[CLUSTER_ID_COL_NAME] = partition

if isNx and len(isolated_vertices) > 0:
unique_cids = result_df[CLUSTER_ID_COL_NAME].unique()
max_cluster_id = -1 if len(result_df) == 0 else unique_cids.max()

isolated_vtx_and_cids = cudf.DataFrame()
isolated_vtx_and_cids[VERTEX_COL_NAME] = isolated_vertices
isolated_vtx_and_cids[CLUSTER_ID_COL_NAME] = [
(max_cluster_id + i + 1) for i in range(len(isolated_vertices))
]
result_df = cudf.concat(
[result_df, isolated_vtx_and_cids], ignore_index=True, sort=False
)

if G.renumbered:
df = G.unrenumber(df, "vertex")
result_df = G.unrenumber(result_df, VERTEX_COL_NAME)

if isNx is True:
df = df_score_to_dictionary(df, "partition")
result_df = df_score_to_dictionary(result_df, CLUSTER_ID_COL_NAME)

return df, mod_score
return result_df, mod_score
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/utilities/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,8 +364,8 @@ def is_matrix_type(m):
return is_cp_matrix_type(m) or is_sp_matrix_type(m)


def is_nx_graph_type(g):
return g in __nx_graph_types
def is_nx_graph_type(graph_type):
return graph_type in __nx_graph_types


def is_cugraph_graph_type(g):
Expand Down