Skip to content

Commit

Permalink
Merge branch 'branch-24.02' into fea/mark_kernels_as_internal
Browse files Browse the repository at this point in the history
  • Loading branch information
robertmaynard authored Jan 22, 2024
2 parents 2c1d5eb + 77d833a commit 2066ce1
Show file tree
Hide file tree
Showing 10 changed files with 308 additions and 37 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
# Copyright (c) 2021-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -35,11 +35,9 @@
from cugraph.structure.number_map import NumberMap
from cugraph.structure.symmetrize import symmetrize
from cugraph.dask.common.part_utils import (
get_persisted_df_worker_map,
persist_dask_df_equal_parts_per_worker,
)
from cugraph.dask.common.mg_utils import run_gc_on_dask_cluster
from cugraph.dask import get_n_workers
import cugraph.dask.comms.comms as Comms


Expand Down Expand Up @@ -825,12 +823,13 @@ def get_two_hop_neighbors(self, start_vertices=None):
_client = default_client()

def _call_plc_two_hop_neighbors(sID, mg_graph_x, start_vertices):
return pylibcugraph_get_two_hop_neighbors(
results_ = pylibcugraph_get_two_hop_neighbors(
resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
graph=mg_graph_x,
start_vertices=start_vertices,
do_expensive_check=False,
)
return results_

if isinstance(start_vertices, int):
start_vertices = [start_vertices]
Expand All @@ -845,31 +844,31 @@ def _call_plc_two_hop_neighbors(sID, mg_graph_x, start_vertices):
else:
start_vertices_type = self.input_df.dtypes[0]

if not isinstance(start_vertices, (dask_cudf.Series)):
start_vertices = dask_cudf.from_cudf(
start_vertices = start_vertices.astype(start_vertices_type)

def create_iterable_args(
session_id, input_graph, start_vertices=None, npartitions=None
):
session_id_it = [session_id] * npartitions
graph_it = input_graph.values()
start_vertices = cp.array_split(start_vertices.values, npartitions)
return [
session_id_it,
graph_it,
start_vertices,
npartitions=min(self._npartitions, len(start_vertices)),
)
start_vertices = start_vertices.astype(start_vertices_type)
]

n_workers = get_n_workers()
start_vertices = start_vertices.repartition(npartitions=n_workers)
start_vertices = persist_dask_df_equal_parts_per_worker(
start_vertices, _client
result = _client.map(
_call_plc_two_hop_neighbors,
*create_iterable_args(
Comms.get_session_id(),
self._plc_graph,
start_vertices,
self._npartitions,
),
pure=False,
)
start_vertices = get_persisted_df_worker_map(start_vertices, _client)

result = [
_client.submit(
_call_plc_two_hop_neighbors,
Comms.get_session_id(),
self._plc_graph[w],
start_vertices[w][0],
workers=[w],
allow_other_workers=False,
)
for w in start_vertices.keys()
]
else:
result = [
_client.submit(
Expand All @@ -896,7 +895,8 @@ def convert_to_cudf(cp_arrays):
return df

cudf_result = [
_client.submit(convert_to_cudf, cp_arrays) for cp_arrays in result
_client.submit(convert_to_cudf, cp_arrays, pure=False)
for cp_arrays in result
]

wait(cudf_result)
Expand Down
10 changes: 10 additions & 0 deletions python/nx-cugraph/_nx_cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"functions": {
# BEGIN: functions
"ancestors",
"average_clustering",
"barbell_graph",
"betweenness_centrality",
"bfs_edges",
Expand All @@ -41,10 +42,12 @@
"caveman_graph",
"chvatal_graph",
"circular_ladder_graph",
"clustering",
"complete_bipartite_graph",
"complete_graph",
"complete_multipartite_graph",
"connected_components",
"core_number",
"cubical_graph",
"cycle_graph",
"davis_southern_women_graph",
Expand All @@ -68,6 +71,7 @@
"house_x_graph",
"icosahedral_graph",
"in_degree_centrality",
"is_bipartite",
"is_connected",
"is_isolate",
"is_strongly_connected",
Expand Down Expand Up @@ -104,6 +108,8 @@
"strongly_connected_components",
"tadpole_graph",
"tetrahedral_graph",
"transitivity",
"triangles",
"trivial_graph",
"truncated_cube_graph",
"truncated_tetrahedron_graph",
Expand All @@ -115,11 +121,14 @@
},
"extra_docstrings": {
# BEGIN: extra_docstrings
"average_clustering": "Directed graphs and `weight` parameter are not yet supported.",
"betweenness_centrality": "`weight` parameter is not yet supported, and RNG with seed may be different.",
"bfs_edges": "`sort_neighbors` parameter is not yet supported.",
"bfs_predecessors": "`sort_neighbors` parameter is not yet supported.",
"bfs_successors": "`sort_neighbors` parameter is not yet supported.",
"bfs_tree": "`sort_neighbors` parameter is not yet supported.",
"clustering": "Directed graphs and `weight` parameter are not yet supported.",
"core_number": "Directed graphs are not yet supported.",
"edge_betweenness_centrality": "`weight` parameter is not yet supported, and RNG with seed may be different.",
"eigenvector_centrality": "`nstart` parameter is not used, but it is checked for validity.",
"from_pandas_edgelist": "cudf.DataFrame inputs also supported; value columns with str is unsuppported.",
Expand All @@ -131,6 +140,7 @@
"katz_centrality": "`nstart` isn't used (but is checked), and `normalized=False` is not supported.",
"louvain_communities": "`seed` parameter is currently ignored, and self-loops are not yet supported.",
"pagerank": "`dangling` parameter is not supported, but it is checked for validity.",
"transitivity": "Directed graphs are not yet supported.",
# END: extra_docstrings
},
"extra_parameters": {
Expand Down
4 changes: 3 additions & 1 deletion python/nx-cugraph/nx_cugraph/algorithms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,16 @@
from . import (
bipartite,
centrality,
cluster,
community,
components,
link_analysis,
shortest_paths,
traversal,
)
from .bipartite import complete_bipartite_graph
from .bipartite import complete_bipartite_graph, is_bipartite
from .centrality import *
from .cluster import *
from .components import *
from .core import *
from .dag import *
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand All @@ -10,4 +10,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .basic import *
from .generators import *
31 changes: 31 additions & 0 deletions python/nx-cugraph/nx_cugraph/algorithms/bipartite/basic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cupy as cp

from nx_cugraph.algorithms.cluster import _triangles
from nx_cugraph.convert import _to_graph
from nx_cugraph.utils import networkx_algorithm

__all__ = [
"is_bipartite",
]


def _is_two_colorable(src_indices, dst_indices, num_nodes):
    """Return True if the nodes touched by the edge list can be properly 2-colored.

    Colors are pushed across edges in both directions (so edge direction is
    ignored) until every edge endpoint is colored; a fresh seed is planted
    whenever propagation stalls but uncolored edges remain (i.e. a new
    connected component).  When the graph is bipartite all pushed colors are
    forced and therefore agree; otherwise any complete assignment must leave
    at least one edge with equal endpoint colors, which the final check finds.

    Parameters
    ----------
    src_indices, dst_indices : cupy.ndarray
        Integer node ids of the edge endpoints (same length).
    num_nodes : int
        Number of nodes; every id is expected to be in ``[0, num_nodes)``.
    """
    if src_indices.size == 0:
        return True
    color = cp.full(num_nodes, -1, dtype=cp.int8)  # -1 means "not colored yet"
    while True:
        src_color = color[src_indices]
        dst_color = color[dst_indices]
        push_to_dst = (src_color >= 0) & (dst_color < 0)
        push_to_src = (dst_color >= 0) & (src_color < 0)
        if bool(push_to_dst.any()) or bool(push_to_src.any()):
            # Conflicting writes to the same node are fine here: whichever
            # value lands, the disagreement shows up in the final edge check.
            color[dst_indices[push_to_dst]] = 1 - src_color[push_to_dst]
            color[src_indices[push_to_src]] = 1 - dst_color[push_to_src]
            continue
        # Nothing left to propagate, so every remaining edge has either both
        # endpoints colored or both uncolored.
        untouched = cp.flatnonzero(src_color < 0)
        if untouched.size == 0:
            break
        color[src_indices[untouched[:1]]] = 0  # seed the next component
    return not bool((color[src_indices] == color[dst_indices]).any())


@networkx_algorithm(plc="triangle_count", version_added="24.02")
def is_bipartite(G):
    # NOTE: intentionally no docstring; use comments so that the metadata the
    # decorator derives from this function is unchanged.
    #
    # A graph is bipartite iff it has no odd-length cycle.  Having no triangles
    # is necessary but NOT sufficient (a 5-cycle is triangle-free yet not
    # bipartite), so triangle counting is only used as a fast rejection test
    # and the final answer comes from an explicit 2-coloring.
    G = _to_graph(G)
    if len(G) == 0:
        # `_triangles` returns None for the counts of an empty graph; the
        # empty graph is trivially 2-colorable.
        return True
    _, triangles, _ = _triangles(
        G, None, symmetrize="union" if G.is_directed() else None
    )
    if int(cp.count_nonzero(triangles)) != 0:
        return False
    # NOTE(review): assumes `G.dst_indices` is the companion array of
    # `G.src_indices` (edge i goes src_indices[i] -> dst_indices[i]) — confirm.
    return _is_two_colorable(G.src_indices, G.dst_indices, len(G))
136 changes: 136 additions & 0 deletions python/nx-cugraph/nx_cugraph/algorithms/cluster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cupy as cp
import pylibcugraph as plc

from nx_cugraph.convert import _to_undirected_graph
from nx_cugraph.utils import networkx_algorithm, not_implemented_for

__all__ = [
"triangles",
"average_clustering",
"clustering",
"transitivity",
]


def _triangles(G, nodes, symmetrize=None):
    """Run ``plc.triangle_count`` for ``G`` and report how ``nodes`` was given.

    Parameters
    ----------
    G : graph
        Graph object providing ``key_to_id``, ``_list_to_nodearray`` and
        ``_get_plc_graph``.
    nodes : node, iterable of nodes, or None
        ``None`` passes ``start_list=None`` to ``plc.triangle_count``.  A value
        that is itself contained in ``G`` is treated as one node; anything else
        is iterated as a collection of nodes.
    symmetrize : optional
        Forwarded unchanged to ``G._get_plc_graph``.

    Returns
    -------
    (node_ids, triangles, is_single_node)
        The two values returned by ``plc.triangle_count`` plus a flag telling
        whether ``nodes`` was a single node.  For an empty graph the first two
        entries are ``None``.
    """
    is_single_node = False
    if nodes is not None:
        is_single_node = nodes in G
        if is_single_node:
            # Translate the node key to its integer id when a mapping exists.
            node_id = nodes if G.key_to_id is None else G.key_to_id[nodes]
            nodes = [node_id]
        else:
            nodes = list(nodes)
        nodes = G._list_to_nodearray(nodes)
    if len(G) == 0:
        return None, None, is_single_node
    node_ids, triangles = plc.triangle_count(
        resource_handle=plc.ResourceHandle(),
        graph=G._get_plc_graph(symmetrize=symmetrize),
        start_list=nodes,
        do_expensive_check=False,
    )
    return node_ids, triangles, is_single_node


@not_implemented_for("directed")
@networkx_algorithm(plc="triangle_count", version_added="24.02")
def triangles(G, nodes=None):
    # Per-node triangle counts.  Returns {} for an empty graph, an int when
    # `nodes` is a single node of G, and otherwise whatever
    # `G._nodearrays_to_dict` builds from the (node id, count) arrays.
    # (Kept as comments rather than a docstring on purpose.)
    G = _to_undirected_graph(G)
    ids, counts, single = _triangles(G, nodes)
    if len(G) == 0:
        return {}
    return int(counts[0]) if single else G._nodearrays_to_dict(ids, counts)


@not_implemented_for("directed")
@networkx_algorithm(is_incomplete=True, plc="triangle_count", version_added="24.02")
def clustering(G, nodes=None, weight=None):
    """Directed graphs and `weight` parameter are not yet supported."""
    # Returns {} for an empty graph, a single number when `nodes` is one node
    # of G, and otherwise the result of `G._nodearrays_to_dict` over the
    # per-node coefficients 2 * triangles / (degree * (degree - 1)).
    G = _to_undirected_graph(G)
    node_ids, triangles, is_single_node = _triangles(G, nodes)
    if len(G) == 0:
        return {}
    # Use one degree definition for both the single-node and the multi-node
    # paths: self-loops ignored, looked up by the integer ids returned from
    # triangle counting.  (Comparing `G.src_indices` with the caller's `nodes`
    # key is wrong when node keys differ from internal ids, and it would also
    # count self-loops.)
    degrees = G._degrees_array(ignore_selfloops=True)[node_ids]
    if is_single_node:
        numer = int(triangles[0])
        if numer == 0:
            return 0
        # numer > 0 implies at least two distinct neighbors, so no zero division.
        degree = int(degrees[0])
        return 2 * numer / (degree * (degree - 1))
    denom = degrees * (degrees - 1)
    results = 2 * triangles / denom
    results = cp.where(denom, results, 0)  # 0 where we divided by 0
    return G._nodearrays_to_dict(node_ids, results)


@clustering._can_run
def _(G, nodes=None, weight=None):
    # Runnable only for unweighted requests on undirected graphs.
    if weight is not None:
        return False
    return not G.is_directed()


@not_implemented_for("directed")
@networkx_algorithm(is_incomplete=True, plc="triangle_count", version_added="24.02")
def average_clustering(G, nodes=None, weight=None, count_zeros=True):
    """Directed graphs and `weight` parameter are not yet supported."""
    # Mean of the per-node clustering coefficients.  Raises ZeroDivisionError
    # for an empty graph, or when `count_zeros` is false and no selected node
    # is part of a triangle.
    G = _to_undirected_graph(G)
    node_ids, tri_counts, _ = _triangles(G, nodes)
    if len(G) == 0:
        raise ZeroDivisionError
    degs = G._degrees_array(ignore_selfloops=True)[node_ids]
    if count_zeros:
        pairs = degs * (degs - 1)
        # Nodes with fewer than two neighbors divide by zero; report 0 there.
        coeffs = cp.where(pairs, 2 * tri_counts / pairs, 0)
        return float(coeffs.mean())
    # Only nodes that take part in at least one triangle contribute.
    keep = tri_counts != 0
    tri_counts = tri_counts[keep]
    if tri_counts.size == 0:
        raise ZeroDivisionError
    degs = degs[keep]
    coeffs = 2 * tri_counts / (degs * (degs - 1))
    return float(coeffs.mean())


@average_clustering._can_run
def _(G, nodes=None, weight=None, count_zeros=True):
    # Runnable only for unweighted requests on undirected graphs.
    if weight is not None:
        return False
    return not G.is_directed()


@not_implemented_for("directed")
@networkx_algorithm(is_incomplete=True, plc="triangle_count", version_added="24.02")
def transitivity(G):
    """Directed graphs are not yet supported."""
    # Ratio 2 * sum(triangles) / sum(degree * (degree - 1)) over all nodes;
    # 0 for an empty graph or a graph without triangles.
    G = _to_undirected_graph(G)
    if len(G) == 0:
        return 0
    node_ids, tri_counts = plc.triangle_count(
        resource_handle=plc.ResourceHandle(),
        graph=G._get_plc_graph(),
        start_list=None,
        do_expensive_check=False,
    )
    total = int(tri_counts.sum())
    if total == 0:
        return 0
    degs = G._degrees_array(ignore_selfloops=True)[node_ids]
    pairs = degs * (degs - 1)
    return 2 * total / int(pairs.sum())


@transitivity._can_run
def _(G):
    # Open question from the original author: is transitivity supposed to
    # work on directed graphs?  Until that is settled, only undirected graphs
    # are accepted.
    directed = G.is_directed()
    return not directed
30 changes: 29 additions & 1 deletion python/nx-cugraph/nx_cugraph/algorithms/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,42 @@
import pylibcugraph as plc

import nx_cugraph as nxcg
from nx_cugraph.convert import _to_undirected_graph
from nx_cugraph.utils import (
_get_int_dtype,
index_dtype,
networkx_algorithm,
not_implemented_for,
)

__all__ = ["k_truss"]
__all__ = ["core_number", "k_truss"]


@not_implemented_for("directed")
@not_implemented_for("multigraph")
@networkx_algorithm(is_incomplete=True, plc="core_number", version_added="24.02")
def core_number(G):
    """Directed graphs are not yet supported."""
    # Returns {} for an empty graph; raises NetworkXNotImplemented when the
    # graph has self-loops; otherwise converts the (node id, core number)
    # arrays from `plc.core_number` via `G._nodearrays_to_dict`.
    G = _to_undirected_graph(G)
    if len(G) == 0:
        return {}
    selfloop_count = nxcg.number_of_selfloops(G)
    if selfloop_count > 0:
        message = (
            "Input graph has self loops which is not permitted; "
            "Consider using G.remove_edges_from(nx.selfloop_edges(G))."
        )
        raise nx.NetworkXNotImplemented(message)
    ids, cores = plc.core_number(
        resource_handle=plc.ResourceHandle(),
        graph=G._get_plc_graph(),
        degree_type="bidirectional",
        do_expensive_check=False,
    )
    return G._nodearrays_to_dict(ids, cores)


@core_number._can_run
def _(G):
    # Only undirected graphs can be handled.
    directed = G.is_directed()
    return not directed


@not_implemented_for("directed")
Expand Down
Loading

0 comments on commit 2066ce1

Please sign in to comment.