Updates READMEs, updates core_number to properly ignore `degree_type`, minor cleanup (#4776)

* updates READMEs to remove outdated nx-cugraph text
* updates `core_number` docs, APIs, and tests to properly ignore `degree_type`, since `core_number` does not support the directed graphs that `degree_type` is intended for; `degree_type` settings will be honored once directed graphs are supported (a usage sketch follows this list)
* renames a test helper function for clarity
* fixes an issue in the datasets API so the edgelist is properly recreated for MG (dask) if it was previously created for SG
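
A minimal usage sketch of the `core_number` change, assuming the cuGraph Python API at this commit and the bundled karate dataset; `degree_type` is still accepted for compatibility but is currently ignored:

```python
# Sketch only; assumes cugraph >= 24.12 and network access for the dataset.
import cugraph
from cugraph.datasets import karate

G = karate.get_graph(download=True)

# degree_type is accepted but ignored until directed graphs are supported.
df = cugraph.core_number(G, degree_type="incoming")
print(df.head())
```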

Authors:
  - Rick Ratzel (https://github.com/rlratzel)

Approvers:
  - Don Acosta (https://github.com/acostadon)
  - Alex Barghi (https://github.com/alexbarghi-nv)
  - Brad Rees (https://github.com/BradReesWork)

URL: #4776
rlratzel authored Nov 25, 2024
1 parent a977755 commit e155a8f
Showing 8 changed files with 90 additions and 97 deletions.
10 changes: 0 additions & 10 deletions README.md
@@ -34,16 +34,6 @@

</div>

-----
## News

___NEW!___ _[nx-cugraph](https://rapids.ai/nx-cugraph/)_, a NetworkX backend that provides GPU acceleration to NetworkX with zero code change.
```
> pip install nx-cugraph-cu11 --extra-index-url https://pypi.nvidia.com
> export NETWORKX_AUTOMATIC_BACKENDS=cugraph
```
That's it. NetworkX now leverages cuGraph for accelerated graph algorithms.

-----

## Table of contents
41 changes: 24 additions & 17 deletions python/cugraph/cugraph/cores/core_number.py
@@ -23,29 +23,27 @@
def core_number(G, degree_type="bidirectional"):
"""
Compute the core numbers for the nodes of the graph G. A k-core of a graph
is a maximal subgraph that contains nodes of degree k or more.
A node has a core number of k if it belongs a k-core but not to k+1-core.
This call does not support a graph with self-loops and parallel
edges.
is a maximal subgraph that contains nodes of degree k or more. A node has
a core number of k if it belongs to a k-core but not to k+1-core. This
call does not support a graph with self-loops and parallel edges.
Parameters
----------
G : cuGraph.Graph or networkx.Graph
The graph should contain undirected edges where undirected edges are
represented as directed edges in both directions. While this graph
can contain edge weights, they don't participate in the calculation
The current implementation only supports undirected graphs. The graph
can contain edge weights, but they don't participate in the calculation
of the core numbers.
The current implementation only supports undirected graphs.
.. deprecated:: 24.12
Accepting a ``networkx.Graph`` is deprecated and will be removed in a
future version. For ``networkx.Graph`` use networkx directly with
the ``nx-cugraph`` backend. See: https://rapids.ai/nx-cugraph/
degree_type: str, (default="bidirectional")
This option determines if the core number computation should be based
on input, output, or both directed edges, with valid values being
"incoming", "outgoing", and "bidirectional" respectively.
This option is currently ignored. This option may eventually determine
if the core number computation should be based on input, output, or
both directed edges, with valid values being "incoming", "outgoing",
and "bidirectional" respectively.
Returns
-------
@@ -63,19 +61,28 @@ def core_number(G, degree_type="bidirectional"):
>>> from cugraph.datasets import karate
>>> G = karate.get_graph(download=True)
>>> df = cugraph.core_number(G)
>>> df.head()
vertex core_number
0 33 4
1 0 4
2 32 4
3 2 4
4 1 4
"""

G, isNx = ensure_cugraph_obj_for_nx(G)

if G.is_directed():
raise ValueError("input graph must be undirected")

if degree_type not in ["incoming", "outgoing", "bidirectional"]:
raise ValueError(
f"'degree_type' must be either incoming, "
f"outgoing or bidirectional, got: {degree_type}"
)
# degree_type is currently ignored until libcugraph supports directed
# graphs for core_number. Once supported, degree_type should be checked
# like so:
# if degree_type not in ["incoming", "outgoing", "bidirectional"]:
# raise ValueError(
# f"'degree_type' must be either incoming, "
# f"outgoing or bidirectional, got: {degree_type}"
# )

vertex, core_number = pylibcugraph_core_number(
resource_handle=ResourceHandle(),
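
For intuition about the docstring's k-core definition, a small illustrative check using NetworkX (a toy graph, not part of this change):

```python
# A triangle (every node has degree 2) with one pendant node attached.
import networkx as nx

G = nx.Graph([(0, 1), (1, 2), (2, 0), (2, 3)])
print(nx.core_number(G))  # {0: 2, 1: 2, 2: 2, 3: 1} -- node 3 is only in the 1-core
```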
29 changes: 16 additions & 13 deletions python/cugraph/cugraph/dask/cores/core_number.py
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -53,15 +53,15 @@ def core_number(input_graph, degree_type="bidirectional"):
Parameters
----------
input_graph : cugraph.graph
cuGraph graph descriptor, should contain the connectivity information,
(edge weights are not used in this algorithm).
The current implementation only supports undirected graphs.
The current implementation only supports undirected graphs. The graph
can contain edge weights, but they don't participate in the calculation
of the core numbers.
degree_type: str, (default="bidirectional")
This option determines if the core number computation should be based
on input, output, or both directed edges, with valid values being
"incoming", "outgoing", and "bidirectional" respectively.
This option is currently ignored. This option may eventually determine
if the core number computation should be based on input, output, or
both directed edges, with valid values being "incoming", "outgoing",
and "bidirectional" respectively.
Returns
-------
@@ -77,11 +77,14 @@ def core_number(input_graph, degree_type="bidirectional"):
if input_graph.is_directed():
raise ValueError("input graph must be undirected")

if degree_type not in ["incoming", "outgoing", "bidirectional"]:
raise ValueError(
f"'degree_type' must be either incoming, "
f"outgoing or bidirectional, got: {degree_type}"
)
# degree_type is currently ignored until libcugraph supports directed
# graphs for core_number. Once supported, degree_type should be checked
# like so:
# if degree_type not in ["incoming", "outgoing", "bidirectional"]:
# raise ValueError(
# f"'degree_type' must be either incoming, "
# f"outgoing or bidirectional, got: {degree_type}"
# )

# Initialize dask client
client = default_client()
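
A hedged sketch of the multi-GPU (dask) path touched here; it assumes a running dask_cuda cluster/client and the karate dataset:

```python
# Sketch only; requires an initialized Dask client (e.g. via dask_cuda).
import cugraph
import cugraph.dask as dcg
from cugraph.datasets import karate

karate.get_dask_edgelist(download=True)  # build the MG (dask) edgelist
dg = karate.get_dask_graph(create_using=cugraph.Graph(directed=False))

# degree_type is currently ignored here as well.
result = dcg.core_number(dg).compute().sort_values("vertex")
```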
4 changes: 3 additions & 1 deletion python/cugraph/cugraph/datasets/dataset.py
@@ -352,7 +352,9 @@ def get_dask_graph(
If True, stores the transpose of the adjacency matrix. Required
for certain algorithms.
"""
if self._edgelist is None:
if self._edgelist is None or not isinstance(
self._edgelist, dask_cudf.DataFrame
):
self.get_dask_edgelist(download=download)

if create_using is None:
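
A hedged usage sketch of the datasets fix above (assumed public `Dataset` methods): creating an SG edgelist first no longer blocks building the MG graph, because the edgelist is recreated as a `dask_cudf.DataFrame` when needed:

```python
# Sketch only; assumes the karate Dataset object and a running Dask client.
from cugraph.datasets import karate

karate.get_edgelist(download=True)  # SG path: caches a cudf.DataFrame edgelist
dg = karate.get_dask_graph()        # MG path: now recreates the edgelist as dask_cudf
```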
24 changes: 13 additions & 11 deletions python/cugraph/cugraph/tests/core/test_core_number.py
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -32,11 +32,15 @@ def setup_function():
# =============================================================================
# Pytest fixtures
# =============================================================================
degree_type = ["incoming", "outgoing"]
# FIXME: degree_type is currently unsupported (ignored)
# degree_type = ["incoming", "outgoing"]

# fixture_params = gen_fixture_params_product(
# (UNDIRECTED_DATASETS, "graph_file"),
# (degree_type, "degree_type"),
# )
fixture_params = gen_fixture_params_product(
(UNDIRECTED_DATASETS, "graph_file"),
(degree_type, "degree_type"),
)


@@ -46,7 +50,9 @@ def input_combo(request):
This fixture returns a dictionary containing all input params required to
run a Core number algo
"""
parameters = dict(zip(("graph_file", "degree_type"), request.param))
# FIXME: degree_type is not supported so do not test with different values
# parameters = dict(zip(("graph_file", "degree_type"), request.param))
parameters = {"graph_file": request.param[0]}

graph_file = parameters["graph_file"]
G = graph_file.get_graph()
@@ -69,7 +75,8 @@ def input_combo(request):
def test_core_number(input_combo):
G = input_combo["G"]
Gnx = input_combo["Gnx"]
degree_type = input_combo["degree_type"]
# FIXME: degree_type is currently unsupported (ignored)
# degree_type = input_combo["degree_type"]
nx_core_number_results = cudf.DataFrame()

dic_results = nx.core_number(Gnx)
@@ -80,7 +87,7 @@ def test_core_number(input_combo):
)

core_number_results = (
cugraph.core_number(G, degree_type)
cugraph.core_number(G)
.sort_values("vertex")
.reset_index(drop=True)
.rename(columns={"core_number": "cugraph_core_number"})
@@ -109,8 +116,3 @@ def test_core_number_invalid_input(input_combo):

with pytest.raises(ValueError):
cugraph.core_number(G)

invalid_degree_type = "invalid"
G = input_combo["G"]
with pytest.raises(ValueError):
cugraph.core_number(G, invalid_degree_type)
33 changes: 12 additions & 21 deletions python/cugraph/cugraph/tests/core/test_core_number_mg.py
Expand Up @@ -17,7 +17,7 @@

import cugraph
import cugraph.dask as dcg
from cugraph.datasets import karate, dolphins, karate_asymmetric
from cugraph.datasets import karate, dolphins


# =============================================================================
@@ -35,17 +35,18 @@ def setup_function():


DATASETS = [karate, dolphins]
DEGREE_TYPE = ["incoming", "outgoing", "bidirectional"]
# FIXME: degree_type is currently unsupported (ignored)
# DEGREE_TYPE = ["incoming", "outgoing", "bidirectional"]


# =============================================================================
# Helper Functions
# =============================================================================


def get_sg_results(dataset, degree_type):
def get_sg_results(dataset):
G = dataset.get_graph(create_using=cugraph.Graph(directed=False))
res = cugraph.core_number(G, degree_type)
res = cugraph.core_number(G)
res = res.sort_values("vertex").reset_index(drop=True)
return res

@@ -57,23 +58,23 @@ def get_sg_results(dataset, degree_type):

@pytest.mark.mg
@pytest.mark.parametrize("dataset", DATASETS)
@pytest.mark.parametrize("degree_type", DEGREE_TYPE)
def test_sg_core_number(dask_client, dataset, degree_type, benchmark):
# @pytest.mark.parametrize("degree_type", DEGREE_TYPE)
def test_sg_core_number(dask_client, dataset, benchmark):
# This test is only for benchmark purposes.
sg_core_number_results = None
G = dataset.get_graph(create_using=cugraph.Graph(directed=False))
sg_core_number_results = benchmark(cugraph.core_number, G, degree_type)
sg_core_number_results = benchmark(cugraph.core_number, G)
assert sg_core_number_results is not None


@pytest.mark.mg
@pytest.mark.parametrize("dataset", DATASETS)
@pytest.mark.parametrize("degree_type", DEGREE_TYPE)
def test_core_number(dask_client, dataset, degree_type, benchmark):
# @pytest.mark.parametrize("degree_type", DEGREE_TYPE)
def test_core_number(dask_client, dataset, benchmark):
dataset.get_dask_edgelist(download=True) # reload with MG edgelist
dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=False))

result_core_number = benchmark(dcg.core_number, dg, degree_type)
result_core_number = benchmark(dcg.core_number, dg)
result_core_number = (
result_core_number.drop_duplicates()
.compute()
@@ -82,21 +83,11 @@ def test_core_number(dask_client, dataset, degree_type, benchmark):
.rename(columns={"core_number": "mg_core_number"})
)

expected_output = get_sg_results(dataset, degree_type)
expected_output = get_sg_results(dataset)

# Update the mg core number with sg core number results
# for easy comparison using cuDF DataFrame methods.
result_core_number["sg_core_number"] = expected_output["core_number"]
counts_diffs = result_core_number.query("mg_core_number != sg_core_number")

assert len(counts_diffs) == 0


@pytest.mark.mg
def test_core_number_invalid_input():
dg = karate_asymmetric.get_graph(create_using=cugraph.Graph(directed=True))

invalid_degree_type = 3

with pytest.raises(ValueError):
dcg.core_number(dg, invalid_degree_type)
22 changes: 10 additions & 12 deletions python/cugraph/cugraph/tests/link_prediction/test_jaccard.py
@@ -156,12 +156,10 @@ def networkx_call(M, benchmark_callable=None):

# FIXME: This compare is shared across several tests... it should be
# a general utility
def compare(src1, dst1, val1, src2, dst2, val2):
#
def assert_results_equal(src1, dst1, val1, src2, dst2, val2):
# We will do comparison computations by using dataframe
# merge functions (essentially doing fast joins). We
# start by making two data frames
#
df1 = cudf.DataFrame()
df1["src1"] = src1
df1["dst1"] = dst1
@@ -174,19 +172,18 @@ def compare(src1, dst1, val1, src2, dst2, val2):
if val2 is not None:
df2["val2"] = val2

#
# Check to see if all pairs in the original data frame
# still exist in the new data frame. If we join (merge)
# the data frames where (src1[i]=src2[i]) and (dst1[i]=dst2[i])
# then we should get exactly the same number of entries in
# the data frame if we did not lose any data.
#
# Check to see if all pairs in df1 still exist in the new (merged) data
# frame. If we join (merge) the data frames where (src1[i]=src2[i]) and
# (dst1[i]=dst2[i]) then we should get exactly the same number of entries
# in the data frame if we did not lose any data.
join = df1.merge(df2, left_on=["src1", "dst1"], right_on=["src2", "dst2"])

# Print detailed differences on test failure
if len(df1) != len(join):
join2 = df1.merge(
df2, how="left", left_on=["src1", "dst1"], right_on=["src2", "dst2"]
)
orig_option = pd.get_option("display.max_rows")
pd.set_option("display.max_rows", 500)
print("df1 = \n", df1.sort_values(["src1", "dst1"]))
print("df2 = \n", df2.sort_values(["src2", "dst2"]))
@@ -196,6 +193,7 @@ def compare(src1, dst1, val1, src2, dst2, val2):
.to_pandas()
.query("src2.isnull()", engine="python"),
)
pd.set_option("display.max_rows", orig_option)

assert len(df1) == len(join)

@@ -485,7 +483,7 @@ def test_all_pairs_jaccard_with_topk():
worst_coeff = all_pairs_jaccard_results["jaccard_coeff"].min()
better_than_k = jaccard_results[jaccard_results["jaccard_coeff"] > worst_coeff]

compare(
assert_results_equal(
all_pairs_jaccard_results["first"],
all_pairs_jaccard_results["second"],
all_pairs_jaccard_results["jaccard_coeff"],
Expand All @@ -494,7 +492,7 @@ def test_all_pairs_jaccard_with_topk():
jaccard_results["jaccard_coeff"],
)

compare(
assert_results_equal(
better_than_k["first"],
better_than_k["second"],
better_than_k["jaccard_coeff"],
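
For reference, a toy miniature (illustrative column names only) of the merge-based comparison idea behind the renamed `assert_results_equal` helper:

```python
# Toy illustration: an inner join on (src, dst) recovers every pair iff the sets match.
import cudf

df1 = cudf.DataFrame({"src1": [0, 1], "dst1": [1, 2], "val1": [0.5, 0.25]})
df2 = cudf.DataFrame({"src2": [0, 1], "dst2": [1, 2], "val2": [0.5, 0.25]})

join = df1.merge(df2, left_on=["src1", "dst1"], right_on=["src2", "dst2"])
assert len(join) == len(df1)  # no (src, dst) pair from df1 was lost
```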