Updates READMEs, updates core_number to properly ignore `degree_type`, minor cleanup (#4776)

* updates READMEs to remove outdated nx-cugraph text
* updates `core_number` docs, APIs, and tests to properly ignore `degree_type`, since `core_number` does not support the directed graphs that `degree_type` is intended for; `degree_type` settings will be honored once directed graphs are supported (a usage sketch follows this list)
* renames a test helper function for clarity
* fixes an issue in the datasets API so the edgelist is properly recreated for MG (dask) if it was previously created for SG
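
A minimal usage sketch of the `core_number` change, assuming the cuGraph Python API at this commit and the bundled karate dataset; `degree_type` is still accepted for compatibility but is currently ignored:

```python
# Sketch only; assumes cugraph >= 24.12 and network access for the dataset.
import cugraph
from cugraph.datasets import karate

G = karate.get_graph(download=True)

# degree_type is accepted but ignored until directed graphs are supported.
df = cugraph.core_number(G, degree_type="incoming")
print(df.head())
```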

Authors:
  - Rick Ratzel (https://github.com/rlratzel)

Approvers:
  - Don Acosta (https://github.com/acostadon)
  - Alex Barghi (https://github.com/alexbarghi-nv)
  - Brad Rees (https://github.com/BradReesWork)

URL: #4776
rlratzel authored Nov 25, 2024
1 parent a977755 commit e155a8f
Showing 8 changed files with 90 additions and 97 deletions.
10 changes: 0 additions & 10 deletions README.md
@@ -34,16 +34,6 @@

</div>

-----
## News

___NEW!___ _[nx-cugraph](https://rapids.ai/nx-cugraph/)_, a NetworkX backend that provides GPU acceleration to NetworkX with zero code change.
```
> pip install nx-cugraph-cu11 --extra-index-url https://pypi.nvidia.com
> export NETWORKX_AUTOMATIC_BACKENDS=cugraph
```
That's it. NetworkX now leverages cuGraph for accelerated graph algorithms.

-----

## Table of contents
41 changes: 24 additions & 17 deletions python/cugraph/cugraph/cores/core_number.py
@@ -23,29 +23,27 @@
def core_number(G, degree_type="bidirectional"):
"""
Compute the core numbers for the nodes of the graph G. A k-core of a graph
is a maximal subgraph that contains nodes of degree k or more.
A node has a core number of k if it belongs a k-core but not to k+1-core.
This call does not support a graph with self-loops and parallel
edges.
is a maximal subgraph that contains nodes of degree k or more. A node has
a core number of k if it belongs to a k-core but not to k+1-core. This
call does not support a graph with self-loops and parallel edges.
Parameters
----------
G : cuGraph.Graph or networkx.Graph
The graph should contain undirected edges where undirected edges are
represented as directed edges in both directions. While this graph
can contain edge weights, they don't participate in the calculation
The current implementation only supports undirected graphs. The graph
can contain edge weights, but they don't participate in the calculation
of the core numbers.
The current implementation only supports undirected graphs.
.. deprecated:: 24.12
Accepting a ``networkx.Graph`` is deprecated and will be removed in a
future version. For ``networkx.Graph`` use networkx directly with
the ``nx-cugraph`` backend. See: https://rapids.ai/nx-cugraph/
degree_type: str, (default="bidirectional")
This option determines if the core number computation should be based
on input, output, or both directed edges, with valid values being
"incoming", "outgoing", and "bidirectional" respectively.
This option is currently ignored. This option may eventually determine
if the core number computation should be based on input, output, or
both directed edges, with valid values being "incoming", "outgoing",
and "bidirectional" respectively.
Returns
-------
@@ -63,19 +61,28 @@ def core_number(G, degree_type="bidirectional"):
>>> from cugraph.datasets import karate
>>> G = karate.get_graph(download=True)
>>> df = cugraph.core_number(G)
>>> df.head()
vertex core_number
0 33 4
1 0 4
2 32 4
3 2 4
4 1 4
"""

G, isNx = ensure_cugraph_obj_for_nx(G)

if G.is_directed():
raise ValueError("input graph must be undirected")

if degree_type not in ["incoming", "outgoing", "bidirectional"]:
raise ValueError(
f"'degree_type' must be either incoming, "
f"outgoing or bidirectional, got: {degree_type}"
)
# degree_type is currently ignored until libcugraph supports directed
# graphs for core_number. Once supported, degree_type should be checked
# like so:
# if degree_type not in ["incoming", "outgoing", "bidirectional"]:
# raise ValueError(
# f"'degree_type' must be either incoming, "
# f"outgoing or bidirectional, got: {degree_type}"
# )

vertex, core_number = pylibcugraph_core_number(
resource_handle=ResourceHandle(),
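
For intuition about the docstring's k-core definition, a small illustrative check using NetworkX (a toy graph, not part of this change):

```python
# A triangle (every node has degree 2) with one pendant node attached.
import networkx as nx

G = nx.Graph([(0, 1), (1, 2), (2, 0), (2, 3)])
print(nx.core_number(G))  # {0: 2, 1: 2, 2: 2, 3: 1} -- node 3 is only in the 1-core
```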
29 changes: 16 additions & 13 deletions python/cugraph/cugraph/dask/cores/core_number.py
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -53,15 +53,15 @@ def core_number(input_graph, degree_type="bidirectional"):
Parameters
----------
input_graph : cugraph.graph
cuGraph graph descriptor, should contain the connectivity information,
(edge weights are not used in this algorithm).
The current implementation only supports undirected graphs.
The current implementation only supports undirected graphs. The graph
can contain edge weights, but they don't participate in the calculation
of the core numbers.
degree_type: str, (default="bidirectional")
This option determines if the core number computation should be based
on input, output, or both directed edges, with valid values being
"incoming", "outgoing", and "bidirectional" respectively.
This option is currently ignored. This option may eventually determine
if the core number computation should be based on input, output, or
both directed edges, with valid values being "incoming", "outgoing",
and "bidirectional" respectively.
Returns
-------
@@ -77,11 +77,14 @@ def core_number(input_graph, degree_type="bidirectional"):
if input_graph.is_directed():
raise ValueError("input graph must be undirected")

if degree_type not in ["incoming", "outgoing", "bidirectional"]:
raise ValueError(
f"'degree_type' must be either incoming, "
f"outgoing or bidirectional, got: {degree_type}"
)
# degree_type is currently ignored until libcugraph supports directed
# graphs for core_number. Once supported, degree_type should be checked
# like so:
# if degree_type not in ["incoming", "outgoing", "bidirectional"]:
# raise ValueError(
# f"'degree_type' must be either incoming, "
# f"outgoing or bidirectional, got: {degree_type}"
# )

# Initialize dask client
client = default_client()
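
A hedged sketch of the multi-GPU (dask) path touched here; it assumes a running dask_cuda cluster/client and the karate dataset:

```python
# Sketch only; requires an initialized Dask client (e.g. via dask_cuda).
import cugraph
import cugraph.dask as dcg
from cugraph.datasets import karate

karate.get_dask_edgelist(download=True)  # build the MG (dask) edgelist
dg = karate.get_dask_graph(create_using=cugraph.Graph(directed=False))

# degree_type is currently ignored here as well.
result = dcg.core_number(dg).compute().sort_values("vertex")
```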
4 changes: 3 additions & 1 deletion python/cugraph/cugraph/datasets/dataset.py
@@ -352,7 +352,9 @@ def get_dask_graph(
If True, stores the transpose of the adjacency matrix. Required
for certain algorithms.
"""
if self._edgelist is None:
if self._edgelist is None or not isinstance(
self._edgelist, dask_cudf.DataFrame
):
self.get_dask_edgelist(download=download)

if create_using is None:
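
A hedged usage sketch of the datasets fix above (assumed public `Dataset` methods): creating an SG edgelist first no longer blocks building the MG graph, because the edgelist is recreated as a `dask_cudf.DataFrame` when needed:

```python
# Sketch only; assumes the karate Dataset object and a running Dask client.
from cugraph.datasets import karate

karate.get_edgelist(download=True)  # SG path: caches a cudf.DataFrame edgelist
dg = karate.get_dask_graph()        # MG path: now recreates the edgelist as dask_cudf
```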
24 changes: 13 additions & 11 deletions python/cugraph/cugraph/tests/core/test_core_number.py
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -32,11 +32,15 @@ def setup_function():
# =============================================================================
# Pytest fixtures
# =============================================================================
degree_type = ["incoming", "outgoing"]
# FIXME: degree_type is currently unsupported (ignored)
# degree_type = ["incoming", "outgoing"]

# fixture_params = gen_fixture_params_product(
# (UNDIRECTED_DATASETS, "graph_file"),
# (degree_type, "degree_type"),
# )
fixture_params = gen_fixture_params_product(
(UNDIRECTED_DATASETS, "graph_file"),
(degree_type, "degree_type"),
)


@@ -46,7 +50,9 @@ def input_combo(request):
This fixture returns a dictionary containing all input params required to
run a Core number algo
"""
parameters = dict(zip(("graph_file", "degree_type"), request.param))
# FIXME: degree_type is not supported so do not test with different values
# parameters = dict(zip(("graph_file", "degree_type"), request.param))
parameters = {"graph_file": request.param[0]}

graph_file = parameters["graph_file"]
G = graph_file.get_graph()
@@ -69,7 +75,8 @@ def input_combo(request):
def test_core_number(input_combo):
G = input_combo["G"]
Gnx = input_combo["Gnx"]
degree_type = input_combo["degree_type"]
# FIXME: degree_type is currently unsupported (ignored)
# degree_type = input_combo["degree_type"]
nx_core_number_results = cudf.DataFrame()

dic_results = nx.core_number(Gnx)
@@ -80,7 +87,7 @@ def test_core_number(input_combo):
)

core_number_results = (
cugraph.core_number(G, degree_type)
cugraph.core_number(G)
.sort_values("vertex")
.reset_index(drop=True)
.rename(columns={"core_number": "cugraph_core_number"})
@@ -109,8 +116,3 @@ def test_core_number_invalid_input(input_combo):

with pytest.raises(ValueError):
cugraph.core_number(G)

invalid_degree_type = "invalid"
G = input_combo["G"]
with pytest.raises(ValueError):
cugraph.core_number(G, invalid_degree_type)
33 changes: 12 additions & 21 deletions python/cugraph/cugraph/tests/core/test_core_number_mg.py
Expand Up @@ -17,7 +17,7 @@

import cugraph
import cugraph.dask as dcg
from cugraph.datasets import karate, dolphins, karate_asymmetric
from cugraph.datasets import karate, dolphins


# =============================================================================
@@ -35,17 +35,18 @@ def setup_function():


DATASETS = [karate, dolphins]
DEGREE_TYPE = ["incoming", "outgoing", "bidirectional"]
# FIXME: degree_type is currently unsupported (ignored)
# DEGREE_TYPE = ["incoming", "outgoing", "bidirectional"]


# =============================================================================
# Helper Functions
# =============================================================================


def get_sg_results(dataset, degree_type):
def get_sg_results(dataset):
G = dataset.get_graph(create_using=cugraph.Graph(directed=False))
res = cugraph.core_number(G, degree_type)
res = cugraph.core_number(G)
res = res.sort_values("vertex").reset_index(drop=True)
return res

@@ -57,23 +58,23 @@ def get_sg_results(dataset, degree_type):

@pytest.mark.mg
@pytest.mark.parametrize("dataset", DATASETS)
@pytest.mark.parametrize("degree_type", DEGREE_TYPE)
def test_sg_core_number(dask_client, dataset, degree_type, benchmark):
# @pytest.mark.parametrize("degree_type", DEGREE_TYPE)
def test_sg_core_number(dask_client, dataset, benchmark):
# This test is only for benchmark purposes.
sg_core_number_results = None
G = dataset.get_graph(create_using=cugraph.Graph(directed=False))
sg_core_number_results = benchmark(cugraph.core_number, G, degree_type)
sg_core_number_results = benchmark(cugraph.core_number, G)
assert sg_core_number_results is not None


@pytest.mark.mg
@pytest.mark.parametrize("dataset", DATASETS)
@pytest.mark.parametrize("degree_type", DEGREE_TYPE)
def test_core_number(dask_client, dataset, degree_type, benchmark):
# @pytest.mark.parametrize("degree_type", DEGREE_TYPE)
def test_core_number(dask_client, dataset, benchmark):
dataset.get_dask_edgelist(download=True) # reload with MG edgelist
dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=False))

result_core_number = benchmark(dcg.core_number, dg, degree_type)
result_core_number = benchmark(dcg.core_number, dg)
result_core_number = (
result_core_number.drop_duplicates()
.compute()
@@ -82,21 +83,11 @@ def test_core_number(dask_client, dataset, degree_type, benchmark):
.rename(columns={"core_number": "mg_core_number"})
)

expected_output = get_sg_results(dataset, degree_type)
expected_output = get_sg_results(dataset)

# Update the mg core number with sg core number results
# for easy comparison using cuDF DataFrame methods.
result_core_number["sg_core_number"] = expected_output["core_number"]
counts_diffs = result_core_number.query("mg_core_number != sg_core_number")

assert len(counts_diffs) == 0


@pytest.mark.mg
def test_core_number_invalid_input():
dg = karate_asymmetric.get_graph(create_using=cugraph.Graph(directed=True))

invalid_degree_type = 3

with pytest.raises(ValueError):
dcg.core_number(dg, invalid_degree_type)
22 changes: 10 additions & 12 deletions python/cugraph/cugraph/tests/link_prediction/test_jaccard.py
@@ -156,12 +156,10 @@ def networkx_call(M, benchmark_callable=None):

# FIXME: This compare is shared across several tests... it should be
# a general utility
def compare(src1, dst1, val1, src2, dst2, val2):
#
def assert_results_equal(src1, dst1, val1, src2, dst2, val2):
# We will do comparison computations by using dataframe
# merge functions (essentially doing fast joins). We
# start by making two data frames
#
df1 = cudf.DataFrame()
df1["src1"] = src1
df1["dst1"] = dst1
@@ -174,19 +172,18 @@ def compare(src1, dst1, val1, src2, dst2, val2):
if val2 is not None:
df2["val2"] = val2

#
# Check to see if all pairs in the original data frame
# still exist in the new data frame. If we join (merge)
# the data frames where (src1[i]=src2[i]) and (dst1[i]=dst2[i])
# then we should get exactly the same number of entries in
# the data frame if we did not lose any data.
#
# Check to see if all pairs in df1 still exist in the new (merged) data
# frame. If we join (merge) the data frames where (src1[i]=src2[i]) and
# (dst1[i]=dst2[i]) then we should get exactly the same number of entries
# in the data frame if we did not lose any data.
join = df1.merge(df2, left_on=["src1", "dst1"], right_on=["src2", "dst2"])

# Print detailed differences on test failure
if len(df1) != len(join):
join2 = df1.merge(
df2, how="left", left_on=["src1", "dst1"], right_on=["src2", "dst2"]
)
orig_option = pd.get_option("display.max_rows")
pd.set_option("display.max_rows", 500)
print("df1 = \n", df1.sort_values(["src1", "dst1"]))
print("df2 = \n", df2.sort_values(["src2", "dst2"]))
@@ -196,6 +193,7 @@ def compare(src1, dst1, val1, src2, dst2, val2):
.to_pandas()
.query("src2.isnull()", engine="python"),
)
pd.set_option("display.max_rows", orig_option)

assert len(df1) == len(join)

@@ -485,7 +483,7 @@ def test_all_pairs_jaccard_with_topk():
worst_coeff = all_pairs_jaccard_results["jaccard_coeff"].min()
better_than_k = jaccard_results[jaccard_results["jaccard_coeff"] > worst_coeff]

compare(
assert_results_equal(
all_pairs_jaccard_results["first"],
all_pairs_jaccard_results["second"],
all_pairs_jaccard_results["jaccard_coeff"],
Expand All @@ -494,7 +492,7 @@ def test_all_pairs_jaccard_with_topk():
jaccard_results["jaccard_coeff"],
)

compare(
assert_results_equal(
better_than_k["first"],
better_than_k["second"],
better_than_k["jaccard_coeff"],
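
For reference, a toy miniature (illustrative column names only) of the merge-based comparison idea behind the renamed `assert_results_equal` helper:

```python
# Toy illustration: an inner join on (src, dst) recovers every pair iff the sets match.
import cudf

df1 = cudf.DataFrame({"src1": [0, 1], "dst1": [1, 2], "val1": [0.5, 0.25]})
df2 = cudf.DataFrame({"src2": [0, 1], "dst2": [1, 2], "val2": [0.5, 0.25]})

join = df1.merge(df2, left_on=["src1", "dst1"], right_on=["src2", "dst2"])
assert len(join) == len(df1)  # no (src, dst) pair from df1 was lost
```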