Add back changes except edge_betweenness_centrality

rapidsai · Mar 11, 2024 · 5347dd9 · 5347dd9
1 parent 36182a8
commit 5347dd9
Show file tree

Hide file tree

Showing 4 changed files with 149 additions and 182 deletions.
diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py
@@ -24,44 +24,49 @@
     compare_scores,
 )
 
-DIRECTED_GRAPH_OPTIONS = [False, True]
-WEIGHTED_GRAPH_OPTIONS = [False, True]
-ENDPOINTS_OPTIONS = [False, True]
-NORMALIZED_OPTIONS = [False, True]
-DEFAULT_EPSILON = 0.0001
-SUBSET_SIZE_OPTIONS = [4, None]
-SUBSET_SEED_OPTIONS = [42]
-
 
 # =============================================================================
 # Parameters
 # =============================================================================
-DATASETS = [karate]
-# FIXME: The "preset_gpu_count" from 21.08 and below are currently not
-# supported and have been removed
 
-RESULT_DTYPE_OPTIONS = [np.float64]
+
+DATASETS = [karate]
+DEFAULT_EPSILON = 0.0001
+IS_DIRECTED = [False, True]
+ENDPOINTS = [False, True]
+IS_NORMALIZED = [False, True]
+RESULT_DTYPES = [np.float64]
+SUBSET_SIZES = [4, None]
+SUBSET_SEEDS = [42]
+IS_WEIGHTED = [False, True]
 
 
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
 # =============================================================================
+
+
 def setup_function():
     gc.collect()
 
 
+# =============================================================================
+# Tests
+# =============================================================================
+
+
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
 @pytest.mark.parametrize(
     "graph_file", DATASETS, ids=[f"dataset={d.get_path().stem}" for d in DATASETS]
 )
-@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
-@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
-@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
+@pytest.mark.parametrize("directed", IS_DIRECTED)
+@pytest.mark.parametrize("subset_size", SUBSET_SIZES)
+@pytest.mark.parametrize("normalized", IS_NORMALIZED)
 @pytest.mark.parametrize("weight", [None])
-@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS)
-@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS)
-@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS)
+@pytest.mark.parametrize("endpoints", ENDPOINTS)
+@pytest.mark.parametrize("subset_seed", SUBSET_SEEDS)
+@pytest.mark.parametrize("result_dtype", RESULT_DTYPES)
 def test_mg_betweenness_centrality(
     graph_file,
     directed,

diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
@@ -19,49 +19,49 @@
 from cugraph.dask.common.mg_utils import is_single_gpu
 from cugraph.datasets import karate, netscience
 
-# Get parameters from standard betweenness_centrality_test
-# As tests directory is not a module, we need to add it to the path
-# FIXME: Test must be reworked to import from 'cugraph.testing' instead of
-# importing from other tests
-from test_edge_betweenness_centrality import (
-    DIRECTED_GRAPH_OPTIONS,
-    NORMALIZED_OPTIONS,
-    DEFAULT_EPSILON,
-    SUBSET_SIZE_OPTIONS,
-)
-
 from test_edge_betweenness_centrality import (
     calc_edge_betweenness_centrality,
     compare_scores,
 )
 
+
 # =============================================================================
 # Parameters
 # =============================================================================
-DATASETS = [karate, netscience]
 
-# FIXME: The "preset_gpu_count" from 21.08 and below are not supported and have
-# been removed
-RESULT_DTYPE_OPTIONS = [np.float32, np.float64]
+
+DATASETS = [karate, netscience]
+IS_DIRECTED = [True, False]
+IS_NORMALIZED = [True, False]
+DEFAULT_EPSILON = 0.0001
+SUBSET_SIZES = [4, None]
+RESULT_DTYPES = [np.float32, np.float64]
 
 
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
 # =============================================================================
+
+
 def setup_function():
     gc.collect()
 
 
+# =============================================================================
+# Tests
+# =============================================================================
+
+
 # FIXME: Fails for directed = False (bc score is twice as much) and normalized = True.
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
 @pytest.mark.parametrize(
     "graph_file", DATASETS, ids=[f"dataset={d.get_path().stem}" for d in DATASETS]
 )
-@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
-@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
-@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
-@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS)
+@pytest.mark.parametrize("directed", IS_DIRECTED)
+@pytest.mark.parametrize("subset_size", SUBSET_SIZES)
+@pytest.mark.parametrize("normalized", IS_NORMALIZED)
+@pytest.mark.parametrize("result_dtype", RESULT_DTYPES)
 def test_mg_edge_betweenness_centrality(
     graph_file,
     directed,

diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py
@@ -15,13 +15,11 @@
 
 import pytest
 
-import dask_cudf
 import cupy
 import cudf
 import cugraph
 import cugraph.dask as dcg
-from cugraph.testing import utils
-from pylibcugraph.testing import gen_fixture_params_product
+from cugraph.datasets import karate, dolphins
 
 
 # =============================================================================
@@ -33,157 +31,112 @@ def setup_function():
     gc.collect()
 
 
-IS_DIRECTED = [True, False]
+# =============================================================================
+# Parameters
+# =============================================================================
 
+DATASETS = [karate, dolphins]
+IS_DIRECTED = [True, False]
+IS_NORMALIZED = [True, False]
+ENDPOINTS = [True, False]
+SUBSET_SEEDS = [42, None]
+SUBSET_SIZES = [None, 15]
+VERTEX_LIST_TYPES = [list, cudf]
 
 # =============================================================================
-# Pytest fixtures
+# Helper functions
 # =============================================================================
 
-datasets = utils.DATASETS_UNDIRECTED
-
-fixture_params = gen_fixture_params_product(
-    (datasets, "graph_file"),
-    ([False, True], "normalized"),
-    ([False, True], "endpoints"),
-    ([42, None], "subset_seed"),
-    ([None, 15], "subset_size"),
-    (IS_DIRECTED, "directed"),
-    ([list, cudf], "vertex_list_type"),
-)
-
-
-@pytest.fixture(scope="module", params=fixture_params)
-def input_combo(request):
-    """
-    Simply return the current combination of params as a dictionary for use in
-    tests or other parameterized fixtures.
-    """
-    parameters = dict(
-        zip(
-            (
-                "graph_file",
-                "normalized",
-                "endpoints",
-                "subset_seed",
-                "subset_size",
-                "directed",
-                "vertex_list_type",
-            ),
-            request.param,
-        )
+
+def get_sg_graph(dataset, directed):
+    G = dataset.get_graph(create_using=cugraph.Graph(directed=directed))
+
+    return G
+
+
+def get_mg_graph(dataset, directed):
+    ddf = dataset.get_dask_edgelist()
+    dg = cugraph.Graph(directed=directed)
+    dg.from_dask_cudf_edgelist(
+        ddf,
+        source="src",
+        destination="dst",
+        edge_attr="wgt",
+        renumber=True,
+        store_transposed=True,
     )
 
-    return parameters
+    return dg
 
 
-@pytest.fixture(scope="module")
-def input_expected_output(input_combo):
-    """
-    This fixture returns the inputs and expected results from the
-    betweenness_centrality algo based on cuGraph betweenness_centrality) which can
-    be used for validation.
-    """
+# =============================================================================
+# Tests
+# =============================================================================
 
-    input_data_path = input_combo["graph_file"]
-    normalized = input_combo["normalized"]
-    endpoints = input_combo["endpoints"]
-    random_state = input_combo["subset_seed"]
-    subset_size = input_combo["subset_size"]
-    directed = input_combo["directed"]
-    vertex_list_type = input_combo["vertex_list_type"]
 
-    G = utils.generate_cugraph_graph_from_file(input_data_path, directed=directed)
+@pytest.mark.mg
+@pytest.mark.parametrize("dataset", DATASETS)
+@pytest.mark.parametrize("directed", IS_DIRECTED)
+@pytest.mark.parametrize("normalized", IS_NORMALIZED)
+@pytest.mark.parametrize("endpoint", ENDPOINTS)
+@pytest.mark.parametrize("subset_seed", SUBSET_SEEDS)
+@pytest.mark.parametrize("subset_size", SUBSET_SIZES)
+@pytest.mark.parametrize("v_list_type", VERTEX_LIST_TYPES)
+def test_dask_mg_betweenness_centrality(
+    dataset,
+    directed,
+    normalized,
+    endpoint,
+    subset_seed,
+    subset_size,
+    v_list_type,
+    dask_client,
+    benchmark,
+):
+    g = get_sg_graph(dataset, directed)
+    dataset.unload()
+    dg = get_mg_graph(dataset, directed)
+    random_state = subset_seed
 
     if subset_size is None:
         k = subset_size
     elif isinstance(subset_size, int):
         # Select random vertices
-        k = G.select_random_vertices(
+        k = g.select_random_vertices(
             random_state=random_state, num_vertices=subset_size
         )
-        if vertex_list_type is list:
+        if v_list_type is list:
             k = k.to_arrow().to_pylist()
 
         print("the seeds are \n", k)
-        if vertex_list_type is int:
+        if v_list_type is int:
             # With an integer k, betweenness centrality samples k vertices
             # internally. Since each implementation would sample different
             # random vertices, use all vertices instead so that the test is
             # consistent.
-            k = len(G.nodes())
-
-    input_combo["k"] = k
+            k = len(g.nodes())
 
     sg_cugraph_bc = cugraph.betweenness_centrality(
-        G, k=k, normalized=normalized, endpoints=endpoints, random_state=random_state
+        g, k=k, normalized=normalized, endpoints=endpoint, random_state=random_state
     )
-    # Save the results back to the input_combo dictionary to prevent redundant
-    # cuGraph runs. Other tests using the input_combo fixture will look for
-    # them, and if not present they will have to re-run the same cuGraph call.
     sg_cugraph_bc = sg_cugraph_bc.sort_values("vertex").reset_index(drop=True)
 
-    input_combo["sg_cugraph_results"] = sg_cugraph_bc
-    chunksize = dcg.get_chunksize(input_data_path)
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
-
-    dg = cugraph.Graph(directed=directed)
-    dg.from_dask_cudf_edgelist(
-        ddf,
-        source="src",
-        destination="dst",
-        edge_attr="value",
-        renumber=True,
-        store_transposed=True,
-    )
-
-    input_combo["MGGraph"] = dg
-
-    return input_combo
-
-
-# =============================================================================
-# Tests
-# =============================================================================
-
-
-# @pytest.mark.skipif(
-#    is_single_gpu(), reason="skipping MG testing on Single GPU system"
-# )
-
-
-@pytest.mark.mg
-def test_dask_mg_betweenness_centrality(dask_client, benchmark, input_expected_output):
-
-    dg = input_expected_output["MGGraph"]
-    k = input_expected_output["k"]
-    endpoints = input_expected_output["endpoints"]
-    normalized = input_expected_output["normalized"]
-    random_state = input_expected_output["subset_seed"]
     mg_bc_results = benchmark(
         dcg.betweenness_centrality,
         dg,
         k=k,
         normalized=normalized,
-        endpoints=endpoints,
+        endpoints=endpoint,
         random_state=random_state,
     )
 
     mg_bc_results = (
         mg_bc_results.compute().sort_values("vertex").reset_index(drop=True)
     )["betweenness_centrality"].to_cupy()
 
-    sg_bc_results = (
-        input_expected_output["sg_cugraph_results"]
-        .sort_values("vertex")
-        .reset_index(drop=True)
-    )["betweenness_centrality"].to_cupy()
+    sg_bc_results = (sg_cugraph_bc.sort_values("vertex").reset_index(drop=True))[
+        "betweenness_centrality"
+    ].to_cupy()
 
     diff = cupy.isclose(mg_bc_results, sg_bc_results)