Refactor MG Centrality Tests (#4197)

Addresses #4187. This PR improves the old testing conventions used in the `cugraph.centrality` MG tests. Instead of using nested fixtures (e.g. `input_expected_output -> input_combo -> fixture_params`), which can be confusing, the `@pytest.mark.parametrize` marker is used to iterate through the combinations of parameters used for testing. The fixtures are also replaced by functions used to create SG and MG graphs. Authors: - Ralph Liu (https://github.com/nv-rliu) Approvers: - Don Acosta (https://github.com/acostadon) - Rick Ratzel (https://github.com/rlratzel) - Joseph Nke (https://github.com/jnke2016) URL: #4197
rapidsai · Mar 14, 2024 · fa096f0 · fa096f0
1 parent a410b3f
commit fa096f0
Show file tree

Hide file tree

Showing 7 changed files with 328 additions and 400 deletions.
diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -24,46 +24,49 @@
     compare_scores,
 )
 
-DIRECTED_GRAPH_OPTIONS = [False, True]
-WEIGHTED_GRAPH_OPTIONS = [False, True]
-ENDPOINTS_OPTIONS = [False, True]
-NORMALIZED_OPTIONS = [False, True]
-DEFAULT_EPSILON = 0.0001
-SUBSET_SIZE_OPTIONS = [4, None]
-SUBSET_SEED_OPTIONS = [42]
-
 
 # =============================================================================
 # Parameters
 # =============================================================================
-DATASETS = [karate]
-# FIXME: The "preset_gpu_count" from 21.08 and below are currently not
-# supported and have been removed
 
-RESULT_DTYPE_OPTIONS = [np.float64]
+
+DATASETS = [karate]
+DEFAULT_EPSILON = 0.0001
+IS_DIRECTED = [False, True]
+ENDPOINTS = [False, True]
+IS_NORMALIZED = [False, True]
+RESULT_DTYPES = [np.float64]
+SUBSET_SIZES = [4, None]
+SUBSET_SEEDS = [42]
+IS_WEIGHTED = [False, True]
 
 
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
 # =============================================================================
+
+
 def setup_function():
     gc.collect()
 
 
+# =============================================================================
+# Tests
+# =============================================================================
+
+
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
-@pytest.mark.parametrize(
-    "graph_file", DATASETS, ids=[f"dataset={d.get_path().stem}" for d in DATASETS]
-)
-@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
-@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
-@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
+@pytest.mark.parametrize("dataset", DATASETS)
+@pytest.mark.parametrize("directed", IS_DIRECTED)
+@pytest.mark.parametrize("subset_size", SUBSET_SIZES)
+@pytest.mark.parametrize("normalized", IS_NORMALIZED)
 @pytest.mark.parametrize("weight", [None])
-@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS)
-@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS)
-@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS)
+@pytest.mark.parametrize("endpoints", ENDPOINTS)
+@pytest.mark.parametrize("subset_seed", SUBSET_SEEDS)
+@pytest.mark.parametrize("result_dtype", RESULT_DTYPES)
 def test_mg_betweenness_centrality(
-    graph_file,
+    dataset,
     directed,
     subset_size,
     normalized,
@@ -74,7 +77,7 @@ def test_mg_betweenness_centrality(
     dask_client,
 ):
     sorted_df = calc_betweenness_centrality(
-        graph_file,
+        dataset,
         directed=directed,
         normalized=normalized,
         k=subset_size,
@@ -90,3 +93,6 @@ def test_mg_betweenness_centrality(
         second_key="ref_bc",
         epsilon=DEFAULT_EPSILON,
     )
+
+    # Clean-up stored dataset edge-lists
+    dataset.unload()
diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2023, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -19,59 +19,57 @@
 from cugraph.dask.common.mg_utils import is_single_gpu
 from cugraph.datasets import karate, netscience
 
-# Get parameters from standard betwenness_centrality_test
-# As tests directory is not a module, we need to add it to the path
-# FIXME: Test must be reworked to import from 'cugraph.testing' instead of
-# importing from other tests
-from test_edge_betweenness_centrality import (
-    DIRECTED_GRAPH_OPTIONS,
-    NORMALIZED_OPTIONS,
-    DEFAULT_EPSILON,
-    SUBSET_SIZE_OPTIONS,
-)
-
 from test_edge_betweenness_centrality import (
     calc_edge_betweenness_centrality,
     compare_scores,
 )
 
+
 # =============================================================================
 # Parameters
 # =============================================================================
-DATASETS = [karate, netscience]
 
-# FIXME: The "preset_gpu_count" from 21.08 and below are not supported and have
-# been removed
-RESULT_DTYPE_OPTIONS = [np.float32, np.float64]
+
+DATASETS = [karate, netscience]
+IS_DIRECTED = [True, False]
+IS_NORMALIZED = [True, False]
+DEFAULT_EPSILON = 0.0001
+SUBSET_SIZES = [4, None]
+RESULT_DTYPES = [np.float32, np.float64]
 
 
 # =============================================================================
 # Pytest Setup / Teardown - called for each test function
 # =============================================================================
+
+
 def setup_function():
     gc.collect()
 
 
+# =============================================================================
+# Tests
+# =============================================================================
+
+
 # FIXME: Fails for directed = False(bc score twice as much) and normalized = True.
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
-@pytest.mark.parametrize(
-    "graph_file", DATASETS, ids=[f"dataset={d.get_path().stem}" for d in DATASETS]
-)
-@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
-@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
-@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
-@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS)
+@pytest.mark.parametrize("dataset", DATASETS)
+@pytest.mark.parametrize("directed", IS_DIRECTED)
+@pytest.mark.parametrize("subset_size", SUBSET_SIZES)
+@pytest.mark.parametrize("normalized", IS_NORMALIZED)
+@pytest.mark.parametrize("result_dtype", RESULT_DTYPES)
 def test_mg_edge_betweenness_centrality(
-    graph_file,
+    dataset,
     directed,
     subset_size,
     normalized,
     result_dtype,
     dask_client,
 ):
     sorted_df = calc_edge_betweenness_centrality(
-        graph_file,
+        dataset,
         directed=directed,
         normalized=normalized,
         k=subset_size,
@@ -86,3 +84,5 @@ def test_mg_edge_betweenness_centrality(
         second_key="ref_bc",
         epsilon=DEFAULT_EPSILON,
     )
+    # Clean-up stored dataset edge-lists
+    dataset.unload()