From d5230faad533bb78bea0cf583682ba723eee2960 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Fri, 29 Dec 2023 16:04:44 -0800 Subject: [PATCH] remove flag --- .../cugraph_pyg/sampler/cugraph_sampler.py | 1 - .../dask/sampling/uniform_neighbor_sample.py | 172 +++++++----------- .../sampling/uniform_neighbor_sample.py | 150 ++++++--------- .../sampling/test_uniform_neighbor_sample.py | 3 - .../test_uniform_neighbor_sample_mg.py | 5 - 5 files changed, 124 insertions(+), 207 deletions(-) diff --git a/python/cugraph-pyg/cugraph_pyg/sampler/cugraph_sampler.py b/python/cugraph-pyg/cugraph_pyg/sampler/cugraph_sampler.py index b6ec932abbe..4750e0cbc07 100644 --- a/python/cugraph-pyg/cugraph_pyg/sampler/cugraph_sampler.py +++ b/python/cugraph-pyg/cugraph_pyg/sampler/cugraph_sampler.py @@ -199,7 +199,6 @@ def __neighbor_sample( # conversion required by cugraph api list(num_neighbors), replace, - with_edge_properties=True, ) if self.__graph_store._is_delayed: diff --git a/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py b/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py index 15d109452eb..fecc34f799e 100644 --- a/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py @@ -94,58 +94,44 @@ def create_empty_df_with_edge_props(indices_t, weight_t, return_offsets=False): return df -def convert_to_cudf(cp_arrays, weight_t, with_edge_properties, return_offsets=False): +def convert_to_cudf(cp_arrays, return_offsets=False): """ Creates a cudf DataFrame from cupy arrays from pylibcugraph wrapper """ df = cudf.DataFrame() - if with_edge_properties: - ( - sources, - destinations, - weights, - edge_ids, - edge_types, - batch_ids, - offsets, - hop_ids, - ) = cp_arrays - - df[src_n] = sources - df[dst_n] = destinations - df[weight_n] = weights - df[edge_id_n] = edge_ids - df[edge_type_n] = edge_types - df[hop_id_n] = hop_ids - - if return_offsets: - offsets_df = cudf.DataFrame( - { - batch_id_n: batch_ids, - offsets_n: offsets[:-1], - } - ) - return df, offsets_df - else: - if len(batch_ids) > 0: - batch_ids = cudf.Series(batch_ids).repeat(cp.diff(offsets)) - batch_ids.reset_index(drop=True, inplace=True) + ( + sources, + destinations, + weights, + edge_ids, + edge_types, + batch_ids, + offsets, + hop_ids, + ) = cp_arrays + + df[src_n] = sources + df[dst_n] = destinations + df[weight_n] = weights + df[edge_id_n] = edge_ids + df[edge_type_n] = edge_types + df[hop_id_n] = hop_ids - df[batch_id_n] = batch_ids - return df + if return_offsets: + offsets_df = cudf.DataFrame( + { + batch_id_n: batch_ids, + offsets_n: offsets[:-1], + } + ) + return df, offsets_df else: - cupy_sources, cupy_destinations, cupy_indices = cp_arrays - - df[src_n] = cupy_sources - df[dst_n] = cupy_destinations - df[indices_n] = cupy_indices - - if weight_t == "int32": - df.indices = df.indices.astype("int32") - elif weight_t == "int64": - df.indices = df.indices.astype("int64") + if len(batch_ids) > 0: + batch_ids = cudf.Series(batch_ids).repeat(cp.diff(offsets)) + batch_ids.reset_index(drop=True, inplace=True) + df[batch_id_n] = batch_ids return df @@ -157,8 +143,6 @@ def _call_plc_uniform_neighbor_sample( label_to_output_comm_rank, fanout_vals, with_replacement, - weight_t, - with_edge_properties, random_state=None, return_offsets=False, ): @@ -173,12 +157,11 @@ def _call_plc_uniform_neighbor_sample( h_fan_out=fanout_vals, with_replacement=with_replacement, do_expensive_check=False, - with_edge_properties=with_edge_properties, 
batch_id_list=batch_id_list_x, random_state=random_state, ) return convert_to_cudf( - cp_arrays, weight_t, with_edge_properties, return_offsets=return_offsets + cp_arrays, return_offsets=return_offsets ) @@ -193,7 +176,6 @@ def _mg_call_plc_uniform_neighbor_sample( with_replacement, weight_t, indices_t, - with_edge_properties, random_state, return_offsets=False, ): @@ -207,8 +189,6 @@ def _mg_call_plc_uniform_neighbor_sample( label_to_output_comm_rank, fanout_vals, with_replacement, - weight_t=weight_t, - with_edge_properties=with_edge_properties, # FIXME accept and properly transmute a numpy/cupy random state. random_state=hash((random_state, i)), workers=[w], @@ -223,8 +203,6 @@ def _mg_call_plc_uniform_neighbor_sample( create_empty_df_with_edge_props( indices_t, weight_t, return_offsets=return_offsets ) - if with_edge_properties - else create_empty_df(indices_t, weight_t) ) if return_offsets: @@ -251,7 +229,6 @@ def uniform_neighbor_sample( start_list: Sequence, fanout_vals: List[int], with_replacement: bool = True, - with_edge_properties: bool = False, batch_id_list: Sequence = None, label_list: Sequence = None, label_to_output_comm_rank: bool = None, @@ -279,13 +256,8 @@ def uniform_neighbor_sample( with_replacement: bool, optional (default=True) Flag to specify if the random sampling is done with replacement - with_edge_properties: bool, optional (default=False) - Flag to specify whether to return edge properties (weight, edge id, - edge type, batch id, hop id) with the sampled edges. - batch_id_list: cudf.Series or dask_cudf.Series (int32), optional (default=None) - List of batch ids that will be returned with the sampled edges if - with_edge_properties is set to True. + List of batch ids that will be returned with the sampled edges. label_list: cudf.Series or dask_cudf.Series (int32), optional (default=None) List of unique batch id labels. 
Used along with @@ -314,51 +286,40 @@ def uniform_neighbor_sample( ------- result : dask_cudf.DataFrame or Tuple[dask_cudf.DataFrame, dask_cudf.DataFrame] GPU distributed data frame containing several dask_cudf.Series - - If with_edge_properties=True: - ddf['sources']: dask_cudf.Series + If return_offsets=False: + df['sources']: dask_cudf.Series Contains the source vertices from the sampling result - ddf['destinations']: dask_cudf.Series + df['destinations']: dask_cudf.Series Contains the destination vertices from the sampling result - ddf['indices']: dask_cudf.Series - Contains the indices from the sampling result for path - reconstruction - - If with_edge_properties=False: - If return_offsets=False: - df['sources']: dask_cudf.Series - Contains the source vertices from the sampling result - df['destinations']: dask_cudf.Series - Contains the destination vertices from the sampling result - df['edge_weight']: dask_cudf.Series - Contains the edge weights from the sampling result - df['edge_id']: dask_cudf.Series - Contains the edge ids from the sampling result - df['edge_type']: dask_cudf.Series - Contains the edge types from the sampling result - df['batch_id']: dask_cudf.Series - Contains the batch ids from the sampling result - df['hop_id']: dask_cudf.Series - Contains the hop ids from the sampling result - - If return_offsets=True: - df['sources']: cudf.Series - Contains the source vertices from the sampling result - df['destinations']: cudf.Series - Contains the destination vertices from the sampling result - df['edge_weight']: cudf.Series - Contains the edge weights from the sampling result - df['edge_id']: cudf.Series - Contains the edge ids from the sampling result - df['edge_type']: cudf.Series - Contains the edge types from the sampling result - df['hop_id']: cudf.Series - Contains the hop ids from the sampling result - - offsets_df['batch_id']: cudf.Series - Contains the batch ids from the sampling result - offsets_df['offsets']: cudf.Series - Contains the offsets of each batch in the sampling result + df['edge_weight']: dask_cudf.Series + Contains the edge weights from the sampling result + df['edge_id']: dask_cudf.Series + Contains the edge ids from the sampling result + df['edge_type']: dask_cudf.Series + Contains the edge types from the sampling result + df['batch_id']: dask_cudf.Series + Contains the batch ids from the sampling result + df['hop_id']: dask_cudf.Series + Contains the hop ids from the sampling result + + If return_offsets=True: + df['sources']: cudf.Series + Contains the source vertices from the sampling result + df['destinations']: cudf.Series + Contains the destination vertices from the sampling result + df['edge_weight']: cudf.Series + Contains the edge weights from the sampling result + df['edge_id']: cudf.Series + Contains the edge ids from the sampling result + df['edge_type']: cudf.Series + Contains the edge types from the sampling result + df['hop_id']: cudf.Series + Contains the hop ids from the sampling result + + offsets_df['batch_id']: cudf.Series + Contains the batch ids from the sampling result + offsets_df['offsets']: cudf.Series + Contains the offsets of each batch in the sampling result """ if isinstance(start_list, int): @@ -371,8 +332,7 @@ def uniform_neighbor_sample( input_graph.renumber_map.renumbered_src_col_name ].dtype, ) - - elif with_edge_properties and batch_id_list is None: + elif batch_id_list is None: batch_id_list = cudf.Series(cp.zeros(len(start_list), dtype="int32")) # fanout_vals must be a host array! 
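# Usage sketch (illustrative only, not part of the diff): how the MG sampler is
# called once with_edge_properties is removed. Edge properties (weight, edge id,
# edge type, batch id, hop id) are now always returned. The toy edge list, the
# column names "src"/"dst"/"wgt", and the single-node dask-CUDA cluster setup
# below are assumptions made for this sketch, not something the patch prescribes.
import cudf
import dask_cudf
import cugraph
import cugraph.dask as dcg
from dask_cuda import LocalCUDACluster
from dask.distributed import Client
from cugraph.dask.comms import comms as Comms

cluster = LocalCUDACluster()
client = Client(cluster)
Comms.initialize(p2p=True)

# Tiny directed ring graph, distributed as a single dask_cudf partition.
edges = dask_cudf.from_cudf(
    cudf.DataFrame({"src": [0, 1, 2, 3], "dst": [1, 2, 3, 0], "wgt": [1.0] * 4}),
    npartitions=1,
)
G = cugraph.Graph(directed=True)
G.from_dask_cudf_edgelist(edges, source="src", destination="dst", edge_attr="wgt")

# No with_edge_properties flag anymore; batch_id_list stays optional and, per the
# patched code above, defaults to a single batch of zeros when omitted.
result = dcg.uniform_neighbor_sample(
    G,
    start_list=cudf.Series([0, 1], dtype="int64"),
    fanout_vals=[2, 2],
    with_replacement=False,
    batch_id_list=cudf.Series([0, 1], dtype="int32"),
)
# The result is a dask_cudf DataFrame with sources, destinations, and the edge
# property / batch / hop columns described in the docstring above.
print(result.compute())

Comms.destroy()
client.close()
cluster.close()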
@@ -450,7 +410,6 @@ def uniform_neighbor_sample( with_replacement=with_replacement, weight_t=weight_t, indices_t=indices_t, - with_edge_properties=with_edge_properties, random_state=random_state, return_offsets=return_offsets, ) @@ -472,7 +431,6 @@ def uniform_neighbor_sample( with_replacement=with_replacement, weight_t=weight_t, indices_t=indices_t, - with_edge_properties=with_edge_properties, random_state=random_state, return_offsets=return_offsets, ) diff --git a/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py b/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py index a7dad6c01a6..79fed3b099c 100644 --- a/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py @@ -55,7 +55,6 @@ def uniform_neighbor_sample( start_list: Sequence, fanout_vals: List[int], with_replacement: bool = True, - with_edge_properties: bool = False, batch_id_list: Sequence = None, random_state: int = None, return_offsets: bool = False, @@ -80,10 +79,6 @@ def uniform_neighbor_sample( with_replacement: bool, optional (default=True) Flag to specify if the random sampling is done with replacement - with_edge_properties: bool, optional (default=False) - Flag to specify whether to return edge properties (weight, edge id, - edge type, batch id, hop id) with the sampled edges. - batch_id_list: list (int32) List of batch ids that will be returned with the sampled edges if with_edge_properties is set to True. @@ -101,51 +96,40 @@ def uniform_neighbor_sample( ------- result : cudf.DataFrame or Tuple[cudf.DataFrame, cudf.DataFrame] GPU data frame containing multiple cudf.Series - - If with_edge_properties=False: + If return_offsets=False: + df['sources']: cudf.Series + Contains the source vertices from the sampling result + df['destinations']: cudf.Series + Contains the destination vertices from the sampling result + df['edge_weight']: cudf.Series + Contains the edge weights from the sampling result + df['edge_id']: cudf.Series + Contains the edge ids from the sampling result + df['edge_type']: cudf.Series + Contains the edge types from the sampling result + df['batch_id']: cudf.Series + Contains the batch ids from the sampling result + df['hop_id']: cudf.Series + Contains the hop ids from the sampling result + + If return_offsets=True: df['sources']: cudf.Series Contains the source vertices from the sampling result df['destinations']: cudf.Series Contains the destination vertices from the sampling result - df['indices']: cudf.Series - Contains the indices (edge weights) from the sampling result - for path reconstruction - - If with_edge_properties=True: - If return_offsets=False: - df['sources']: cudf.Series - Contains the source vertices from the sampling result - df['destinations']: cudf.Series - Contains the destination vertices from the sampling result - df['edge_weight']: cudf.Series - Contains the edge weights from the sampling result - df['edge_id']: cudf.Series - Contains the edge ids from the sampling result - df['edge_type']: cudf.Series - Contains the edge types from the sampling result - df['batch_id']: cudf.Series - Contains the batch ids from the sampling result - df['hop_id']: cudf.Series - Contains the hop ids from the sampling result - - If return_offsets=True: - df['sources']: cudf.Series - Contains the source vertices from the sampling result - df['destinations']: cudf.Series - Contains the destination vertices from the sampling result - df['edge_weight']: cudf.Series - Contains the edge weights from the sampling result - 
df['edge_id']: cudf.Series - Contains the edge ids from the sampling result - df['edge_type']: cudf.Series - Contains the edge types from the sampling result - df['hop_id']: cudf.Series - Contains the hop ids from the sampling result - - offsets_df['batch_id']: cudf.Series - Contains the batch ids from the sampling result - offsets_df['offsets']: cudf.Series - Contains the offsets of each batch in the sampling result + df['edge_weight']: cudf.Series + Contains the edge weights from the sampling result + df['edge_id']: cudf.Series + Contains the edge ids from the sampling result + df['edge_type']: cudf.Series + Contains the edge types from the sampling result + df['hop_id']: cudf.Series + Contains the hop ids from the sampling result + + offsets_df['batch_id']: cudf.Series + Contains the batch ids from the sampling result + offsets_df['offsets']: cudf.Series + Contains the offsets of each batch in the sampling result """ if isinstance(start_list, int): @@ -156,7 +140,7 @@ def uniform_neighbor_sample( start_list, dtype=G.edgelist.edgelist_df[G.srcCol].dtype ) - if with_edge_properties and batch_id_list is None: + if batch_id_list is None: batch_id_list = cp.zeros(len(start_list), dtype="int32") # fanout_vals must be a host array! @@ -186,60 +170,44 @@ def uniform_neighbor_sample( h_fan_out=fanout_vals, with_replacement=with_replacement, do_expensive_check=False, - with_edge_properties=with_edge_properties, batch_id_list=batch_id_list, random_state=random_state, ) df = cudf.DataFrame() - if with_edge_properties: - ( - sources, - destinations, - weights, - edge_ids, - edge_types, - batch_ids, - offsets, - hop_ids, - ) = sampling_result - - df["sources"] = sources - df["destinations"] = destinations - df["weight"] = weights - df["edge_id"] = edge_ids - df["edge_type"] = edge_types - df["hop_id"] = hop_ids - - if return_offsets: - offsets_df = cudf.DataFrame( - { - "batch_id": batch_ids, - "offsets": offsets[:-1], - } - ) + ( + sources, + destinations, + weights, + edge_ids, + edge_types, + batch_ids, + offsets, + hop_ids, + ) = sampling_result + + df["sources"] = sources + df["destinations"] = destinations + df["weight"] = weights + df["edge_id"] = edge_ids + df["edge_type"] = edge_types + df["hop_id"] = hop_ids - else: - if len(batch_ids) > 0: - batch_ids = cudf.Series(batch_ids).repeat(cp.diff(offsets)) - batch_ids.reset_index(drop=True, inplace=True) - - df["batch_id"] = batch_ids + if return_offsets: + offsets_df = cudf.DataFrame( + { + "batch_id": batch_ids, + "offsets": offsets[:-1], + } + ) else: - sources, destinations, indices = sampling_result - - df["sources"] = sources - df["destinations"] = destinations + if len(batch_ids) > 0: + batch_ids = cudf.Series(batch_ids).repeat(cp.diff(offsets)) + batch_ids.reset_index(drop=True, inplace=True) - df["indices"] = indices - if weight_t == "int32": - df["indices"] = indices.astype("int32") - elif weight_t == "int64": - df["indices"] = indices.astype("int64") - else: - df["indices"] = indices + df["batch_id"] = batch_ids if G.renumbered: df = G.unrenumber(df, "sources", preserve_order=True) diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py index 6fe16d97713..a2ee9934b81 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py @@ -333,7 +333,6 @@ def test_uniform_neighbor_sample_edge_properties(return_offsets): start_list=start_df["seed"], 
fanout_vals=[2, 2], with_replacement=False, - with_edge_properties=True, batch_id_list=start_df["batch"], return_offsets=return_offsets, ) @@ -393,7 +392,6 @@ def test_uniform_neighbor_sample_edge_properties_self_loops(): batch_id_list=cudf.Series([1, 1, 1], dtype="int32"), fanout_vals=[2, 2], with_replacement=False, - with_edge_properties=True, random_state=80, ) @@ -446,7 +444,6 @@ def test_uniform_neighbor_sample_empty_start_list(): batch_id_list=cudf.Series([], dtype="int32"), fanout_vals=[2, 2], with_replacement=False, - with_edge_properties=True, random_state=32, ) diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py index 76657eb634f..7522f7ee31c 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py @@ -353,7 +353,6 @@ def test_uniform_neighbor_sample_edge_properties(dask_client, return_offsets): start_list=cudf.Series([0, 4], dtype="int64"), fanout_vals=[-1, -1], with_replacement=False, - with_edge_properties=True, batch_id_list=cudf.Series([0, 1], dtype="int32"), label_list=cudf.Series([0, 1], dtype="int32") if return_offsets else None, label_to_output_comm_rank=cudf.Series(dest_rank, dtype="int32") @@ -447,7 +446,6 @@ def test_uniform_neighbor_sample_edge_properties_self_loops(dask_client): ), fanout_vals=[2, 2], with_replacement=False, - with_edge_properties=True, ).compute() assert sorted(sampling_results.sources.values_host.tolist()) == [0, 0, 1, 1, 2, 2] @@ -507,7 +505,6 @@ def test_uniform_neighbor_edge_properties_sample_small_start_list( start_list=cudf.Series([0]), fanout_vals=[10, 25], with_replacement=with_replacement, - with_edge_properties=True, batch_id_list=cudf.Series([10], dtype="int32"), ) @@ -541,7 +538,6 @@ def test_uniform_neighbor_sample_without_dask_inputs(dask_client): batch_id_list=cudf.Series([1, 1, 1], dtype="int32"), fanout_vals=[2, 2], with_replacement=False, - with_edge_properties=True, ).compute() assert sorted(sampling_results.sources.values_host.tolist()) == [0, 0, 1, 1, 2, 2] @@ -608,7 +604,6 @@ def test_uniform_neighbor_sample_batched(dask_client, dataset, input_df, max_bat batch_id_list=input_batch, fanout_vals=[5, 5], with_replacement=False, - with_edge_properties=True, ) for batch_id in range(max_batches):