Skip to content

Commit

Permalink
Remove Deprecated Sampling Options (#3816)
Browse files Browse the repository at this point in the history
The `uniform_neighbor_sample` code is becoming increasingly difficult to maintain. This PR removes all the options that were deprecated in the previous release. It also deprecates the `with_edge_properties` option; in the next release, that option will be replaced by always returning whatever properties are present in the graph.

This PR also resolves a FIXME by allowing `fanout_vals` to be a `cupy.ndarray`, `numpy.ndarray`, or `cudf.Series`.

Closes #3698

Authors:
  - Alex Barghi (https://github.com/alexbarghi-nv)

Approvers:
  - Brad Rees (https://github.com/BradReesWork)
  - Rick Ratzel (https://github.com/rlratzel)

URL: #3816
  • Loading branch information
alexbarghi-nv authored Sep 1, 2023
1 parent f13feff commit 2b4118a
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 495 deletions.
1 change: 1 addition & 0 deletions python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,7 @@ def __construct_graph(
-------
A newly-constructed directed cugraph.MultiGraph object.
"""

# Ensure the original dict is not modified.
edge_info_cg = {}

Expand Down
22 changes: 18 additions & 4 deletions python/cugraph-pyg/cugraph_pyg/tests/mg/test_mg_cugraph_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,21 @@ def test_neighbor_sample(dask_client, basic_graph_1):
F, G, N = basic_graph_1
cugraph_store = CuGraphStore(F, G, N, multi_gpu=True)

batches = cudf.DataFrame(
{
"start": cudf.Series([0, 1, 2, 3, 4], dtype="int64"),
"batch": cudf.Series(cupy.zeros(5, dtype="int32")),
}
)

sampling_results = (
uniform_neighbor_sample(
cugraph_store._subgraph(),
cudf.Series([0, 1, 2, 3, 4], dtype="int64"),
batches,
with_batch_ids=True,
fanout_vals=[-1],
with_replacement=False,
with_edge_properties=True,
batch_id_list=cudf.Series(cupy.zeros(5, dtype="int32")),
random_state=62,
return_offsets=False,
return_hops=True,
Expand Down Expand Up @@ -90,16 +97,23 @@ def test_neighbor_sample_multi_vertex(dask_client, multi_edge_multi_vertex_graph
F, G, N = multi_edge_multi_vertex_graph_1
cugraph_store = CuGraphStore(F, G, N, multi_gpu=True)

batches = cudf.DataFrame(
{
"start": cudf.Series([0, 1, 2, 3, 4], dtype="int64"),
"batches": cudf.Series(cupy.zeros(5, dtype="int32")),
}
)

sampling_results = (
uniform_neighbor_sample(
cugraph_store._subgraph(),
cudf.Series([0, 1, 2, 3, 4], dtype="int64"),
batches,
fanout_vals=[-1],
with_replacement=False,
with_edge_properties=True,
batch_id_list=cudf.Series(cupy.zeros(5, dtype="int32")),
random_state=62,
return_offsets=False,
with_batch_ids=True,
)
.sort_values(by=["sources", "destinations"])
.compute()
Expand Down
22 changes: 18 additions & 4 deletions python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,20 @@ def test_neighbor_sample(basic_graph_1):
F, G, N = basic_graph_1
cugraph_store = CuGraphStore(F, G, N)

batches = cudf.DataFrame(
{
"start": cudf.Series([0, 1, 2, 3, 4], dtype="int64"),
"batch": cudf.Series(cupy.zeros(5, dtype="int32")),
}
)

sampling_results = uniform_neighbor_sample(
cugraph_store._subgraph(),
cudf.Series([0, 1, 2, 3, 4], dtype="int64"),
batches,
fanout_vals=[-1],
with_replacement=False,
with_edge_properties=True,
batch_id_list=cudf.Series(cupy.zeros(5, dtype="int32")),
with_batch_ids=True,
random_state=62,
return_offsets=False,
).sort_values(by=["sources", "destinations"])
Expand Down Expand Up @@ -82,15 +89,22 @@ def test_neighbor_sample_multi_vertex(multi_edge_multi_vertex_graph_1):
F, G, N = multi_edge_multi_vertex_graph_1
cugraph_store = CuGraphStore(F, G, N)

batches = cudf.DataFrame(
{
"start": cudf.Series([0, 1, 2, 3, 4], dtype="int64"),
"batch": cudf.Series(cupy.zeros(5, dtype="int32")),
}
)

sampling_results = uniform_neighbor_sample(
cugraph_store._subgraph(),
cudf.Series([0, 1, 2, 3, 4], dtype="int64"),
batches,
fanout_vals=[-1],
with_replacement=False,
with_edge_properties=True,
batch_id_list=cudf.Series(cupy.zeros(5, dtype="int32")),
random_state=62,
return_offsets=False,
with_batch_ids=True,
).sort_values(by=["sources", "destinations"])

out = _sampler_output_from_sampling_results(
Expand Down
Loading

0 comments on commit 2b4118a

Please sign in to comment.