Skip to content

Commit

Permalink
Merge branch 'branch-24.06' into model_pyg25
Browse files Browse the repository at this point in the history
  • Loading branch information
tingyu66 committed May 15, 2024
2 parents b1943a7 + a77840e commit 814daaa
Show file tree
Hide file tree
Showing 30 changed files with 483 additions and 726 deletions.
12 changes: 12 additions & 0 deletions cpp/include/cugraph_c/sampling_algorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,15 @@ typedef enum cugraph_compression_type_t {
cugraph_error_code_t cugraph_sampling_options_create(cugraph_sampling_options_t** options,
cugraph_error_t** error);

/**
* @ingroup samplingC
* @brief Set flag to retain seeds (original sources)
*
* @param options - opaque pointer to the sampling options
* @param value - Boolean value to assign to the option
*/
void cugraph_sampling_set_retain_seeds(cugraph_sampling_options_t* options, bool_t value);

/**
* @ingroup samplingC
* @brief Set flag to renumber results
Expand Down Expand Up @@ -335,6 +344,8 @@ void cugraph_sampling_options_free(cugraph_sampling_options_t* options);
* output. If specified then all the data from @p label_list[i] will be shuffled to rank
* @p label_to_comm_rank[i]. This cannot be specified unless @p start_vertex_labels is also
* specified. If not specified then the output data will not be shuffled between ranks.
* @param [in] label_offsets Device array of the offsets for each label in the seed list. This
* parameter is only used with the retain_seeds option.
* @param [in] fanout Host array defining the fan out at each step in the sampling algorithm.
* We only support fanout values of type INT32
* @param [in/out] rng_state State of the random number generator, updated with each call
Expand All @@ -354,6 +365,7 @@ cugraph_error_code_t cugraph_uniform_neighbor_sample(
const cugraph_type_erased_device_array_view_t* start_vertex_labels,
const cugraph_type_erased_device_array_view_t* label_list,
const cugraph_type_erased_device_array_view_t* label_to_comm_rank,
const cugraph_type_erased_device_array_view_t* label_offsets,
const cugraph_type_erased_host_array_view_t* fan_out,
cugraph_rng_state_t* rng_state,
const cugraph_sampling_options_t* options,
Expand Down
60 changes: 45 additions & 15 deletions cpp/src/c_api/uniform_neighbor_sampling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ struct cugraph_sampling_options_t {
bool_t renumber_results_{FALSE};
cugraph_compression_type_t compression_type_{cugraph_compression_type_t::COO};
bool_t compress_per_hop_{FALSE};
bool_t retain_seeds_{FALSE};
};

struct cugraph_sample_result_t {
Expand Down Expand Up @@ -68,6 +69,7 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct
cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertex_labels_{nullptr};
cugraph::c_api::cugraph_type_erased_device_array_view_t const* label_list_{nullptr};
cugraph::c_api::cugraph_type_erased_device_array_view_t const* label_to_comm_rank_{nullptr};
cugraph::c_api::cugraph_type_erased_device_array_view_t const* label_offsets_{nullptr};
cugraph::c_api::cugraph_type_erased_host_array_view_t const* fan_out_{nullptr};
cugraph::c_api::cugraph_rng_state_t* rng_state_{nullptr};
cugraph::c_api::cugraph_sampling_options_t options_{};
Expand All @@ -81,6 +83,7 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct
cugraph_type_erased_device_array_view_t const* start_vertex_labels,
cugraph_type_erased_device_array_view_t const* label_list,
cugraph_type_erased_device_array_view_t const* label_to_comm_rank,
cugraph_type_erased_device_array_view_t const* label_offsets,
cugraph_type_erased_host_array_view_t const* fan_out,
cugraph_rng_state_t* rng_state,
cugraph::c_api::cugraph_sampling_options_t options,
Expand All @@ -99,6 +102,9 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct
label_to_comm_rank_(
reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(
label_to_comm_rank)),
label_offsets_(
reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(
label_offsets)),
fan_out_(
reinterpret_cast<cugraph::c_api::cugraph_type_erased_host_array_view_t const*>(fan_out)),
rng_state_(reinterpret_cast<cugraph::c_api::cugraph_rng_state_t*>(rng_state)),
Expand Down Expand Up @@ -267,8 +273,13 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct
std::move(edge_id),
std::move(edge_type),
std::move(hop),
std::nullopt,
std::nullopt,
options_.retain_seeds_
? std::make_optional(raft::device_span<vertex_t const>{
start_vertices_->as_type<vertex_t>(), start_vertices_->size_})
: std::nullopt,
options_.retain_seeds_ ? std::make_optional(raft::device_span<size_t const>{
label_offsets_->as_type<size_t>(), label_offsets_->size_})
: std::nullopt,
offsets ? std::make_optional(
raft::device_span<size_t const>{offsets->data(), offsets->size()})
: std::nullopt,
Expand Down Expand Up @@ -304,8 +315,13 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct
std::move(edge_id),
std::move(edge_type),
std::move(hop),
std::nullopt,
std::nullopt,
options_.retain_seeds_
? std::make_optional(raft::device_span<vertex_t const>{
start_vertices_->as_type<vertex_t>(), start_vertices_->size_})
: std::nullopt,
options_.retain_seeds_ ? std::make_optional(raft::device_span<size_t const>{
label_offsets_->as_type<size_t>(), label_offsets_->size_})
: std::nullopt,
offsets ? std::make_optional(
raft::device_span<size_t const>{offsets->data(), offsets->size()})
: std::nullopt,
Expand Down Expand Up @@ -402,6 +418,12 @@ extern "C" cugraph_error_code_t cugraph_sampling_options_create(
return CUGRAPH_SUCCESS;
}

/**
 * C API setter for the retain_seeds flag of a sampling options object.
 *
 * @param options  Opaque options handle; it is reinterpreted as the internal
 *                 cugraph::c_api::cugraph_sampling_options_t and dereferenced
 *                 without a null check, so it must point to a valid object.
 * @param value    Value stored into the retain_seeds_ field.
 */
extern "C" void cugraph_sampling_set_retain_seeds(cugraph_sampling_options_t* options, bool_t value)
{
  // Write straight through the casted handle; no other option field is touched.
  reinterpret_cast<cugraph::c_api::cugraph_sampling_options_t*>(options)->retain_seeds_ = value;
}

extern "C" void cugraph_sampling_set_renumber_results(cugraph_sampling_options_t* options,
bool_t value)
{
Expand Down Expand Up @@ -871,13 +893,21 @@ cugraph_error_code_t cugraph_uniform_neighbor_sample(
const cugraph_type_erased_device_array_view_t* start_vertex_labels,
const cugraph_type_erased_device_array_view_t* label_list,
const cugraph_type_erased_device_array_view_t* label_to_comm_rank,
const cugraph_type_erased_device_array_view_t* label_offsets,
const cugraph_type_erased_host_array_view_t* fan_out,
cugraph_rng_state_t* rng_state,
const cugraph_sampling_options_t* options,
bool_t do_expensive_check,
cugraph_sample_result_t** result,
cugraph_error_t** error)
{
auto options_cpp = *reinterpret_cast<cugraph::c_api::cugraph_sampling_options_t const*>(options);

CAPI_EXPECTS((!options_cpp.retain_seeds_) || (label_offsets != nullptr),
CUGRAPH_INVALID_INPUT,
"must specify label_offsets if retain_seeds is true",
*error);

CAPI_EXPECTS((start_vertex_labels == nullptr) ||
(reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(
start_vertex_labels)
Expand Down Expand Up @@ -911,16 +941,16 @@ cugraph_error_code_t cugraph_uniform_neighbor_sample(
"fan_out should be of type int",
*error);

uniform_neighbor_sampling_functor functor{
handle,
graph,
start_vertices,
start_vertex_labels,
label_list,
label_to_comm_rank,
fan_out,
rng_state,
*reinterpret_cast<cugraph::c_api::cugraph_sampling_options_t const*>(options),
do_expensive_check};
uniform_neighbor_sampling_functor functor{handle,
graph,
start_vertices,
start_vertex_labels,
label_list,
label_to_comm_rank,
label_offsets,
fan_out,
rng_state,
std::move(options_cpp),
do_expensive_check};
return cugraph::c_api::run_algorithm(graph, functor, result, error);
}
8 changes: 3 additions & 5 deletions cpp/src/traversal/bfs_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@

#include <raft/core/handle.hpp>

#include <rmm/exec_policy.hpp>

#include <thrust/copy.h>
#include <thrust/count.h>
#include <thrust/fill.h>
Expand Down Expand Up @@ -149,19 +147,19 @@ void bfs(raft::handle_t const& handle,
auto constexpr invalid_distance = std::numeric_limits<vertex_t>::max();
auto constexpr invalid_vertex = invalid_vertex_id<vertex_t>::value;

thrust::fill(rmm::exec_policy(handle.get_thrust_policy()),
thrust::fill(handle.get_thrust_policy(),
distances,
distances + push_graph_view.local_vertex_partition_range_size(),
invalid_distance);
thrust::fill(rmm::exec_policy(handle.get_thrust_policy()),
thrust::fill(handle.get_thrust_policy(),
predecessor_first,
predecessor_first + push_graph_view.local_vertex_partition_range_size(),
invalid_vertex);
auto vertex_partition = vertex_partition_device_view_t<vertex_t, GraphViewType::is_multi_gpu>(
push_graph_view.local_vertex_partition_view());
if (n_sources) {
thrust::for_each(
rmm::exec_policy(handle.get_thrust_policy()),
handle.get_thrust_policy(),
sources,
sources + n_sources,
[vertex_partition, distances, predecessor_first] __device__(auto v) {
Expand Down
1 change: 1 addition & 0 deletions cpp/tests/c_api/create_graph_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ int test_create_sg_graph_csr()
NULL,
NULL,
NULL,
NULL,
h_fan_out_view,
rng_state,
sampling_options,
Expand Down
4 changes: 4 additions & 0 deletions cpp/tests/c_api/mg_uniform_neighbor_sample_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle
d_start_labels_view,
NULL,
NULL,
NULL,
h_fan_out_view,
rng_state,
sampling_options,
Expand Down Expand Up @@ -565,6 +566,7 @@ int test_uniform_neighbor_from_alex(const cugraph_resource_handle_t* handle)
d_label_view,
NULL,
NULL,
NULL,
h_fan_out_view,
rng_state,
sampling_options,
Expand Down Expand Up @@ -841,6 +843,7 @@ int test_uniform_neighbor_sample_alex_bug(const cugraph_resource_handle_t* handl
d_start_labels_view,
d_label_list_view,
d_label_to_output_comm_rank_view,
NULL,
h_fan_out_view,
rng_state,
sampling_options,
Expand Down Expand Up @@ -1099,6 +1102,7 @@ int test_uniform_neighbor_sample_sort_by_hop(const cugraph_resource_handle_t* ha
d_start_labels_view,
d_label_list_view,
d_label_to_output_comm_rank_view,
NULL,
h_fan_out_view,
rng_state,
sampling_options,
Expand Down
2 changes: 2 additions & 0 deletions cpp/tests/c_api/uniform_neighbor_sample_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle
d_start_labels_view,
NULL,
NULL,
NULL,
h_fan_out_view,
rng_state,
sampling_options,
Expand Down Expand Up @@ -661,6 +662,7 @@ int test_uniform_neighbor_sample_with_labels(const cugraph_resource_handle_t* ha
d_start_labels_view,
NULL,
NULL,
NULL,
h_fan_out_view,
rng_state,
sampling_options,
Expand Down
15 changes: 14 additions & 1 deletion python/cugraph/cugraph/sampling/uniform_neighbor_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ def uniform_neighbor_sample(
prior_sources_behavior: str = None,
deduplicate_sources: bool = False,
renumber: bool = False,
retain_seeds: bool = False,
label_offsets: Sequence = None,
use_legacy_names: bool = True, # deprecated
compress_per_hop: bool = False,
compression: str = "COO",
Expand Down Expand Up @@ -142,6 +144,15 @@ def uniform_neighbor_sample(
will return the renumber map and renumber map offsets
as an additional dataframe.
retain_seeds: bool, optional (default=False)
If True, will retain the original seeds (original source vertices)
in the output even if they do not have outgoing neighbors.
label_offsets: integer sequence, optional (default=None)
Offsets of each label within the start vertex list.
Required if retain_seeds is True; ignored otherwise.
use_legacy_names: bool, optional (default=True)
Whether to use the legacy column names (sources, destinations).
If True, will use "sources" and "destinations" as the column names.
Expand Down Expand Up @@ -342,13 +353,15 @@ def uniform_neighbor_sample(
else None,
h_fan_out=fanout_vals,
with_replacement=with_replacement,
do_expensive_check=False,
do_expensive_check=True,
with_edge_properties=with_edge_properties,
random_state=random_state,
prior_sources_behavior=prior_sources_behavior,
deduplicate_sources=deduplicate_sources,
return_hops=return_hops,
renumber=renumber,
retain_seeds=retain_seeds,
label_offsets=label_offsets,
compression=compression,
compress_per_hop=compress_per_hop,
return_dict=True,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,3 @@ def test_mg_betweenness_centrality(
second_key="ref_bc",
epsilon=DEFAULT_EPSILON,
)

# Clean-up stored dataset edge-lists
dataset.unload()
Original file line number Diff line number Diff line change
Expand Up @@ -84,5 +84,3 @@ def test_mg_edge_betweenness_centrality(
second_key="ref_bc",
epsilon=DEFAULT_EPSILON,
)
# Clean-up stored dataset edge-lists
dataset.unload()
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.:
# Copyright (c) 2020-2024, NVIDIA CORPORATION.:
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -111,11 +111,18 @@ def calc_betweenness_centrality(
else:
edge_attr = None

G = graph_file.get_graph(
download=True,
create_using=cugraph.Graph(directed=directed),
ignore_weights=not edgevals,
)
G = None
if multi_gpu_batch:
G = graph_file.get_dask_graph(
create_using=cugraph.Graph(directed=directed), ignore_weights=not edgevals
)
G.enable_batch()
else:
G = graph_file.get_graph(
download=True,
create_using=cugraph.Graph(directed=directed),
ignore_weights=not edgevals,
)

M = G.to_pandas_edgelist().rename(
columns={"src": "0", "dst": "1", "wgt": edge_attr}
Expand All @@ -130,8 +137,6 @@ def calc_betweenness_centrality(
)

assert G is not None and Gnx is not None
if multi_gpu_batch:
G.enable_batch()

calc_func = None
if k is not None and seed is not None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,12 @@ def setup_function():


def get_sg_graph(dataset, directed):
dataset.unload()
G = dataset.get_graph(create_using=cugraph.Graph(directed=directed))

return G


def get_mg_graph(dataset, directed):
dataset.unload()
ddf = dataset.get_dask_edgelist()
dg = cugraph.Graph(directed=directed)
dg.from_dask_cudf_edgelist(
Expand Down Expand Up @@ -96,7 +94,6 @@ def test_dask_mg_betweenness_centrality(
benchmark,
):
g = get_sg_graph(dataset, directed)
dataset.unload()
dg = get_mg_graph(dataset, directed)
random_state = subset_seed

Expand Down Expand Up @@ -143,6 +140,3 @@ def test_dask_mg_betweenness_centrality(
diff = cupy.isclose(mg_bc_results, sg_bc_results)

assert diff.all()

# Clean-up stored dataset edge-lists
dataset.unload()
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,12 @@ def setup_function():


def get_sg_graph(dataset, directed):
dataset.unload()
G = dataset.get_graph(create_using=cugraph.Graph(directed=directed))

return G


def get_mg_graph(dataset, directed):
dataset.unload()
ddf = dataset.get_dask_edgelist()
dg = cugraph.Graph(directed=directed)
dg.from_dask_cudf_edgelist(
Expand Down Expand Up @@ -118,6 +116,3 @@ def test_dask_mg_degree(dask_client, dataset, directed):
check_names=False,
check_dtype=False,
)

# Clean-up stored dataset edge-lists
dataset.unload()
Loading

0 comments on commit 814daaa

Please sign in to comment.