
Commit

Merge branch 'branch-23.10' into dgl_cugraph_comparision
alexbarghi-nv authored Aug 30, 2023
2 parents f99a518 + b5d8cbe commit a6f16b0
Showing 25 changed files with 74 additions and 63 deletions.
4 changes: 4 additions & 0 deletions .github/copy-pr-bot.yaml
@@ -0,0 +1,4 @@
# Configuration file for `copy-pr-bot` GitHub App
# https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/

enabled: true
1 change: 0 additions & 1 deletion .github/ops-bot.yaml
@@ -5,5 +5,4 @@ auto_merger: true
branch_checker: true
label_checker: true
release_drafter: true
copy_prs: true
recently_updated: true
6 changes: 5 additions & 1 deletion ci/test_python.sh
@@ -63,6 +63,10 @@ pytest \
tests
popd

# FIXME: TEMPORARILY disable single-GPU "MG" testing until
# https://github.com/rapidsai/cugraph/issues/3790 is closed
# When closed, replace -k "not _mg" with
# -k "not test_property_graph_mg" \
rapids-logger "pytest cugraph"
pushd python/cugraph/cugraph
export DASK_WORKER_DEVICES="0"
@@ -75,7 +79,7 @@ pytest \
--cov=cugraph \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cugraph-coverage.xml" \
--cov-report=term \
-k "not test_property_graph_mg" \
-k "not _mg" \
tests
popd

4 changes: 3 additions & 1 deletion ci/test_wheel.sh
@@ -18,5 +18,7 @@ arch=$(uname -m)
if [[ "${arch}" == "aarch64" && ${RAPIDS_BUILD_TYPE} == "pull-request" ]]; then
python ./ci/wheel_smoke_test_${package_name}.py
else
RAPIDS_DATASET_ROOT_DIR=`pwd`/datasets python -m pytest ./python/${package_name}/${package_name}/tests
# FIXME: TEMPORARILY disable single-GPU "MG" testing until
# https://github.com/rapidsai/cugraph/issues/3790 is closed
RAPIDS_DATASET_ROOT_DIR=`pwd`/datasets python -m pytest -k "not _mg" ./python/${package_name}/${package_name}/tests
fi
30 changes: 16 additions & 14 deletions cpp/src/traversal/od_shortest_distances_impl.cuh
@@ -210,12 +210,17 @@ size_t compute_kv_store_capacity(size_t new_min_size,

int32_t constexpr multi_partition_copy_block_size = 512; // tuning parameter

template <int32_t max_num_partitions, typename InputIterator, typename key_t, typename PartitionOp>
template <int32_t max_num_partitions,
typename InputIterator,
typename key_t,
typename PartitionOp,
typename KeyOp>
__global__ void multi_partition_copy(
InputIterator input_first,
InputIterator input_last,
raft::device_span<key_t*> output_buffer_ptrs,
PartitionOp partition_op, // returns max_num_partitions to discard
KeyOp key_op,
raft::device_span<size_t> partition_counters)
{
static_assert(max_num_partitions <= static_cast<int32_t>(std::numeric_limits<uint8_t>::max()));
@@ -283,7 +288,7 @@ __global__ void multi_partition_copy(
if (partition != static_cast<uint8_t>(max_num_partitions)) {
auto offset = block_start_offsets[partition] +
static_cast<size_t>(tmp_intra_block_offsets[partition] + tmp_offsets[i]);
*(output_buffer_ptrs[partition] + offset) = thrust::get<0>(*(input_first + tmp_idx));
*(output_buffer_ptrs[partition] + offset) = key_op(*(input_first + tmp_idx));
}
}
tmp_idx += gridDim.x * blockDim.x;
@@ -794,6 +799,7 @@ rmm::device_uvector<weight_t> od_shortest_distances(
split_thresholds.end(),
thrust::get<1>(pair))));
},
[] __device__(auto pair) { return thrust::get<0>(pair); },
raft::device_span<size_t>(d_counters.data(), d_counters.size()));

std::vector<size_t> h_counters(d_counters.size());
@@ -912,27 +918,22 @@ rmm::device_uvector<weight_t> od_shortest_distances(
thrust::fill(
handle.get_thrust_policy(), d_counters.begin(), d_counters.end(), size_t{0});
if (tmp_buffer.size() > 0) {
auto distance_first = thrust::make_transform_iterator(
tmp_buffer.begin(),
[key_to_dist_map = detail::kv_cuco_store_find_device_view_t(
key_to_dist_map.view())] __device__(auto key) {
return key_to_dist_map.find(key);
});
auto input_first = thrust::make_zip_iterator(tmp_buffer.begin(), distance_first);
raft::grid_1d_thread_t update_grid(tmp_buffer.size(),
multi_partition_copy_block_size,
handle.get_device_properties().maxGridSize[0]);
auto constexpr max_num_partitions =
static_cast<int32_t>(1 /* near queue */ + num_far_buffers);
multi_partition_copy<max_num_partitions>
<<<update_grid.num_blocks, update_grid.block_size, 0, handle.get_stream()>>>(
input_first,
input_first + tmp_buffer.size(),
tmp_buffer.begin(),
tmp_buffer.end(),
raft::device_span<key_t*>(d_buffer_ptrs.data(), d_buffer_ptrs.size()),
[split_thresholds = raft::device_span<weight_t const>(
[key_to_dist_map =
detail::kv_cuco_store_find_device_view_t(key_to_dist_map.view()),
split_thresholds = raft::device_span<weight_t const>(
d_split_thresholds.data(), d_split_thresholds.size()),
invalid_threshold] __device__(auto pair) {
auto dist = thrust::get<1>(pair);
invalid_threshold] __device__(auto key) {
auto dist = key_to_dist_map.find(key);
return static_cast<uint8_t>(
(dist < invalid_threshold)
? max_num_partitions /* discard */
@@ -942,6 +943,7 @@ rmm::device_uvector<weight_t> od_shortest_distances(
split_thresholds.end(),
dist)));
},
thrust::identity<key_t>{},
raft::device_span<size_t>(d_counters.data(), d_counters.size()));
}
std::vector<size_t> h_counters(d_counters.size());
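Note on the kernel change above: multi_partition_copy now takes a separate KeyOp alongside PartitionOp, so the value written into a partition's output buffer is decoupled from the criterion used to choose the partition, and callers no longer need to build a zip iterator pairing each key with its looked-up distance. A minimal host-side C++ sketch of that pattern (plain standard library only; the bucketing thresholds and functor names are illustrative, not the actual CUDA kernel or raft/thrust code):

#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

// Host-side analogue of multi_partition_copy: partition_op picks a bucket
// (returning buckets.size() means "discard"); key_op extracts the value
// that actually gets stored in the chosen bucket.
template <typename InputIt, typename T, typename PartitionOp, typename KeyOp>
void multi_partition_copy(InputIt first,
                          InputIt last,
                          std::vector<std::vector<T>>& buckets,
                          PartitionOp partition_op,
                          KeyOp key_op)
{
  auto const num_partitions = buckets.size();
  for (auto it = first; it != last; ++it) {
    auto const p = partition_op(*it);
    if (p < num_partitions) { buckets[p].push_back(key_op(*it)); }
  }
}

int main()
{
  // (vertex key, tentative distance) pairs; thresholds below are made up.
  std::vector<std::pair<int, float>> input{{1, 0.5f}, {2, 3.0f}, {3, 7.5f}};
  std::vector<std::vector<int>> buckets(2);  // e.g. one near queue + one far buffer

  multi_partition_copy(
    input.begin(),
    input.end(),
    buckets,
    [](auto const& kv) -> std::size_t {  // bucket on the distance
      if (kv.second < 2.0f) return 0;    // near
      if (kv.second < 5.0f) return 1;    // far
      return 2;                          // discard
    },
    [](auto const& kv) { return kv.first; });  // store only the key

  for (std::size_t p = 0; p < buckets.size(); ++p) {
    std::cout << "partition " << p << ":";
    for (int k : buckets[p]) std::cout << ' ' << k;
    std::cout << '\n';
  }
  return 0;
}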
8 changes: 4 additions & 4 deletions cpp/tests/traversal/od_shortest_distances_test.cpp
@@ -225,27 +225,27 @@ class Tests_ODShortestDistances
using Tests_ODShortestDistances_File = Tests_ODShortestDistances<cugraph::test::File_Usecase>;
using Tests_ODShortestDistances_Rmat = Tests_ODShortestDistances<cugraph::test::Rmat_Usecase>;

TEST_P(Tests_ODShortestDistances_File, DISABLED_CheckInt32Int32Float)
TEST_P(Tests_ODShortestDistances_File, CheckInt32Int32Float)
{
auto param = GetParam();
run_current_test<int32_t, int32_t, float>(std::get<0>(param), std::get<1>(param));
}

TEST_P(Tests_ODShortestDistances_Rmat, DISABLED_CheckInt32Int32Float)
TEST_P(Tests_ODShortestDistances_Rmat, CheckInt32Int32Float)
{
auto param = GetParam();
run_current_test<int32_t, int32_t, float>(
std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
}

TEST_P(Tests_ODShortestDistances_Rmat, DISABLED_CheckInt32Int64Float)
TEST_P(Tests_ODShortestDistances_Rmat, CheckInt32Int64Float)
{
auto param = GetParam();
run_current_test<int32_t, int64_t, float>(
std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
}

TEST_P(Tests_ODShortestDistances_Rmat, DISABLED_CheckInt64Int64Float)
TEST_P(Tests_ODShortestDistances_Rmat, CheckInt64Int64Float)
{
auto param = GetParam();
run_current_test<int64_t, int64_t, float>(
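For context on the test change above: GoogleTest treats any test whose name starts with DISABLED_ as compiled but skipped at run time, so dropping the prefix is what re-enables these parameterized tests. A tiny illustration of that convention (hypothetical suite and assertions, not the cugraph test fixtures):

#include <gtest/gtest.h>

// Link against gtest_main (or provide your own main) to run.

// Built but skipped by default; can still be forced with
// --gtest_also_run_disabled_tests.
TEST(ExampleSuite, DISABLED_SkippedCase) { EXPECT_EQ(1 + 1, 2); }

// Removing the DISABLED_ prefix, as the diff above does for the
// Tests_ODShortestDistances cases, makes the test run again.
TEST(ExampleSuite, EnabledCase) { EXPECT_EQ(2 + 2, 4); }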
14 changes: 7 additions & 7 deletions python/cugraph/cugraph/link_prediction/jaccard.py
@@ -96,13 +96,13 @@ def jaccard(input_graph, vertex_pair=None, do_expensive_check=True):
relative to the adjacency list, or that given by the specified vertex
pairs.
df['source'] : cudf.Series
The source vertex ID (will be identical to first if specified).
df['destination'] : cudf.Series
The destination vertex ID (will be identical to second if
df['first'] : cudf.Series
The first vertex ID of each pair (will be identical to first if specified).
df['second'] : cudf.Series
the second vertex ID of each pair (will be identical to second if
specified).
df['jaccard_coeff'] : cudf.Series
The computed jaccard coefficient between the first and the second
The computed Jaccard coefficient between the first and the second
vertex ID.
Examples
@@ -182,8 +182,8 @@ def jaccard_coefficient(G, ebunch=None, do_expensive_check=True):
the second vertex ID of each pair (will be identical to second if
specified).
df['jaccard_coeff'] : cudf.Series
The computed Jaccard coefficient between the source and destination
vertices.
The computed Jaccard coefficient between the first and the second
vertex ID.
Examples
--------
14 changes: 7 additions & 7 deletions python/cugraph/cugraph/link_prediction/sorensen.py
@@ -74,8 +74,8 @@ def sorensen(input_graph, vertex_pair=None, do_expensive_check=True):
specified)
df['sorensen_coeff'] : cudf.Series
The computed Sorensen coefficient between the source and
destination vertices
The computed Sorensen coefficient between the first and the second
vertex ID.
Examples
--------
@@ -149,13 +149,13 @@ def sorensen_coefficient(G, ebunch=None, do_expensive_check=True):
relative to the adjacency list, or that given by the specified vertex
pairs.
df['source'] : cudf.Series
The source vertex ID (will be identical to first if specified).
df['destination'] : cudf.Series
The destination vertex ID (will be identical to second if
df['first'] : cudf.Series
The first vertex ID of each pair (will be identical to first if specified).
df['second'] : cudf.Series
the second vertex ID of each pair (will be identical to second if
specified).
df['sorensen_coeff'] : cudf.Series
The computed sorensen coefficient between the first and the second
The computed Sorensen coefficient between the first and the second
vertex ID.
Examples
@@ -159,7 +159,7 @@ def input_expected_output(input_combo):


@pytest.mark.mg
def test_dask_betweenness_centrality(dask_client, benchmark, input_expected_output):
def test_dask_mg_betweenness_centrality(dask_client, benchmark, input_expected_output):

dg = input_expected_output["MGGraph"]
k = input_expected_output["k"]
@@ -179,7 +179,7 @@ def input_expected_output(input_combo):
# is_single_gpu(), reason="skipping MG testing on Single GPU system"
# )
@pytest.mark.mg
def test_dask_edge_betweenness_centrality(
def test_dask_mg_edge_betweenness_centrality(
dask_client, benchmark, input_expected_output
):
if input_expected_output is not None:
@@ -39,7 +39,7 @@ def setup_function():
@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
@pytest.mark.parametrize("directed", IS_DIRECTED)
@pytest.mark.parametrize("input_data_path", DATASETS)
def test_dask_eigenvector_centrality(dask_client, directed, input_data_path):
def test_dask_mg_eigenvector_centrality(dask_client, directed, input_data_path):
input_data_path = input_data_path.as_posix()
print(f"dataset={input_data_path}")
chunksize = dcg.get_chunksize(input_data_path)
@@ -86,7 +86,7 @@ def test_dask_eigenvector_centrality(dask_client, directed, input_data_path):


@pytest.mark.mg
def test_dask_eigenvector_centrality_transposed_false(dask_client):
def test_dask_mg_eigenvector_centrality_transposed_false(dask_client):
input_data_path = DATASETS[0]

chunksize = dcg.get_chunksize(input_data_path)
@@ -38,7 +38,7 @@ def setup_function():
@pytest.mark.mg
@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
@pytest.mark.parametrize("directed", IS_DIRECTED)
def test_dask_katz_centrality(dask_client, directed):
def test_dask_mg_katz_centrality(dask_client, directed):

input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix()
print(f"dataset={input_data_path}")
@@ -96,7 +96,7 @@ def test_dask_katz_centrality(dask_client, directed):
@pytest.mark.mg
@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
@pytest.mark.parametrize("directed", IS_DIRECTED)
def test_dask_katz_centrality_nstart(dask_client, directed):
def test_dask_mg_katz_centrality_nstart(dask_client, directed):
input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix()
print(f"dataset={input_data_path}")
chunksize = dcg.get_chunksize(input_data_path)
@@ -141,7 +141,7 @@ def test_dask_katz_centrality_nstart(dask_client, directed):


@pytest.mark.mg
def test_dask_katz_centrality_transposed_false(dask_client):
def test_dask_mg_katz_centrality_transposed_false(dask_client):
input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix()

chunksize = dcg.get_chunksize(input_data_path)
2 changes: 1 addition & 1 deletion python/cugraph/cugraph/tests/comms/test_comms_mg.py
@@ -38,7 +38,7 @@ def setup_function():
# )
@pytest.mark.mg
@pytest.mark.parametrize("directed", IS_DIRECTED)
def test_dask_pagerank(dask_client, directed):
def test_dask_mg_pagerank(dask_client, directed):

# Initialize and run pagerank on two distributed graphs
# with same communicator
@@ -40,7 +40,7 @@ def setup_function():
# )
@pytest.mark.mg
@pytest.mark.parametrize("directed", IS_DIRECTED)
def test_dask_wcc(dask_client, directed):
def test_dask_mg_wcc(dask_client, directed):

input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "netscience.csv").as_posix()
print(f"dataset={input_data_path}")
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/tests/core/test_k_core_mg.py
@@ -137,7 +137,7 @@ def test_sg_k_core(dask_client, benchmark, input_expected_output):


@pytest.mark.mg
def test_dask_k_core(dask_client, benchmark, input_expected_output):
def test_dask_mg_k_core(dask_client, benchmark, input_expected_output):

dg = input_expected_output["MGGraph"]
core_number = input_expected_output["core_number"]
@@ -159,7 +159,7 @@ def test_dask_k_core(dask_client, benchmark, input_expected_output):


@pytest.mark.mg
def test_dask_k_core_invalid_input(dask_client):
def test_dask_mg_k_core_invalid_input(dask_client):
input_data_path = datasets[0]
chunksize = dcg.get_chunksize(input_data_path)
ddf = dask_cudf.read_csv(
2 changes: 1 addition & 1 deletion python/cugraph/cugraph/tests/internals/test_renumber_mg.py
@@ -132,7 +132,7 @@ def test_mg_renumber_add_internal_vertex_id(graph_file, dask_client):
@pytest.mark.mg
@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
@pytest.mark.parametrize("directed", IS_DIRECTED)
def test_dask_pagerank(dask_client, directed):
def test_dask_mg_pagerank(dask_client, directed):
pandas.set_option("display.max_rows", 10000)

input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix()
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/tests/link_analysis/test_hits_mg.py
@@ -111,7 +111,7 @@ def input_expected_output(input_combo):
# is_single_gpu(), reason="skipping MG testing on Single GPU system"
# )
@pytest.mark.mg
def test_dask_hits(dask_client, benchmark, input_expected_output):
def test_dask_mg_hits(dask_client, benchmark, input_expected_output):

dg = input_expected_output["MGGraph"]

@@ -155,7 +155,7 @@ def test_dask_hits(dask_client, benchmark, input_expected_output):


@pytest.mark.mg
def test_dask_hits_transposed_false(dask_client):
def test_dask_mg_hits_transposed_false(dask_client):
input_data_path = (utils.RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix()

chunksize = dcg.get_chunksize(input_data_path)
@@ -89,7 +89,7 @@ def setup_function():
@pytest.mark.parametrize("directed", IS_DIRECTED)
@pytest.mark.parametrize("has_precomputed_vertex_out_weight", HAS_PRECOMPUTED)
@pytest.mark.parametrize("has_guess", HAS_GUESS)
def test_dask_pagerank(
def test_dask_mg_pagerank(
dask_client,
personalization_perc,
directed,
@@ -215,7 +215,7 @@ def test_pagerank_invalid_personalization_dtype(dask_client):


@pytest.mark.mg
def test_dask_pagerank_transposed_false(dask_client):
def test_dask_mg_pagerank_transposed_false(dask_client):
dg = create_distributed_karate_graph(store_transposed=False)

warning_msg = (
@@ -119,7 +119,7 @@ def input_expected_output(input_combo):


@pytest.mark.mg
def test_dask_jaccard(dask_client, benchmark, input_expected_output):
def test_dask_mg_jaccard(dask_client, benchmark, input_expected_output):

dg = input_expected_output["MGGraph"]

@@ -154,7 +154,7 @@ def test_dask_jaccard(dask_client, benchmark, input_expected_output):


@pytest.mark.mg
def test_dask_weighted_jaccard(dask_client):
def test_dask_mg_weighted_jaccard(dask_client):
input_data_path = datasets[0]
chunksize = dcg.get_chunksize(input_data_path)
ddf = dask_cudf.read_csv(
@@ -122,7 +122,7 @@ def input_expected_output(input_combo):
# is_single_gpu(), reason="skipping MG testing on Single GPU system"
# )
@pytest.mark.mg
def test_dask_overlap(dask_client, benchmark, input_expected_output):
def test_dask_mg_overlap(dask_client, benchmark, input_expected_output):

dg = input_expected_output["MGGraph"]

@@ -157,7 +157,7 @@ def test_dask_overlap(dask_client, benchmark, input_expected_output):


@pytest.mark.mg
def test_dask_weighted_overlap():
def test_dask_mg_weighted_overlap():
input_data_path = datasets[0]
chunksize = dcg.get_chunksize(input_data_path)
ddf = dask_cudf.read_csv(