diff --git a/cpp/include/cugraph_c/similarity_algorithms.h b/cpp/include/cugraph_c/similarity_algorithms.h index 48f3ff5d52d..12f55132fc7 100644 --- a/cpp/include/cugraph_c/similarity_algorithms.h +++ b/cpp/include/cugraph_c/similarity_algorithms.h @@ -164,13 +164,14 @@ cugraph_error_code_t cugraph_overlap_coefficients(const cugraph_resource_handle_ * be populated if error code is not CUGRAPH_SUCCESS * @return error code */ -cugraph_error_code_t cugraph_cosine_similarity_coefficients(const cugraph_resource_handle_t* handle, - cugraph_graph_t* graph, - const cugraph_vertex_pairs_t* vertex_pairs, - bool_t use_weight, - bool_t do_expensive_check, - cugraph_similarity_result_t** result, - cugraph_error_t** error); +cugraph_error_code_t cugraph_cosine_similarity_coefficients( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_vertex_pairs_t* vertex_pairs, + bool_t use_weight, + bool_t do_expensive_check, + cugraph_similarity_result_t** result, + cugraph_error_t** error); /** * @brief Perform All-Pairs Jaccard similarity computation diff --git a/cpp/src/c_api/similarity.cpp b/cpp/src/c_api/similarity.cpp index 071f77e3172..36f1a74f3e0 100644 --- a/cpp/src/c_api/similarity.cpp +++ b/cpp/src/c_api/similarity.cpp @@ -211,20 +211,22 @@ struct all_pairs_similarity_functor : public cugraph::c_api::abstract_functor { vertices_->as_type(), vertices_->size_}) : std::nullopt, topk_ != SIZE_MAX ? std::make_optional(topk_) : std::nullopt); - - cugraph::unrenumber_int_vertices(handle_, - v1.data(), - v1.size(), - number_map->data(), - graph_view.vertex_partition_range_lasts(), - false); - - cugraph::unrenumber_int_vertices(handle_, - v2.data(), - v2.size(), - number_map->data(), - graph_view.vertex_partition_range_lasts(), - false); + + cugraph::unrenumber_int_vertices( + handle_, + v1.data(), + v1.size(), + number_map->data(), + graph_view.vertex_partition_range_lasts(), + false); + + cugraph::unrenumber_int_vertices( + handle_, + v2.data(), + v2.size(), + number_map->data(), + graph_view.vertex_partition_range_lasts(), + false); result_ = new cugraph::c_api::cugraph_similarity_result_t{ new cugraph::c_api::cugraph_type_erased_device_array_t(similarity_coefficients, @@ -296,7 +298,8 @@ struct cosine_functor { std::optional> edge_weight_view, std::tuple, raft::device_span> vertex_pairs) { - return cugraph::cosine_similarity_coefficients(handle, graph_view, edge_weight_view, vertex_pairs); + return cugraph::cosine_similarity_coefficients( + handle, graph_view, edge_weight_view, vertex_pairs); } template @@ -348,7 +351,8 @@ struct cosine_similarity_functor { std::optional> edge_weight_view, std::tuple, raft::device_span> vertex_pairs) { - return cugraph::cosine_similarity_coefficients(handle, graph_view, edge_weight_view, vertex_pairs); + return cugraph::cosine_similarity_coefficients( + handle, graph_view, edge_weight_view, vertex_pairs); } template @@ -569,4 +573,4 @@ extern "C" cugraph_error_code_t cugraph_all_pairs_cosine_similarity_coefficients handle, graph, vertices, overlap_functor{}, use_weight, topk, do_expensive_check); return cugraph::c_api::run_algorithm(graph, functor, result, error); -} \ No newline at end of file +} diff --git a/cpp/tests/c_api/mg_similarity_test.c b/cpp/tests/c_api/mg_similarity_test.c index b53788d01d6..486ca34aaca 100644 --- a/cpp/tests/c_api/mg_similarity_test.c +++ b/cpp/tests/c_api/mg_similarity_test.c @@ -26,7 +26,16 @@ typedef int32_t vertex_t; typedef int32_t edge_t; typedef float weight_t; -typedef enum { JACCARD, SORENSEN, OVERLAP, COSINE, ALL_PAIRS_JACCARD, ALL_PAIRS_SORENSEN, ALL_PAIRS_OVERLAP, ALL_PAIRS_COSINE } similarity_t; +typedef enum { + JACCARD, + SORENSEN, + OVERLAP, + COSINE, + ALL_PAIRS_JACCARD, + ALL_PAIRS_SORENSEN, + ALL_PAIRS_OVERLAP, + ALL_PAIRS_COSINE +} similarity_t; int generic_similarity_test(const cugraph_resource_handle_t* handle, vertex_t* h_src, @@ -51,14 +60,14 @@ int generic_similarity_test(const cugraph_resource_handle_t* handle, cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; cugraph_error_t* ret_error; - cugraph_graph_t* graph = NULL; - cugraph_similarity_result_t* result = NULL; - cugraph_vertex_pairs_t* vertex_pairs = NULL; - cugraph_type_erased_device_array_t* v1 = NULL; - cugraph_type_erased_device_array_t* v2 = NULL; - cugraph_type_erased_device_array_t* start_v = NULL; - cugraph_type_erased_device_array_view_t* v1_view = NULL; - cugraph_type_erased_device_array_view_t* v2_view = NULL; + cugraph_graph_t* graph = NULL; + cugraph_similarity_result_t* result = NULL; + cugraph_vertex_pairs_t* vertex_pairs = NULL; + cugraph_type_erased_device_array_t* v1 = NULL; + cugraph_type_erased_device_array_t* v2 = NULL; + cugraph_type_erased_device_array_t* start_v = NULL; + cugraph_type_erased_device_array_view_t* v1_view = NULL; + cugraph_type_erased_device_array_view_t* v2_view = NULL; cugraph_type_erased_device_array_view_t* start_v_view = NULL; ret_code = create_test_graph( @@ -67,12 +76,11 @@ int generic_similarity_test(const cugraph_resource_handle_t* handle, TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); - if (topk == 0) { topk = SIZE_MAX;} + if (topk == 0) { topk = SIZE_MAX; } - if (cugraph_resource_handle_get_rank(handle) != 0) { num_pairs = 0;} + if (cugraph_resource_handle_get_rank(handle) != 0) { num_pairs = 0; } if (h_first != NULL && h_second != NULL) { - ret_code = cugraph_type_erased_device_array_create(handle, num_pairs, vertex_tid, &v1, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "v1 create failed."); @@ -97,17 +105,17 @@ int generic_similarity_test(const cugraph_resource_handle_t* handle, TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create vertex pairs failed."); } - if (h_start_vertices != NULL) { - ret_code = - cugraph_type_erased_device_array_create(handle, num_start_vertices, vertex_tid, &start_v, &ret_error); + ret_code = cugraph_type_erased_device_array_create( + handle, num_start_vertices, vertex_tid, &start_v, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "v1 create failed."); start_v_view = cugraph_type_erased_device_array_view(start_v); ret_code = cugraph_type_erased_device_array_view_copy_from_host( handle, start_v_view, (byte_t*)h_start_vertices, &ret_error); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "h_start_vertices copy_from_host failed."); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "h_start_vertices copy_from_host failed."); } switch (test_type) { @@ -179,7 +187,6 @@ int generic_similarity_test(const cugraph_resource_handle_t* handle, "similarity results don't match"); } - if (result != NULL) cugraph_similarity_result_free(result); if (vertex_pairs != NULL) cugraph_vertex_pairs_free(vertex_pairs); cugraph_mg_graph_free(graph); @@ -190,17 +197,17 @@ int generic_similarity_test(const cugraph_resource_handle_t* handle, int test_jaccard(const cugraph_resource_handle_t* handle) { - size_t num_edges = 16; - size_t num_vertices = 6; - size_t num_pairs = 10; + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 10; size_t num_start_vertices = 0; - size_t topk = 0; + size_t topk = 0; - vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; - vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; - weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - vertex_t h_first[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3}; - vertex_t h_second[] = {1, 3, 4, 2, 3, 5, 3, 4, 5, 4}; + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_first[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3}; + vertex_t h_second[] = {1, 3, 4, 2, 3, 5, 3, 4, 5, 4}; vertex_t* h_start_vertices = NULL; weight_t h_result[] = {0.2, 0.666667, 0.333333, 0.4, 0.166667, 0.5, 0.2, 0.25, 0.25, 0.666667}; @@ -224,21 +231,21 @@ int test_jaccard(const cugraph_resource_handle_t* handle) int test_weighted_jaccard(const cugraph_resource_handle_t* handle) { - size_t num_edges = 16; - size_t num_vertices = 7; - size_t num_pairs = 3; + size_t num_edges = 16; + size_t num_vertices = 7; + size_t num_pairs = 3; size_t num_start_vertices = 0; - size_t topk = 0; + size_t topk = 0; vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; weight_t h_wgt[] = { 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; - vertex_t h_first[] = {0, 0, 1}; - vertex_t h_second[] = {1, 2, 3}; + vertex_t h_first[] = {0, 0, 1}; + vertex_t h_second[] = {1, 2, 3}; vertex_t* h_start_vertices = NULL; - weight_t h_result[] = {0.357143, 0.208333, 0.0}; + weight_t h_result[] = {0.357143, 0.208333, 0.0}; return generic_similarity_test(handle, h_src, @@ -260,19 +267,21 @@ int test_weighted_jaccard(const cugraph_resource_handle_t* handle) int test_all_pairs_jaccard(const cugraph_resource_handle_t* handle) { - size_t num_edges = 16; - size_t num_vertices = 6; - size_t num_pairs = 0; + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 0; size_t num_start_vertices = 0; - size_t topk = 0; + size_t topk = 0; - vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; - vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; - weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - vertex_t* h_first = NULL; - vertex_t* h_second = NULL; + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t* h_first = NULL; + vertex_t* h_second = NULL; vertex_t* h_start_vertices = NULL; - weight_t h_result[] = {0.2, 0.25, 0.666667, 0.333333, 0.2, 0.4, 0.166667, 0.5, 0.25, 0.4, 0.2, 0.25, 0.25, 0.666667, 0.166667, 0.2, 0.666667, 0.3333333, 0.25, 0.666667, 0.5, 0.25}; + weight_t h_result[] = {0.2, 0.25, 0.666667, 0.333333, 0.2, 0.4, 0.166667, 0.5, + 0.25, 0.4, 0.2, 0.25, 0.25, 0.666667, 0.166667, 0.2, + 0.666667, 0.3333333, 0.25, 0.666667, 0.5, 0.25}; return generic_similarity_test(handle, h_src, @@ -294,19 +303,20 @@ int test_all_pairs_jaccard(const cugraph_resource_handle_t* handle) int test_all_pairs_jaccard_with_start_vertices(const cugraph_resource_handle_t* handle) { - size_t num_edges = 16; - size_t num_vertices = 6; - size_t num_pairs = 0; + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 0; size_t num_start_vertices = 3; - size_t topk = 0; + size_t topk = 0; - vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; - vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; - weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - vertex_t* h_first = NULL; - vertex_t* h_second = NULL; - vertex_t h_start_vertices[] = {0, 1, 2}; - weight_t h_result[] = {0.2, 0.25, 0.666667, 0.333333, 0.2, 0.4, 0.166667, 0.5, 0.25, 0.4, 0.2, 0.25, 0.25}; + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t* h_first = NULL; + vertex_t* h_second = NULL; + vertex_t h_start_vertices[] = {0, 1, 2}; + weight_t h_result[] = { + 0.2, 0.25, 0.666667, 0.333333, 0.2, 0.4, 0.166667, 0.5, 0.25, 0.4, 0.2, 0.25, 0.25}; return generic_similarity_test(handle, h_src, @@ -328,19 +338,19 @@ int test_all_pairs_jaccard_with_start_vertices(const cugraph_resource_handle_t* int test_all_pairs_jaccard_with_topk(const cugraph_resource_handle_t* handle) { - size_t num_edges = 16; - size_t num_vertices = 6; - size_t num_pairs = 0; + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 0; size_t num_start_vertices = 3; - size_t topk = 5; + size_t topk = 5; - vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; - vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; - weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - vertex_t* h_first = NULL; - vertex_t* h_second = NULL; + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t* h_first = NULL; + vertex_t* h_second = NULL; vertex_t* h_start_vertices = NULL; - weight_t h_result[] = {0.666667, 0.666667, 0.666667, 0.666667, 0.5}; + weight_t h_result[] = {0.666667, 0.666667, 0.666667, 0.666667, 0.5}; return generic_similarity_test(handle, h_src, @@ -360,21 +370,19 @@ int test_all_pairs_jaccard_with_topk(const cugraph_resource_handle_t* handle) ALL_PAIRS_JACCARD); } - - int test_sorensen(const cugraph_resource_handle_t* handle) { - size_t num_edges = 16; - size_t num_vertices = 6; - size_t num_pairs = 10; + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 10; size_t num_start_vertices = 0; - size_t topk = 0; + size_t topk = 0; - vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; - vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; - weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - vertex_t h_first[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3}; - vertex_t h_second[] = {1, 3, 4, 2, 3, 5, 3, 4, 5, 4}; + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_first[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3}; + vertex_t h_second[] = {1, 3, 4, 2, 3, 5, 3, 4, 5, 4}; vertex_t* h_start_vertices = NULL; weight_t h_result[] = {0.333333, 0.8, 0.5, 0.571429, 0.285714, 0.666667, 0.333333, 0.4, 0.4, 0.8}; @@ -398,21 +406,21 @@ int test_sorensen(const cugraph_resource_handle_t* handle) int test_weighted_sorensen(const cugraph_resource_handle_t* handle) { - size_t num_edges = 16; - size_t num_vertices = 7; - size_t num_pairs = 3; + size_t num_edges = 16; + size_t num_vertices = 7; + size_t num_pairs = 3; size_t num_start_vertices = 0; - size_t topk = 0; + size_t topk = 0; vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; weight_t h_wgt[] = { 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; - vertex_t h_first[] = {0, 0, 1}; - vertex_t h_second[] = {1, 2, 3}; + vertex_t h_first[] = {0, 0, 1}; + vertex_t h_second[] = {1, 2, 3}; vertex_t* h_start_vertices = NULL; - weight_t h_result[] = {0.526316, 0.344828, 0.000000}; + weight_t h_result[] = {0.526316, 0.344828, 0.000000}; return generic_similarity_test(handle, h_src, @@ -434,19 +442,21 @@ int test_weighted_sorensen(const cugraph_resource_handle_t* handle) int test_all_pairs_sorensen(const cugraph_resource_handle_t* handle) { - size_t num_edges = 16; - size_t num_vertices = 6; - size_t num_pairs = 0; + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 0; size_t num_start_vertices = 0; - size_t topk = 0; + size_t topk = 0; - vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; - vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; - weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - vertex_t* h_first = NULL; - vertex_t* h_second = NULL; + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t* h_first = NULL; + vertex_t* h_second = NULL; vertex_t* h_start_vertices = NULL; - weight_t h_result[] = {0.333333, 0.4, 0.8, 0.5, 0.333333, 0.571429, 0.285714, 0.666667, 0.4, 0.571429, 0.333333, 0.4, 0.4, 0.8, 0.285714, 0.333333, 0.8, 0.5, 0.4, 0.8, 0.666667, 0.4}; + weight_t h_result[] = {0.333333, 0.4, 0.8, 0.5, 0.333333, 0.571429, 0.285714, 0.666667, + 0.4, 0.571429, 0.333333, 0.4, 0.4, 0.8, 0.285714, 0.333333, + 0.8, 0.5, 0.4, 0.8, 0.666667, 0.4}; return generic_similarity_test(handle, h_src, @@ -468,19 +478,31 @@ int test_all_pairs_sorensen(const cugraph_resource_handle_t* handle) int test_all_pairs_sorensen_with_start_vertices(const cugraph_resource_handle_t* handle) { - size_t num_edges = 16; - size_t num_vertices = 6; - size_t num_pairs = 0; + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 0; size_t num_start_vertices = 3; - size_t topk = 0; - - vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; - vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; - weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - vertex_t* h_first = NULL; - vertex_t* h_second = NULL; - vertex_t h_start_vertices[] = {0, 1, 2}; - weight_t h_result[] = {0.333333, 0.4, 0.8, 0.5, 0.333333, 0.571429, 0.285714, 0.666667, 0.4, 0.571429, 0.333333, 0.4, 0.4}; + size_t topk = 0; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t* h_first = NULL; + vertex_t* h_second = NULL; + vertex_t h_start_vertices[] = {0, 1, 2}; + weight_t h_result[] = {0.333333, + 0.4, + 0.8, + 0.5, + 0.333333, + 0.571429, + 0.285714, + 0.666667, + 0.4, + 0.571429, + 0.333333, + 0.4, + 0.4}; return generic_similarity_test(handle, h_src, @@ -502,19 +524,19 @@ int test_all_pairs_sorensen_with_start_vertices(const cugraph_resource_handle_t* int test_all_pairs_sorensen_with_topk(const cugraph_resource_handle_t* handle) { - size_t num_edges = 16; - size_t num_vertices = 6; - size_t num_pairs = 0; + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 0; size_t num_start_vertices = 3; - size_t topk = 5; + size_t topk = 5; - vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; - vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; - weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - vertex_t* h_first = NULL; - vertex_t* h_second = NULL; + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t* h_first = NULL; + vertex_t* h_second = NULL; vertex_t* h_start_vertices = NULL; - weight_t h_result[] = {0.8, 0.8, 0.8, 0.8, 0.666667}; + weight_t h_result[] = {0.8, 0.8, 0.8, 0.8, 0.666667}; return generic_similarity_test(handle, h_src, @@ -536,19 +558,19 @@ int test_all_pairs_sorensen_with_topk(const cugraph_resource_handle_t* handle) int test_overlap(const cugraph_resource_handle_t* handle) { - size_t num_edges = 16; - size_t num_vertices = 6; - size_t num_pairs = 10; + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 10; size_t num_start_vertices = 0; - size_t topk = 0; + size_t topk = 0; - vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; - vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; - weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - vertex_t h_first[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3}; - vertex_t h_second[] = {1, 3, 4, 2, 3, 5, 3, 4, 5, 4}; + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_first[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3}; + vertex_t h_second[] = {1, 3, 4, 2, 3, 5, 3, 4, 5, 4}; vertex_t* h_start_vertices = NULL; - weight_t h_result[] = {0.5, 1, 0.5, 0.666667, 0.333333, 1, 0.333333, 0.5, 0.5, 1}; + weight_t h_result[] = {0.5, 1, 0.5, 0.666667, 0.333333, 1, 0.333333, 0.5, 0.5, 1}; return generic_similarity_test(handle, h_src, @@ -570,21 +592,21 @@ int test_overlap(const cugraph_resource_handle_t* handle) int test_weighted_overlap(const cugraph_resource_handle_t* handle) { - size_t num_edges = 16; - size_t num_vertices = 7; - size_t num_pairs = 3; + size_t num_edges = 16; + size_t num_vertices = 7; + size_t num_pairs = 3; size_t num_start_vertices = 0; - size_t topk = 0; + size_t topk = 0; vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; weight_t h_wgt[] = { 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; - vertex_t h_first[] = {0, 0, 1}; - vertex_t h_second[] = {1, 2, 3}; + vertex_t h_first[] = {0, 0, 1}; + vertex_t h_second[] = {1, 2, 3}; vertex_t* h_start_vertices = NULL; - weight_t h_result[] = {0.714286, 0.416667, 0.000000}; + weight_t h_result[] = {0.714286, 0.416667, 0.000000}; return generic_similarity_test(handle, h_src, @@ -606,19 +628,21 @@ int test_weighted_overlap(const cugraph_resource_handle_t* handle) int test_all_pairs_overlap(const cugraph_resource_handle_t* handle) { - size_t num_edges = 16; - size_t num_vertices = 6; - size_t num_pairs = 0; + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 0; size_t num_start_vertices = 0; - size_t topk = 0; + size_t topk = 0; - vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; - vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; - weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - vertex_t* h_first = NULL; - vertex_t* h_second = NULL; + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t* h_first = NULL; + vertex_t* h_second = NULL; vertex_t* h_start_vertices = NULL; - weight_t h_result[] = {0.5, 0.5, 1.0, 0.5, 0.5, 0.666667, 0.333333, 1.0, 0.5, 0.666667, 0.333333, 0.5, 0.5, 1.0, 0.333333, 0.333333, 1.0, 0.5, 0.5, 1.0, 1.0, 0.5}; + weight_t h_result[] = {0.5, 0.5, 1.0, 0.5, 0.5, 0.666667, 0.333333, 1.0, + 0.5, 0.666667, 0.333333, 0.5, 0.5, 1.0, 0.333333, 0.333333, + 1.0, 0.5, 0.5, 1.0, 1.0, 0.5}; return generic_similarity_test(handle, h_src, @@ -640,19 +664,20 @@ int test_all_pairs_overlap(const cugraph_resource_handle_t* handle) int test_all_pairs_overlap_with_start_vertices(const cugraph_resource_handle_t* handle) { - size_t num_edges = 16; - size_t num_vertices = 6; - size_t num_pairs = 0; + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 0; size_t num_start_vertices = 3; - size_t topk = 0; + size_t topk = 0; - vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; - vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; - weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - vertex_t* h_first = NULL; - vertex_t* h_second = NULL; - vertex_t h_start_vertices[] = {0, 1, 2}; - weight_t h_result[] = {0.5, 0.5, 1.0, 0.5, 0.5, 0.666667, 0.333333, 1.0, 0.5, 0.666667, 0.333333, 0.5, 0.5}; + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t* h_first = NULL; + vertex_t* h_second = NULL; + vertex_t h_start_vertices[] = {0, 1, 2}; + weight_t h_result[] = { + 0.5, 0.5, 1.0, 0.5, 0.5, 0.666667, 0.333333, 1.0, 0.5, 0.666667, 0.333333, 0.5, 0.5}; return generic_similarity_test(handle, h_src, @@ -674,19 +699,19 @@ int test_all_pairs_overlap_with_start_vertices(const cugraph_resource_handle_t* int test_all_pairs_overlap_with_topk(const cugraph_resource_handle_t* handle) { - size_t num_edges = 16; - size_t num_vertices = 6; - size_t num_pairs = 0; + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 0; size_t num_start_vertices = 3; - size_t topk = 5; + size_t topk = 5; - vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; - vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; - weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - vertex_t* h_first = NULL; - vertex_t* h_second = NULL; + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t* h_first = NULL; + vertex_t* h_second = NULL; vertex_t* h_start_vertices = NULL; - weight_t h_result[] = {1.0, 1.0, 1.0, 1.0, 1.0}; + weight_t h_result[] = {1.0, 1.0, 1.0, 1.0, 1.0}; return generic_similarity_test(handle, h_src, @@ -706,32 +731,21 @@ int test_all_pairs_overlap_with_topk(const cugraph_resource_handle_t* handle) ALL_PAIRS_OVERLAP); } - - - - - - - - - - - int test_cosine(const cugraph_resource_handle_t* handle) { - size_t num_edges = 16; - size_t num_vertices = 6; - size_t num_pairs = 10; + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 10; size_t num_start_vertices = 0; - size_t topk = 0; + size_t topk = 0; - vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; - vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; - weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - vertex_t h_first[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3}; - vertex_t h_second[] = {1, 3, 4, 2, 3, 5, 3, 4, 5, 4}; + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_first[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3}; + vertex_t h_second[] = {1, 3, 4, 2, 3, 5, 3, 4, 5, 4}; vertex_t* h_start_vertices = NULL; - weight_t h_result[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + weight_t h_result[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; return generic_similarity_test(handle, h_src, @@ -753,21 +767,21 @@ int test_cosine(const cugraph_resource_handle_t* handle) int test_weighted_cosine(const cugraph_resource_handle_t* handle) { - size_t num_edges = 16; - size_t num_vertices = 7; - size_t num_pairs = 2; + size_t num_edges = 16; + size_t num_vertices = 7; + size_t num_pairs = 2; size_t num_start_vertices = 0; - size_t topk = 0; + size_t topk = 0; vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; weight_t h_wgt[] = { 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; - vertex_t h_first[] = {0, 0}; - vertex_t h_second[] = {1, 2}; + vertex_t h_first[] = {0, 0}; + vertex_t h_second[] = {1, 2}; vertex_t* h_start_vertices = NULL; - weight_t h_result[] = {0.990830, 0.976187}; + weight_t h_result[] = {0.990830, 0.976187}; return generic_similarity_test(handle, h_src, @@ -789,19 +803,21 @@ int test_weighted_cosine(const cugraph_resource_handle_t* handle) int test_all_pairs_cosine(const cugraph_resource_handle_t* handle) { - size_t num_edges = 16; - size_t num_vertices = 6; - size_t num_pairs = 0; + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 0; size_t num_start_vertices = 0; - size_t topk = 0; + size_t topk = 0; - vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; - vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; - weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - vertex_t* h_first = NULL; - vertex_t* h_second = NULL; + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t* h_first = NULL; + vertex_t* h_second = NULL; vertex_t* h_start_vertices = NULL; - weight_t h_result[] = {0.5, 0.5, 1.0, 0.5, 0.5, 0.666667, 0.333333, 1.0, 0.5, 0.666667, 0.333333, 0.5, 0.5, 1.0, 0.333333, 0.333333, 1.0, 0.5, 0.5, 1.0, 1.0, 0.5}; + weight_t h_result[] = {0.5, 0.5, 1.0, 0.5, 0.5, 0.666667, 0.333333, 1.0, + 0.5, 0.666667, 0.333333, 0.5, 0.5, 1.0, 0.333333, 0.333333, + 1.0, 0.5, 0.5, 1.0, 1.0, 0.5}; return generic_similarity_test(handle, h_src, @@ -823,19 +839,20 @@ int test_all_pairs_cosine(const cugraph_resource_handle_t* handle) int test_all_pairs_cosine_with_start_vertices(const cugraph_resource_handle_t* handle) { - size_t num_edges = 16; - size_t num_vertices = 6; - size_t num_pairs = 0; + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 0; size_t num_start_vertices = 3; - size_t topk = 0; + size_t topk = 0; - vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; - vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; - weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - vertex_t* h_first = NULL; - vertex_t* h_second = NULL; - vertex_t h_start_vertices[] = {0, 1, 2}; - weight_t h_result[] = {0.5, 0.5, 1.0, 0.5, 0.5, 0.666667, 0.333333, 1.0, 0.5, 0.666667, 0.333333, 0.5, 0.5}; + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t* h_first = NULL; + vertex_t* h_second = NULL; + vertex_t h_start_vertices[] = {0, 1, 2}; + weight_t h_result[] = { + 0.5, 0.5, 1.0, 0.5, 0.5, 0.666667, 0.333333, 1.0, 0.5, 0.666667, 0.333333, 0.5, 0.5}; return generic_similarity_test(handle, h_src, @@ -857,19 +874,19 @@ int test_all_pairs_cosine_with_start_vertices(const cugraph_resource_handle_t* h int test_all_pairs_cosine_with_topk(const cugraph_resource_handle_t* handle) { - size_t num_edges = 16; - size_t num_vertices = 6; - size_t num_pairs = 0; + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 0; size_t num_start_vertices = 3; - size_t topk = 5; + size_t topk = 5; - vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; - vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; - weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - vertex_t* h_first = NULL; - vertex_t* h_second = NULL; + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t* h_first = NULL; + vertex_t* h_second = NULL; vertex_t* h_start_vertices = NULL; - weight_t h_result[] = {1.0, 1.0, 1.0, 1.0, 1.0}; + weight_t h_result[] = {1.0, 1.0, 1.0, 1.0, 1.0}; return generic_similarity_test(handle, h_src, @@ -889,31 +906,6 @@ int test_all_pairs_cosine_with_topk(const cugraph_resource_handle_t* handle) ALL_PAIRS_COSINE); } - - - - - - - - - - - - - - - - - - - - - - - - - /******************************************************************************/ int main(int argc, char** argv) diff --git a/cpp/tests/c_api/similarity_test.c b/cpp/tests/c_api/similarity_test.c index 0d544ff82d6..70e0cb6fb95 100644 --- a/cpp/tests/c_api/similarity_test.c +++ b/cpp/tests/c_api/similarity_test.c @@ -26,7 +26,7 @@ typedef int32_t vertex_t; typedef int32_t edge_t; typedef float weight_t; -typedef enum { JACCARD, SORENSEN, OVERLAP, COSINE} similarity_t; +typedef enum { JACCARD, SORENSEN, OVERLAP, COSINE } similarity_t; int generic_similarity_test(vertex_t* h_src, vertex_t* h_dst, @@ -706,7 +706,9 @@ int test_all_pairs_cosine() weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; vertex_t h_first[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5}; vertex_t h_second[] = {1, 2, 3, 4, 0, 2, 3, 5, 0, 1, 3, 4, 5, 0, 1, 2, 4, 0, 2, 3, 1, 2}; - weight_t h_result[] = {0.5, 0.5, 1.0, 0.5, 0.5, 0.666667, 0.333333, 1.0, 0.5, 0.666667, 0.333333, 0.5, 0.5, 1.0, 0.333333, 0.333333, 1.0, 0.5, 0.5, 1.0, 1.0, 0.5}; + weight_t h_result[] = {0.5, 0.5, 1.0, 0.5, 0.5, 0.666667, 0.333333, 1.0, + 0.5, 0.666667, 0.333333, 0.5, 0.5, 1.0, 0.333333, 0.333333, + 1.0, 0.5, 0.5, 1.0, 1.0, 0.5}; return generic_all_pairs_similarity_test(h_src, h_dst, @@ -964,8 +966,6 @@ int test_all_pairs_cosine_topk() COSINE); } - - int test_weighted_all_pairs_cosine() { size_t num_edges = 16; @@ -1016,7 +1016,7 @@ int test_weighted_all_pairs_cosine() int main(int argc, char** argv) { int result = 0; - + result |= RUN_TEST(test_jaccard); result |= RUN_TEST(test_sorensen); result |= RUN_TEST(test_overlap); diff --git a/python/cugraph/cugraph/__init__.py b/python/cugraph/cugraph/__init__.py index 9cd8d32eb3c..ada1fec74cb 100644 --- a/python/cugraph/cugraph/__init__.py +++ b/python/cugraph/cugraph/__init__.py @@ -85,7 +85,7 @@ all_pairs_sorensen, cosine, cosine_coefficient, - all_pairs_cosine + all_pairs_cosine, ) from cugraph.traversal import ( diff --git a/python/cugraph/cugraph/dask/link_prediction/cosine.py b/python/cugraph/cugraph/dask/link_prediction/cosine.py index 4bd341d00fc..d10abbf9976 100644 --- a/python/cugraph/cugraph/dask/link_prediction/cosine.py +++ b/python/cugraph/cugraph/dask/link_prediction/cosine.py @@ -198,10 +198,11 @@ def cosine(input_graph, vertex_pair=None, use_weight=False): def all_pairs_cosine( - input_graph, - vertices: cudf.Series = None, - use_weight: bool = False, - topk: int = None): + input_graph, + vertices: cudf.Series = None, + use_weight: bool = False, + topk: int = None, +): """ Compute the All Pairs Cosine similarity between all pairs of vertices specified. All pairs Cosine similarity is defined between two sets as the ratio of the volume @@ -239,7 +240,7 @@ def all_pairs_cosine( Flag to indicate whether to compute weighted cosine (if use_weight==True) or un-weighted cosine (if use_weight==False). 'input_graph' must be weighted if 'use_weight=True'. - + topk : int, optional (default=None) Specify the number of answers to return otherwise returns the entire solution @@ -278,13 +279,11 @@ def all_pairs_cosine( ) if not isinstance(vertices, (dask_cudf.Series)): - vertices = dask_cudf.from_cudf( - vertices, npartitions=get_n_workers() - ) + vertices = dask_cudf.from_cudf(vertices, npartitions=get_n_workers()) if input_graph.renumbered: vertices = input_graph.lookup_internal_vertex_id(vertices) - + n_workers = get_n_workers() vertices = vertices.repartition(npartitions=n_workers) vertices = persist_dask_df_equal_parts_per_worker(vertices, client) diff --git a/python/cugraph/cugraph/dask/link_prediction/jaccard.py b/python/cugraph/cugraph/dask/link_prediction/jaccard.py index 85c2edab2cf..8c6c94a144f 100644 --- a/python/cugraph/cugraph/dask/link_prediction/jaccard.py +++ b/python/cugraph/cugraph/dask/link_prediction/jaccard.py @@ -196,10 +196,11 @@ def jaccard(input_graph, vertex_pair=None, use_weight=False): def all_pairs_jaccard( - input_graph, - vertices: cudf.Series = None, - use_weight: bool = False, - topk: int = None): + input_graph, + vertices: cudf.Series = None, + use_weight: bool = False, + topk: int = None, +): """ Compute the All Pairs Jaccard similarity between all pairs of vertices specified. All pairs Jaccard similarity is defined between two sets as the ratio of the volume @@ -237,7 +238,7 @@ def all_pairs_jaccard( Flag to indicate whether to compute weighted jaccard (if use_weight==True) or un-weighted jaccard (if use_weight==False). 'input_graph' must be weighted if 'use_weight=True'. - + topk : int, optional (default=None) Specify the number of answers to return otherwise returns the entire solution @@ -276,13 +277,11 @@ def all_pairs_jaccard( ) if not isinstance(vertices, (dask_cudf.Series)): - vertices = dask_cudf.from_cudf( - vertices, npartitions=get_n_workers() - ) + vertices = dask_cudf.from_cudf(vertices, npartitions=get_n_workers()) if input_graph.renumbered: vertices = input_graph.lookup_internal_vertex_id(vertices) - + n_workers = get_n_workers() vertices = vertices.repartition(npartitions=n_workers) vertices = persist_dask_df_equal_parts_per_worker(vertices, client) diff --git a/python/cugraph/cugraph/dask/link_prediction/overlap.py b/python/cugraph/cugraph/dask/link_prediction/overlap.py index 202d148937d..a27eb6eb98d 100644 --- a/python/cugraph/cugraph/dask/link_prediction/overlap.py +++ b/python/cugraph/cugraph/dask/link_prediction/overlap.py @@ -198,10 +198,11 @@ def overlap(input_graph, vertex_pair=None, use_weight=False): def all_pairs_overlap( - input_graph, - vertices: cudf.Series = None, - use_weight: bool = False, - topk: int = None): + input_graph, + vertices: cudf.Series = None, + use_weight: bool = False, + topk: int = None, +): """ Compute the All Pairs Overlap similarity between all pairs of vertices specified. All pairs Overlap Coefficient is defined between two sets as the ratio of the volume @@ -239,7 +240,7 @@ def all_pairs_overlap( Flag to indicate whether to compute weighted overlap (if use_weight==True) or un-weighted overlap (if use_weight==False). 'input_graph' must be weighted if 'use_weight=True'. - + topk : int, optional (default=None) Specify the number of answers to return otherwise returns the entire solution @@ -278,13 +279,11 @@ def all_pairs_overlap( ) if not isinstance(vertices, (dask_cudf.Series)): - vertices = dask_cudf.from_cudf( - vertices, npartitions=get_n_workers() - ) + vertices = dask_cudf.from_cudf(vertices, npartitions=get_n_workers()) if input_graph.renumbered: vertices = input_graph.lookup_internal_vertex_id(vertices) - + n_workers = get_n_workers() vertices = vertices.repartition(npartitions=n_workers) vertices = persist_dask_df_equal_parts_per_worker(vertices, client) @@ -323,4 +322,4 @@ def all_pairs_overlap( ddf = input_graph.unrenumber(ddf, "first") ddf = input_graph.unrenumber(ddf, "second") - return ddf \ No newline at end of file + return ddf diff --git a/python/cugraph/cugraph/dask/link_prediction/sorensen.py b/python/cugraph/cugraph/dask/link_prediction/sorensen.py index 728903327bb..529d1df1ef7 100644 --- a/python/cugraph/cugraph/dask/link_prediction/sorensen.py +++ b/python/cugraph/cugraph/dask/link_prediction/sorensen.py @@ -194,10 +194,11 @@ def sorensen(input_graph, vertex_pair=None, use_weight=False): def all_pairs_sorensen( - input_graph, - vertices: cudf.Series = None, - use_weight: bool = False, - topk: int = None): + input_graph, + vertices: cudf.Series = None, + use_weight: bool = False, + topk: int = None, +): """ Compute the All Pairs Sorensen similarity between all pairs of vertices specified. All pairs Sorensen coefficient is defined between two sets as the ratio of twice the @@ -235,7 +236,7 @@ def all_pairs_sorensen( Flag to indicate whether to compute weighted sorensen (if use_weight==True) or un-weighted sorensen (if use_weight==False). 'input_graph' must be weighted if 'use_weight=True'. - + topk : int, optional (default=None) Specify the number of answers to return otherwise returns the entire solution @@ -274,13 +275,11 @@ def all_pairs_sorensen( ) if not isinstance(vertices, (dask_cudf.Series)): - vertices = dask_cudf.from_cudf( - vertices, npartitions=get_n_workers() - ) + vertices = dask_cudf.from_cudf(vertices, npartitions=get_n_workers()) if input_graph.renumbered: vertices = input_graph.lookup_internal_vertex_id(vertices) - + n_workers = get_n_workers() vertices = vertices.repartition(npartitions=n_workers) vertices = persist_dask_df_equal_parts_per_worker(vertices, client) diff --git a/python/cugraph/cugraph/link_prediction/cosine.py b/python/cugraph/cugraph/link_prediction/cosine.py index 297fa15d336..9c7cdd2cfbf 100644 --- a/python/cugraph/cugraph/link_prediction/cosine.py +++ b/python/cugraph/cugraph/link_prediction/cosine.py @@ -235,11 +235,12 @@ def cosine_coefficient( return df + def all_pairs_cosine( input_graph: Graph, vertices: cudf.Series = None, use_weight: bool = False, - topk: int = None + topk: int = None, ): """ Compute the All Pairs Cosine similarity between all pairs of vertices specified. @@ -274,7 +275,7 @@ def all_pairs_cosine( Flag to indicate whether to compute weighted cosine (if use_weight==True) or un-weighted cosine (if use_weight==False). 'input_graph' must be weighted if 'use_weight=True'. - + topk : int, optional (default=None) Specify the number of answers to return otherwise returns the entire solution @@ -314,8 +315,9 @@ def all_pairs_cosine( if isinstance(vertices, list): vertices = cudf.Series( - vertices, dtype=input_graph.edgelist.edgelist_df[input_graph.srcCol].dtype - ) + vertices, + dtype=input_graph.edgelist.edgelist_df[input_graph.srcCol].dtype, + ) if input_graph.renumbered is True: if isinstance(vertices, cudf.DataFrame): @@ -325,8 +327,6 @@ def all_pairs_cosine( else: vertices = input_graph.lookup_internal_vertex_id(vertices) - - first, second, cosine_coeff = pylibcugraph_all_pairs_cosine_coefficients( resource_handle=ResourceHandle(), graph=input_graph._plc_graph, @@ -340,13 +340,8 @@ def all_pairs_cosine( vertex_pair["second"] = second if input_graph.renumbered: - vertex_pair = input_graph.unrenumber( - vertex_pair, "first", preserve_order=True - ) - vertex_pair = input_graph.unrenumber( - vertex_pair, "second", preserve_order=True - ) - + vertex_pair = input_graph.unrenumber(vertex_pair, "first", preserve_order=True) + vertex_pair = input_graph.unrenumber(vertex_pair, "second", preserve_order=True) df = vertex_pair df["cosine_coeff"] = cudf.Series(cosine_coeff) diff --git a/python/cugraph/cugraph/link_prediction/jaccard.py b/python/cugraph/cugraph/link_prediction/jaccard.py index 87585f76d10..5db28e5a33b 100644 --- a/python/cugraph/cugraph/link_prediction/jaccard.py +++ b/python/cugraph/cugraph/link_prediction/jaccard.py @@ -240,11 +240,12 @@ def jaccard_coefficient( return df + def all_pairs_jaccard( input_graph: Graph, vertices: cudf.Series = None, use_weight: bool = False, - topk: int = None + topk: int = None, ): """ Compute the All Pairs Jaccard similarity between all pairs of vertices specified. @@ -282,7 +283,7 @@ def all_pairs_jaccard( Flag to indicate whether to compute weighted jaccard (if use_weight==True) or un-weighted jaccard (if use_weight==False). 'input_graph' must be weighted if 'use_weight=True'. - + topk : int, optional (default=None) Specify the number of answers to return otherwise returns the entire solution @@ -322,8 +323,9 @@ def all_pairs_jaccard( if isinstance(vertices, list): vertices = cudf.Series( - vertices, dtype=input_graph.edgelist.edgelist_df[input_graph.srcCol].dtype - ) + vertices, + dtype=input_graph.edgelist.edgelist_df[input_graph.srcCol].dtype, + ) if input_graph.renumbered is True: if isinstance(vertices, cudf.DataFrame): @@ -333,8 +335,6 @@ def all_pairs_jaccard( else: vertices = input_graph.lookup_internal_vertex_id(vertices) - - first, second, jaccard_coeff = pylibcugraph_all_pairs_jaccard_coefficients( resource_handle=ResourceHandle(), graph=input_graph._plc_graph, @@ -348,13 +348,8 @@ def all_pairs_jaccard( vertex_pair["second"] = second if input_graph.renumbered: - vertex_pair = input_graph.unrenumber( - vertex_pair, "first", preserve_order=True - ) - vertex_pair = input_graph.unrenumber( - vertex_pair, "second", preserve_order=True - ) - + vertex_pair = input_graph.unrenumber(vertex_pair, "first", preserve_order=True) + vertex_pair = input_graph.unrenumber(vertex_pair, "second", preserve_order=True) df = vertex_pair df["jaccard_coeff"] = cudf.Series(jaccard_coeff) diff --git a/python/cugraph/cugraph/link_prediction/overlap.py b/python/cugraph/cugraph/link_prediction/overlap.py index 48357900e16..5f6f74dba59 100644 --- a/python/cugraph/cugraph/link_prediction/overlap.py +++ b/python/cugraph/cugraph/link_prediction/overlap.py @@ -278,7 +278,7 @@ def all_pairs_overlap( input_graph: Graph, vertices: cudf.Series = None, use_weight: bool = False, - topk: int = None + topk: int = None, ): """ Compute the All Pairs Overlap Coefficient between each pair of vertices connected @@ -318,7 +318,7 @@ def all_pairs_overlap( Flag to indicate whether to compute weighted overlap (if use_weight==True) or un-weighted overlap (if use_weight==False). 'input_graph' must be weighted if 'use_weight=True'. - + topk : int, optional (default=None) Specify the number of answers to return otherwise returns the entire solution @@ -358,8 +358,9 @@ def all_pairs_overlap( if isinstance(vertices, list): vertices = cudf.Series( - vertices, dtype=input_graph.edgelist.edgelist_df[input_graph.srcCol].dtype - ) + vertices, + dtype=input_graph.edgelist.edgelist_df[input_graph.srcCol].dtype, + ) if input_graph.renumbered is True: if isinstance(vertices, cudf.DataFrame): @@ -369,8 +370,6 @@ def all_pairs_overlap( else: vertices = input_graph.lookup_internal_vertex_id(vertices) - - first, second, overlap_coeff = pylibcugraph_all_pairs_overlap_coefficients( resource_handle=ResourceHandle(), graph=input_graph._plc_graph, @@ -384,15 +383,10 @@ def all_pairs_overlap( vertex_pair["second"] = second if input_graph.renumbered: - vertex_pair = input_graph.unrenumber( - vertex_pair, "first", preserve_order=True - ) - vertex_pair = input_graph.unrenumber( - vertex_pair, "second", preserve_order=True - ) - + vertex_pair = input_graph.unrenumber(vertex_pair, "first", preserve_order=True) + vertex_pair = input_graph.unrenumber(vertex_pair, "second", preserve_order=True) df = vertex_pair df["overlap_coeff"] = cudf.Series(overlap_coeff) - return df \ No newline at end of file + return df diff --git a/python/cugraph/cugraph/link_prediction/sorensen.py b/python/cugraph/cugraph/link_prediction/sorensen.py index 085e11398fe..584fe0dcbc9 100644 --- a/python/cugraph/cugraph/link_prediction/sorensen.py +++ b/python/cugraph/cugraph/link_prediction/sorensen.py @@ -277,7 +277,7 @@ def all_pairs_sorensen( input_graph: Graph, vertices: cudf.Series = None, use_weight: bool = False, - topk: int = None + topk: int = None, ): """ Compute All Pairs the Sorensen coefficient between each pair of vertices connected @@ -315,7 +315,7 @@ def all_pairs_sorensen( Flag to indicate whether to compute weighted sorensen (if use_weight==True) or un-weighted sorensen (if use_weight==False). 'input_graph' must be weighted if 'use_weight=True'. - + topk : int, optional (default=None) Specify the number of answers to return otherwise returns the entire solution @@ -355,8 +355,9 @@ def all_pairs_sorensen( if isinstance(vertices, list): vertices = cudf.Series( - vertices, dtype=input_graph.edgelist.edgelist_df[input_graph.srcCol].dtype - ) + vertices, + dtype=input_graph.edgelist.edgelist_df[input_graph.srcCol].dtype, + ) if input_graph.renumbered is True: if isinstance(vertices, cudf.DataFrame): @@ -366,8 +367,6 @@ def all_pairs_sorensen( else: vertices = input_graph.lookup_internal_vertex_id(vertices) - - first, second, sorensen_coeff = pylibcugraph_all_pairs_sorensen_coefficients( resource_handle=ResourceHandle(), graph=input_graph._plc_graph, @@ -381,13 +380,8 @@ def all_pairs_sorensen( vertex_pair["second"] = second if input_graph.renumbered: - vertex_pair = input_graph.unrenumber( - vertex_pair, "first", preserve_order=True - ) - vertex_pair = input_graph.unrenumber( - vertex_pair, "second", preserve_order=True - ) - + vertex_pair = input_graph.unrenumber(vertex_pair, "first", preserve_order=True) + vertex_pair = input_graph.unrenumber(vertex_pair, "second", preserve_order=True) df = vertex_pair df["sorensen_coeff"] = cudf.Series(sorensen_coeff) diff --git a/python/cugraph/cugraph/tests/link_prediction/test_cosine_mg.py b/python/cugraph/cugraph/tests/link_prediction/test_cosine_mg.py index 88d292dec76..f85508cb089 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_cosine_mg.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_cosine_mg.py @@ -64,7 +64,17 @@ def input_combo(request): tests or other parameterized fixtures. """ parameters = dict( - zip(("graph_file", "directed", "has_vertex_pair", "has_vertices", "has_topk", "is_weighted"), request.param) + zip( + ( + "graph_file", + "directed", + "has_vertex_pair", + "has_vertices", + "has_topk", + "is_weighted", + ), + request.param, + ) ) return parameters @@ -150,7 +160,7 @@ def input_expected_output_all_pairs(input_combo): else: vertices = None - + if has_topk: topk = 5 else: @@ -160,7 +170,10 @@ def input_expected_output_all_pairs(input_combo): print("vertices ", vertices, " is_weighted = ", is_weighted) input_combo["topk"] = topk sg_cugraph_all_pairs_cosine = cugraph.all_pairs_cosine( - G, vertices=input_combo["vertices"], topk=input_combo["topk"], use_weight=is_weighted + G, + vertices=input_combo["vertices"], + topk=input_combo["topk"], + use_weight=is_weighted, ) # Save the results back to the input_combo dictionary to prevent redundant # cuGraph runs. Other tests using the input_combo fixture will look for @@ -235,16 +248,20 @@ def test_dask_mg_cosine(dask_client, benchmark, input_expected_output): @pytest.mark.mg -def test_dask_mg_all_pairs_cosine(dask_client, benchmark, input_expected_output_all_pairs): +def test_dask_mg_all_pairs_cosine( + dask_client, benchmark, input_expected_output_all_pairs +): dg = input_expected_output_all_pairs["MGGraph"] - use_weight = input_expected_output_all_pairs["is_weighted"] - result_cosine = benchmark( - dcg.all_pairs_cosine, dg, vertices=input_expected_output_all_pairs["vertices"], topk=input_expected_output_all_pairs["topk"], use_weight=use_weight + dcg.all_pairs_cosine, + dg, + vertices=input_expected_output_all_pairs["vertices"], + topk=input_expected_output_all_pairs["topk"], + use_weight=use_weight, ) result_cosine = ( diff --git a/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py b/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py index 1f5e811f291..34ee72e799b 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py @@ -350,13 +350,20 @@ def test_all_pairs_jaccard(): # Call Jaccard jaccard_results = cugraph.jaccard(G) - + # Remove self loop - jaccard_results = jaccard_results[jaccard_results['first'] != jaccard_results['second']].reset_index(drop=True) - + jaccard_results = jaccard_results[ + jaccard_results["first"] != jaccard_results["second"] + ].reset_index(drop=True) + all_pairs_jaccard_results = cugraph.all_pairs_jaccard(G) - assert_frame_equal(jaccard_results.head(), all_pairs_jaccard_results.head(), check_dtype=False, check_like=True) + assert_frame_equal( + jaccard_results.head(), + all_pairs_jaccard_results.head(), + check_dtype=False, + check_like=True, + ) # FIXME @@ -368,23 +375,30 @@ def test_all_pairs_jaccard_with_vertices(): # Call Jaccard jaccard_results = cugraph.jaccard(G) - + # Remove self loop - jaccard_results = jaccard_results[jaccard_results['first'] != jaccard_results['second']].reset_index(drop=True) + jaccard_results = jaccard_results[ + jaccard_results["first"] != jaccard_results["second"] + ].reset_index(drop=True) vertices = [0, 1, 2] - mask_first = jaccard_results['first'].isin(vertices) - mask_second = jaccard_results['second'].isin(vertices) - # mask = [v in vertices for v in (jaccard_results['first'].to_pandas() or jaccard_results['second'].to_pandas())] + mask_first = jaccard_results["first"].isin(vertices) + mask_second = jaccard_results["second"].isin(vertices) + # mask = [v in vertices for v in (jaccard_results['first'].to_pandas() + # or jaccard_results['second'].to_pandas())] mask = [f or s for (f, s) in zip(mask_first.to_pandas(), mask_second.to_pandas())] jaccard_results = jaccard_results[mask].reset_index(drop=True) # Call all-pairs Jaccard - all_pairs_jaccard_results = cugraph.all_pairs_jaccard(G, vertices=cudf.Series(vertices, dtype="int32")) + all_pairs_jaccard_results = cugraph.all_pairs_jaccard( + G, vertices=cudf.Series(vertices, dtype="int32") + ) - assert_frame_equal(jaccard_results, all_pairs_jaccard_results, check_dtype=False, check_like=True) + assert_frame_equal( + jaccard_results, all_pairs_jaccard_results, check_dtype=False, check_like=True + ) @pytest.mark.sg @@ -396,12 +410,21 @@ def test_all_pairs_jaccard_with_topk(): jaccard_results = cugraph.jaccard(G) topk = 4 - + # Remove self loop - jaccard_results = jaccard_results[jaccard_results['first'] != jaccard_results['second']].\ - sort_values(["jaccard_coeff", "first", "second"], ascending=False).reset_index(drop=True)[:topk] + jaccard_results = ( + jaccard_results[jaccard_results["first"] != jaccard_results["second"]] + .sort_values(["jaccard_coeff", "first", "second"], ascending=False) + .reset_index(drop=True)[:topk] + ) # Call all-pairs Jaccard - all_pairs_jaccard_results = cugraph.all_pairs_jaccard(G, topk=topk).sort_values(["first", "second"], ascending=False).reset_index(drop=True) + all_pairs_jaccard_results = ( + cugraph.all_pairs_jaccard(G, topk=topk) + .sort_values(["first", "second"], ascending=False) + .reset_index(drop=True) + ) - assert_frame_equal(jaccard_results, all_pairs_jaccard_results, check_dtype=False, check_like=True) + assert_frame_equal( + jaccard_results, all_pairs_jaccard_results, check_dtype=False, check_like=True + ) diff --git a/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py b/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py index 063a9aa00b0..d907a0dfff2 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py @@ -64,7 +64,17 @@ def input_combo(request): tests or other parameterized fixtures. """ parameters = dict( - zip(("graph_file", "directed", "has_vertex_pair", "has_vertices", "has_topk", "is_weighted"), request.param) + zip( + ( + "graph_file", + "directed", + "has_vertex_pair", + "has_vertices", + "has_topk", + "is_weighted", + ), + request.param, + ) ) return parameters @@ -150,7 +160,7 @@ def input_expected_output_all_pairs(input_combo): else: vertices = None - + if has_topk: topk = 5 else: @@ -160,7 +170,10 @@ def input_expected_output_all_pairs(input_combo): print("vertices ", vertices, " is_weighted = ", is_weighted) input_combo["topk"] = topk sg_cugraph_all_pairs_jaccard = cugraph.all_pairs_jaccard( - G, vertices=input_combo["vertices"], topk=input_combo["topk"], use_weight=is_weighted + G, + vertices=input_combo["vertices"], + topk=input_combo["topk"], + use_weight=is_weighted, ) # Save the results back to the input_combo dictionary to prevent redundant # cuGraph runs. Other tests using the input_combo fixture will look for @@ -235,16 +248,20 @@ def test_dask_mg_jaccard(dask_client, benchmark, input_expected_output): @pytest.mark.mg -def test_dask_mg_all_pairs_jaccard(dask_client, benchmark, input_expected_output_all_pairs): +def test_dask_mg_all_pairs_jaccard( + dask_client, benchmark, input_expected_output_all_pairs +): dg = input_expected_output_all_pairs["MGGraph"] - use_weight = input_expected_output_all_pairs["is_weighted"] - result_jaccard = benchmark( - dcg.all_pairs_jaccard, dg, vertices=input_expected_output_all_pairs["vertices"], topk=input_expected_output_all_pairs["topk"], use_weight=use_weight + dcg.all_pairs_jaccard, + dg, + vertices=input_expected_output_all_pairs["vertices"], + topk=input_expected_output_all_pairs["topk"], + use_weight=use_weight, ) result_jaccard = ( diff --git a/python/cugraph/cugraph/tests/link_prediction/test_overlap.py b/python/cugraph/cugraph/tests/link_prediction/test_overlap.py index b864f0dbbdf..9999e994061 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_overlap.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_overlap.py @@ -251,13 +251,20 @@ def test_all_pairs_overlap(): # Call Overlap overlap_results = cugraph.overlap(G) - + # Remove self loop - overlap_results = overlap_results[overlap_results['first'] != overlap_results['second']].reset_index(drop=True) - + overlap_results = overlap_results[ + overlap_results["first"] != overlap_results["second"] + ].reset_index(drop=True) + all_pairs_overlap_results = cugraph.all_pairs_overlap(G) - assert_frame_equal(overlap_results.head(), all_pairs_overlap_results.head(), check_dtype=False, check_like=True) + assert_frame_equal( + overlap_results.head(), + all_pairs_overlap_results.head(), + check_dtype=False, + check_like=True, + ) # FIXME @@ -269,23 +276,30 @@ def test_all_pairs_overlap_with_vertices(): # Call Overlap overlap_results = cugraph.overlap(G) - + # Remove self loop - overlap_results = overlap_results[overlap_results['first'] != overlap_results['second']].reset_index(drop=True) + overlap_results = overlap_results[ + overlap_results["first"] != overlap_results["second"] + ].reset_index(drop=True) vertices = [0, 1, 2] - mask_first = overlap_results['first'].isin(vertices) - mask_second = overlap_results['second'].isin(vertices) - # mask = [v in vertices for v in (overlap_results['first'].to_pandas() or overlap_results['second'].to_pandas())] + mask_first = overlap_results["first"].isin(vertices) + mask_second = overlap_results["second"].isin(vertices) + # mask = [v in vertices for v in (overlap_results['first'].to_pandas() + # or overlap_results['second'].to_pandas())] mask = [f or s for (f, s) in zip(mask_first.to_pandas(), mask_second.to_pandas())] overlap_results = overlap_results[mask].reset_index(drop=True) # Call all-pairs Overlap - all_pairs_overlap_results = cugraph.all_pairs_overlap(G, vertices=cudf.Series(vertices, dtype="int32")) + all_pairs_overlap_results = cugraph.all_pairs_overlap( + G, vertices=cudf.Series(vertices, dtype="int32") + ) - assert_frame_equal(overlap_results, all_pairs_overlap_results, check_dtype=False, check_like=True) + assert_frame_equal( + overlap_results, all_pairs_overlap_results, check_dtype=False, check_like=True + ) @pytest.mark.sg @@ -297,12 +311,21 @@ def test_all_pairs_overlap_with_topk(): overlap_results = cugraph.overlap(G) topk = 4 - + # Remove self loop - overlap_results = overlap_results[overlap_results['first'] != overlap_results['second']].\ - sort_values(["overlap_coeff", "first", "second"], ascending=False).reset_index(drop=True)[:topk] + overlap_results = ( + overlap_results[overlap_results["first"] != overlap_results["second"]] + .sort_values(["overlap_coeff", "first", "second"], ascending=False) + .reset_index(drop=True)[:topk] + ) # Call all-pairs overlap - all_pairs_overlap_results = cugraph.all_pairs_overlap(G, topk=topk).sort_values(["first", "second"], ascending=False).reset_index(drop=True) + all_pairs_overlap_results = ( + cugraph.all_pairs_overlap(G, topk=topk) + .sort_values(["first", "second"], ascending=False) + .reset_index(drop=True) + ) - assert_frame_equal(overlap_results, all_pairs_overlap_results, check_dtype=False, check_like=True) + assert_frame_equal( + overlap_results, all_pairs_overlap_results, check_dtype=False, check_like=True + ) diff --git a/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py b/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py index 77aabea868b..3793ceb4b93 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py @@ -150,7 +150,7 @@ def input_expected_output_all_pairs(input_combo): else: vertices = None - + if has_topk: topk = 5 else: @@ -159,7 +159,10 @@ def input_expected_output_all_pairs(input_combo): input_combo["vertices"] = vertices input_combo["topk"] = topk sg_cugraph_all_pairs_overlap = cugraph.all_pairs_overlap( - G, vertices=input_combo["vertices"], topk=input_combo["topk"], use_weight=is_weighted + G, + vertices=input_combo["vertices"], + topk=input_combo["topk"], + use_weight=is_weighted, ) # Save the results back to the input_combo dictionary to prevent redundant # cuGraph runs. Other tests using the input_combo fixture will look for @@ -237,16 +240,20 @@ def test_dask_mg_overlap(dask_client, benchmark, input_expected_output): @pytest.mark.mg -def test_dask_mg_all_pairs_overlap(dask_client, benchmark, input_expected_output_all_pairs): +def test_dask_mg_all_pairs_overlap( + dask_client, benchmark, input_expected_output_all_pairs +): dg = input_expected_output_all_pairs["MGGraph"] - use_weight = input_expected_output_all_pairs["is_weighted"] - result_overlap = benchmark( - dcg.all_pairs_overlap, dg, vertices=input_expected_output_all_pairs["vertices"], topk=input_expected_output_all_pairs["topk"], use_weight=use_weight + dcg.all_pairs_overlap, + dg, + vertices=input_expected_output_all_pairs["vertices"], + topk=input_expected_output_all_pairs["topk"], + use_weight=use_weight, ) result_overlap = ( diff --git a/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py b/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py index 0c10fe08b90..e7ac5202454 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py @@ -346,13 +346,20 @@ def test_all_pairs_sorensen(): # Call Sorensen sorensen_results = cugraph.sorensen(G) - + # Remove self loop - sorensen_results = sorensen_results[sorensen_results['first'] != sorensen_results['second']].reset_index(drop=True) - + sorensen_results = sorensen_results[ + sorensen_results["first"] != sorensen_results["second"] + ].reset_index(drop=True) + all_pairs_sorensen_results = cugraph.all_pairs_sorensen(G) - assert_frame_equal(sorensen_results.head(), all_pairs_sorensen_results.head(), check_dtype=False, check_like=True) + assert_frame_equal( + sorensen_results.head(), + all_pairs_sorensen_results.head(), + check_dtype=False, + check_like=True, + ) # FIXME @@ -364,23 +371,30 @@ def test_all_pairs_sorensen_with_vertices(): # Call Sorensen sorensen_results = cugraph.sorensen(G) - + # Remove self loop - sorensen_results = sorensen_results[sorensen_results['first'] != sorensen_results['second']].reset_index(drop=True) + sorensen_results = sorensen_results[ + sorensen_results["first"] != sorensen_results["second"] + ].reset_index(drop=True) vertices = [0, 1, 2] - mask_first = sorensen_results['first'].isin(vertices) - mask_second = sorensen_results['second'].isin(vertices) - # mask = [v in vertices for v in (sorensen_results['first'].to_pandas() or sorensen_results['second'].to_pandas())] + mask_first = sorensen_results["first"].isin(vertices) + mask_second = sorensen_results["second"].isin(vertices) + # mask = [v in vertices for v in (sorensen_results['first'].to_pandas() + # or sorensen_results['second'].to_pandas())] mask = [f or s for (f, s) in zip(mask_first.to_pandas(), mask_second.to_pandas())] sorensen_results = sorensen_results[mask].reset_index(drop=True) # Call all-pairs Sorensen - all_pairs_sorensen_results = cugraph.all_pairs_sorensen(G, vertices=cudf.Series(vertices, dtype="int32")) + all_pairs_sorensen_results = cugraph.all_pairs_sorensen( + G, vertices=cudf.Series(vertices, dtype="int32") + ) - assert_frame_equal(sorensen_results, all_pairs_sorensen_results, check_dtype=False, check_like=True) + assert_frame_equal( + sorensen_results, all_pairs_sorensen_results, check_dtype=False, check_like=True + ) @pytest.mark.sg @@ -392,12 +406,21 @@ def test_all_pairs_sorensen_with_topk(): sorensen_results = cugraph.sorensen(G) topk = 4 - + # Remove self loop - sorensen_results = sorensen_results[sorensen_results['first'] != sorensen_results['second']].\ - sort_values(["sorensen_coeff", "first", "second"], ascending=False).reset_index(drop=True)[:topk] + sorensen_results = ( + sorensen_results[sorensen_results["first"] != sorensen_results["second"]] + .sort_values(["sorensen_coeff", "first", "second"], ascending=False) + .reset_index(drop=True)[:topk] + ) # Call all-pairs sorensen - all_pairs_sorensen_results = cugraph.all_pairs_sorensen(G, topk=topk).sort_values(["first", "second"], ascending=False).reset_index(drop=True) + all_pairs_sorensen_results = ( + cugraph.all_pairs_sorensen(G, topk=topk) + .sort_values(["first", "second"], ascending=False) + .reset_index(drop=True) + ) - assert_frame_equal(sorensen_results, all_pairs_sorensen_results, check_dtype=False, check_like=True) + assert_frame_equal( + sorensen_results, all_pairs_sorensen_results, check_dtype=False, check_like=True + ) diff --git a/python/cugraph/cugraph/tests/link_prediction/test_sorensen_mg.py b/python/cugraph/cugraph/tests/link_prediction/test_sorensen_mg.py index ac39ed1cbc6..c4b4eae65d9 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_sorensen_mg.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_sorensen_mg.py @@ -63,7 +63,17 @@ def input_combo(request): tests or other parameterized fixtures. """ parameters = dict( - zip(("graph_file", "directed", "has_vertex_pair", "has_vertices", "has_topk", "is_weighted"), request.param) + zip( + ( + "graph_file", + "directed", + "has_vertex_pair", + "has_vertices", + "has_topk", + "is_weighted", + ), + request.param, + ) ) return parameters @@ -149,7 +159,7 @@ def input_expected_output_all_pairs(input_combo): else: vertices = None - + if has_topk: topk = 5 else: @@ -159,7 +169,10 @@ def input_expected_output_all_pairs(input_combo): print("vertices ", vertices, " is_weighted = ", is_weighted) input_combo["topk"] = topk sg_cugraph_all_pairs_sorensen = cugraph.all_pairs_sorensen( - G, vertices=input_combo["vertices"], topk=input_combo["topk"], use_weight=is_weighted + G, + vertices=input_combo["vertices"], + topk=input_combo["topk"], + use_weight=is_weighted, ) # Save the results back to the input_combo dictionary to prevent redundant # cuGraph runs. Other tests using the input_combo fixture will look for diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/similarity_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/similarity_algorithms.pxd index e969afee76f..71d094a6058 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/similarity_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/similarity_algorithms.pxd @@ -53,7 +53,7 @@ cdef extern from "cugraph_c/similarity_algorithms.h": cugraph_similarity_result_free( cugraph_similarity_result_t* result ) - + ########################################################################### # jaccard coefficients cdef cugraph_error_code_t \ @@ -66,7 +66,7 @@ cdef extern from "cugraph_c/similarity_algorithms.h": cugraph_similarity_result_t** result, cugraph_error_t** error ) - + ########################################################################### # all-pairs jaccard coefficients cdef cugraph_error_code_t \ @@ -93,7 +93,7 @@ cdef extern from "cugraph_c/similarity_algorithms.h": cugraph_similarity_result_t** result, cugraph_error_t** error ) - + ########################################################################### # all-pairs sorensen coefficients cdef cugraph_error_code_t \ @@ -120,7 +120,7 @@ cdef extern from "cugraph_c/similarity_algorithms.h": cugraph_similarity_result_t** result, cugraph_error_t** error ) - + ########################################################################### # all-pairs overlap coefficients cdef cugraph_error_code_t \ @@ -134,7 +134,7 @@ cdef extern from "cugraph_c/similarity_algorithms.h": cugraph_similarity_result_t** result, cugraph_error_t** error ) - + ########################################################################### # cosine coefficients cdef cugraph_error_code_t \ @@ -147,7 +147,7 @@ cdef extern from "cugraph_c/similarity_algorithms.h": cugraph_similarity_result_t** result, cugraph_error_t** error ) - + ########################################################################### # all-pairs cosine coefficients cdef cugraph_error_code_t \ diff --git a/python/pylibcugraph/pylibcugraph/all_pairs_cosine_coefficients.pyx b/python/pylibcugraph/pylibcugraph/all_pairs_cosine_coefficients.pyx index 0bf92b01614..b600dd48567 100644 --- a/python/pylibcugraph/pylibcugraph/all_pairs_cosine_coefficients.pyx +++ b/python/pylibcugraph/pylibcugraph/all_pairs_cosine_coefficients.pyx @@ -87,7 +87,7 @@ def all_pairs_cosine_coefficients(ResourceHandle resource_handle, If set to True, then compute weighted cosine_coefficients( the input graph must be weighted in that case). Otherwise, compute non-weighted cosine_coefficients - + topk : size_t Specify the number of answers to return otherwise will return all values. @@ -142,7 +142,7 @@ def all_pairs_cosine_coefficients(ResourceHandle resource_handle, cdef cugraph_vertex_pairs_t* vertex_pairs_ptr = \ cugraph_similarity_result_get_vertex_pairs(result_ptr) - + cdef cugraph_type_erased_device_array_view_t* first_view_ptr = \ cugraph_vertex_pairs_get_first(vertex_pairs_ptr) diff --git a/python/pylibcugraph/pylibcugraph/all_pairs_jaccard_coefficients.pyx b/python/pylibcugraph/pylibcugraph/all_pairs_jaccard_coefficients.pyx index 70e9846bb75..b65905b6850 100644 --- a/python/pylibcugraph/pylibcugraph/all_pairs_jaccard_coefficients.pyx +++ b/python/pylibcugraph/pylibcugraph/all_pairs_jaccard_coefficients.pyx @@ -87,7 +87,7 @@ def all_pairs_jaccard_coefficients(ResourceHandle resource_handle, If set to True, then compute weighted jaccard_coefficients( the input graph must be weighted in that case). Otherwise, compute non-weighted jaccard_coefficients - + topk : size_t Specify the number of answers to return otherwise will return all values. @@ -142,7 +142,7 @@ def all_pairs_jaccard_coefficients(ResourceHandle resource_handle, cdef cugraph_vertex_pairs_t* vertex_pairs_ptr = \ cugraph_similarity_result_get_vertex_pairs(result_ptr) - + cdef cugraph_type_erased_device_array_view_t* first_view_ptr = \ cugraph_vertex_pairs_get_first(vertex_pairs_ptr) diff --git a/python/pylibcugraph/pylibcugraph/all_pairs_overlap_coefficients.pyx b/python/pylibcugraph/pylibcugraph/all_pairs_overlap_coefficients.pyx index 95fc99a7dd2..74f3bc06a94 100644 --- a/python/pylibcugraph/pylibcugraph/all_pairs_overlap_coefficients.pyx +++ b/python/pylibcugraph/pylibcugraph/all_pairs_overlap_coefficients.pyx @@ -87,7 +87,7 @@ def all_pairs_overlap_coefficients(ResourceHandle resource_handle, If set to True, then compute weighted overlap_coefficients( the input graph must be weighted in that case). Otherwise, compute non-weighted overlap_coefficients - + topk : size_t Specify the number of answers to return otherwise will return all values. @@ -142,7 +142,7 @@ def all_pairs_overlap_coefficients(ResourceHandle resource_handle, cdef cugraph_vertex_pairs_t* vertex_pairs_ptr = \ cugraph_similarity_result_get_vertex_pairs(result_ptr) - + cdef cugraph_type_erased_device_array_view_t* first_view_ptr = \ cugraph_vertex_pairs_get_first(vertex_pairs_ptr) diff --git a/python/pylibcugraph/pylibcugraph/all_pairs_sorensen_coefficients.pyx b/python/pylibcugraph/pylibcugraph/all_pairs_sorensen_coefficients.pyx index c5762271776..5e3fc24a4b4 100644 --- a/python/pylibcugraph/pylibcugraph/all_pairs_sorensen_coefficients.pyx +++ b/python/pylibcugraph/pylibcugraph/all_pairs_sorensen_coefficients.pyx @@ -87,7 +87,7 @@ def all_pairs_sorensen_coefficients(ResourceHandle resource_handle, If set to True, then compute weighted sorensen_coefficients( the input graph must be weighted in that case). Otherwise, compute non-weighted sorensen_coefficients - + topk : size_t Specify the number of answers to return otherwise will return all values. @@ -142,7 +142,7 @@ def all_pairs_sorensen_coefficients(ResourceHandle resource_handle, cdef cugraph_vertex_pairs_t* vertex_pairs_ptr = \ cugraph_similarity_result_get_vertex_pairs(result_ptr) - + cdef cugraph_type_erased_device_array_view_t* first_view_ptr = \ cugraph_vertex_pairs_get_first(vertex_pairs_ptr)