Skip to content

Commit

Permalink
Merge branch 'branch-24.12' into cleanup_data_types
Browse files Browse the repository at this point in the history
  • Loading branch information
ChuckHastings committed Oct 23, 2024
2 parents d0170ae + f917ae4 commit 5b466fa
Show file tree
Hide file tree
Showing 22 changed files with 433 additions and 62 deletions.
6 changes: 0 additions & 6 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,12 @@ jobs:
- '!notebooks/**'
- '!python/**'
- '!readme_pages/**'
# TODO: Remove this before merging
- '!.github/**'
test_notebooks:
- '**'
- '!.devcontainers/**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!docs/**'
# TODO: Remove this before merging
- '!.github/**'
test_python:
- '**'
- '!.devcontainers/**'
Expand All @@ -73,8 +69,6 @@ jobs:
- '!docs/**'
- '!img/**'
- '!notebooks/**'
# TODO: Remove this before merging
- '!.github/**'
checks:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
Expand Down
13 changes: 3 additions & 10 deletions benchmarks/nx-cugraph/pytest-based/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Our current benchmarks provide the following datasets:
| -------- | ------- | ------- | ------- |
| netscience | 1,461 | 5,484 | Yes |
| email-Eu-core | 1,005 | 25,571 | Yes |
| amazon0302 | 262,111 | 1,234,877 | Yes |
| cit-Patents | 3,774,768 | 16,518,948 | Yes |
| hollywood | 1,139,905 | 57,515,616 | No |
| soc-LiveJournal1 | 4,847,571 | 68,993,773 | Yes |
Expand Down Expand Up @@ -39,16 +40,8 @@ NOTE:
./run-main-benchmarks.sh
```

#### 2. `get_graph_bench_dataset.py`
This script downloads the specified dataset using `cugraph.datasets`.

**Usage:**
```bash
python get_graph_bench_dataset.py [dataset]
```

#### 3. `create_results_summary_page.py`
This script is designed to be run after `run-gap-benchmarks.sh` in order to generate an HTML page displaying a results table comparing default NetworkX to nx-cugraph. The script also provides information about the current system, so it should be run on the machine on which benchmarks were run.
#### 2. `create_results_summary_page.py`
This script is designed to be run after `run-main-benchmarks.sh` in order to generate an HTML page displaying a results table comparing default NetworkX to nx-cugraph. The script also provides information about the current system, so it should be run on the machine on which benchmarks were run.

**Usage:**
```bash
Expand Down
8 changes: 8 additions & 0 deletions cpp/include/cugraph_c/lookup_src_dst.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,14 @@ cugraph_type_erased_device_array_view_t* cugraph_lookup_result_get_dsts(
*/
void cugraph_lookup_result_free(cugraph_lookup_result_t* result);

/**
* @ingroup samplingC
* @brief Free a sampling lookup map
*
* @param [in] container The sampling lookup map (a.k.a. container).
*/
void cugraph_lookup_container_free(cugraph_lookup_container_t* container);

#ifdef __cplusplus
}
#endif
2 changes: 1 addition & 1 deletion cpp/src/c_api/graph_sg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,7 @@ cugraph_error_code_t cugraph_graph_create_sg_from_csr(
p_edge_ids,
p_edge_type_ids,
renumber,
FALSE, // symmetrize
symmetrize,
do_expensive_check);

try {
Expand Down
45 changes: 29 additions & 16 deletions cpp/src/c_api/lookup_src_dst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,23 +307,26 @@ extern "C" cugraph_error_code_t cugraph_lookup_endpoints_from_edge_ids_and_types
{
CAPI_EXPECTS(
reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)->vertex_type_ ==
reinterpret_cast<cugraph::c_api::cugraph_graph_t const*>(lookup_container)->vertex_type_,
reinterpret_cast<cugraph::c_api::cugraph_lookup_container_t const*>(lookup_container)
->vertex_type_,
CUGRAPH_INVALID_INPUT,
"vertex type of graph and lookup_container must match",
*error);
CAPI_EXPECTS(
reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)->edge_type_ ==
reinterpret_cast<cugraph::c_api::cugraph_graph_t const*>(lookup_container)->edge_type_,
reinterpret_cast<cugraph::c_api::cugraph_lookup_container_t const*>(lookup_container)
->edge_type_,
CUGRAPH_INVALID_INPUT,
"edge type of graph and lookup_container must match",
*error);

CAPI_EXPECTS(reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)->edge_type_id_type_ ==
reinterpret_cast<cugraph::c_api::cugraph_graph_t const*>(lookup_container)
->edge_type_id_type_,
CUGRAPH_INVALID_INPUT,
"edge type id type of graph and lookup_container must match",
*error);
CAPI_EXPECTS(
reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)->edge_type_id_type_ ==
reinterpret_cast<cugraph::c_api::cugraph_lookup_container_t const*>(lookup_container)
->edge_type_id_type_,
CUGRAPH_INVALID_INPUT,
"edge type id type of graph and lookup_container must match",
*error);

lookup_using_edge_ids_and_types_functor functor(
handle, graph, lookup_container, edge_ids_to_lookup, edge_types_to_lookup);
Expand All @@ -341,23 +344,26 @@ extern "C" cugraph_error_code_t cugraph_lookup_endpoints_from_edge_ids_and_singl
{
CAPI_EXPECTS(
reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)->vertex_type_ ==
reinterpret_cast<cugraph::c_api::cugraph_graph_t const*>(lookup_container)->vertex_type_,
reinterpret_cast<cugraph::c_api::cugraph_lookup_container_t const*>(lookup_container)
->vertex_type_,
CUGRAPH_INVALID_INPUT,
"vertex type of graph and lookup_container must match",
*error);
CAPI_EXPECTS(
reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)->edge_type_ ==
reinterpret_cast<cugraph::c_api::cugraph_graph_t const*>(lookup_container)->edge_type_,
reinterpret_cast<cugraph::c_api::cugraph_lookup_container_t const*>(lookup_container)
->edge_type_,
CUGRAPH_INVALID_INPUT,
"edge type of graph and lookup_container must match",
*error);

CAPI_EXPECTS(reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)->edge_type_id_type_ ==
reinterpret_cast<cugraph::c_api::cugraph_graph_t const*>(lookup_container)
->edge_type_id_type_,
CUGRAPH_INVALID_INPUT,
"edge type id type of graph and lookup_container must match",
*error);
CAPI_EXPECTS(
reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)->edge_type_id_type_ ==
reinterpret_cast<cugraph::c_api::cugraph_lookup_container_t const*>(lookup_container)
->edge_type_id_type_,
CUGRAPH_INVALID_INPUT,
"edge type id type of graph and lookup_container must match",
*error);

lookup_using_edge_ids_of_single_type_functor functor(
handle, graph, lookup_container, edge_ids_to_lookup, edge_type_to_lookup);
Expand Down Expand Up @@ -387,3 +393,10 @@ extern "C" void cugraph_lookup_result_free(cugraph_lookup_result_t* result)
delete internal_pointer->dsts_;
delete internal_pointer;
}

extern "C" void cugraph_lookup_container_free(cugraph_lookup_container_t* container)
{
  // Only the container object itself is released here; the graph should
  // presumably own the other structures referenced by it.
  delete reinterpret_cast<cugraph::c_api::cugraph_lookup_container_t*>(container);
}
3 changes: 1 addition & 2 deletions cpp/src/community/detail/refine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@ refine_clustering(
rmm::device_uvector<typename graph_view_t::vertex_type>&& next_clusters_v,
edge_src_property_t<graph_view_t, weight_t> const& src_vertex_weights_cache,
edge_src_property_t<graph_view_t, typename graph_view_t::vertex_type> const& src_clusters_cache,
edge_dst_property_t<graph_view_t, typename graph_view_t::vertex_type> const& dst_clusters_cache,
bool up_down);
edge_dst_property_t<graph_view_t, typename graph_view_t::vertex_type> const& dst_clusters_cache);

}
} // namespace cugraph
9 changes: 7 additions & 2 deletions cpp/src/community/detail/refine_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,7 @@ refine_clustering(
edge_src_property_t<GraphViewType, typename GraphViewType::vertex_type> const&
src_louvain_assignment_cache,
edge_dst_property_t<GraphViewType, typename GraphViewType::vertex_type> const&
dst_louvain_assignment_cache,
bool up_down)
dst_louvain_assignment_cache)
{
const weight_t POSITIVE_GAIN = 1e-6;
using vertex_t = typename GraphViewType::vertex_type;
Expand Down Expand Up @@ -230,6 +229,7 @@ refine_clustering(
cugraph::reduce_op::plus<weight_t>{},
weighted_cut_of_vertices_to_louvain.begin());

// FIXME: Consider using bit mask logic here. Would reduce memory by 8x
rmm::device_uvector<uint8_t> singleton_and_connected_flags(
graph_view.local_vertex_partition_range_size(), handle.get_stream());

Expand Down Expand Up @@ -297,6 +297,11 @@ refine_clustering(
edge_dst_property_t<GraphViewType, vertex_t> dst_leiden_assignment_cache(handle);
edge_src_property_t<GraphViewType, uint8_t> src_singleton_and_connected_flag_cache(handle);

// FIXME: Why is kvstore used here? Can't this be accomplished by
// a direct lookup in louvain_assignment_of_vertices using
// leiden - graph_view.local_vertex_partition_range_first() as the
// index?
// Changing this would save memory and time
kv_store_t<vertex_t, vertex_t, false> leiden_to_louvain_map(
leiden_assignment.begin(),
leiden_assignment.end(),
Expand Down
6 changes: 2 additions & 4 deletions cpp/src/community/detail/refine_mg_v32_e32.cu
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, int32_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, int32_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

template std::tuple<rmm::device_uvector<int32_t>,
std::pair<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>>>
Expand All @@ -59,8 +58,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, int32_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, int32_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

} // namespace detail
} // namespace cugraph
6 changes: 2 additions & 4 deletions cpp/src/community/detail/refine_mg_v64_e64.cu
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int64_t, int64_t, false, true>, int64_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int64_t, int64_t, false, true>, int64_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

template std::tuple<rmm::device_uvector<int64_t>,
std::pair<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>>>
Expand All @@ -59,8 +58,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int64_t, int64_t, false, true>, int64_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int64_t, int64_t, false, true>, int64_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

} // namespace detail
} // namespace cugraph
6 changes: 2 additions & 4 deletions cpp/src/community/detail/refine_sg_v32_e32.cu
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int32_t, int32_t, false, false>, int32_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int32_t, int32_t, false, false>, int32_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

template std::tuple<rmm::device_uvector<int32_t>,
std::pair<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>>>
Expand All @@ -59,8 +58,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int32_t, int32_t, false, false>, int32_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int32_t, int32_t, false, false>, int32_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

} // namespace detail
} // namespace cugraph
6 changes: 2 additions & 4 deletions cpp/src/community/detail/refine_sg_v64_e64.cu
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int64_t, int64_t, false, false>, int64_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int64_t, int64_t, false, false>, int64_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

template std::tuple<rmm::device_uvector<int64_t>,
std::pair<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>>>
Expand All @@ -59,8 +58,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int64_t, int64_t, false, false>, int64_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int64_t, int64_t, false, false>, int64_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

} // namespace detail
} // namespace cugraph
26 changes: 17 additions & 9 deletions cpp/src/community/leiden_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
HighResTimer hr_timer{};
#endif

weight_t best_modularity = weight_t{-1.0};
weight_t final_Q{-1};

weight_t total_edge_weight =
compute_total_edge_weight(handle, current_graph_view, *current_edge_weight_view);

Expand Down Expand Up @@ -368,9 +369,6 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
detail::timer_stop<graph_view_t::is_multi_gpu>(handle, hr_timer);
#endif

bool terminate = (cur_Q <= best_modularity);
if (!terminate) { best_modularity = cur_Q; }

#ifdef TIMING
detail::timer_start<graph_view_t::is_multi_gpu>(handle, hr_timer, "contract graph");
#endif
Expand All @@ -386,8 +384,7 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
auto nr_unique_louvain_clusters =
remove_duplicates<vertex_t, multi_gpu>(handle, copied_louvain_partition);

terminate =
terminate || (nr_unique_louvain_clusters == current_graph_view.number_of_vertices());
bool terminate = (nr_unique_louvain_clusters == current_graph_view.number_of_vertices());

rmm::device_uvector<vertex_t> refined_leiden_partition(0, handle.get_stream());
std::pair<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> leiden_to_louvain_map{
Expand Down Expand Up @@ -426,11 +423,19 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
std::move(louvain_assignment_for_vertices),
src_vertex_weights_cache,
src_louvain_assignment_cache,
dst_louvain_assignment_cache,
up_down);
dst_louvain_assignment_cache);
}

// Clear buffer and contract the graph
final_Q = detail::compute_modularity(handle,
current_graph_view,
current_edge_weight_view,
src_louvain_assignment_cache,
dst_louvain_assignment_cache,
louvain_assignment_for_vertices,
cluster_weights,
total_edge_weight,
resolution);

cluster_keys.resize(0, handle.get_stream());
cluster_weights.resize(0, handle.get_stream());
Expand All @@ -445,6 +450,9 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
dst_louvain_assignment_cache.clear(handle);

if (!terminate) {
src_louvain_assignment_cache.clear(handle);
dst_louvain_assignment_cache.clear(handle);

auto nr_unique_leiden = static_cast<vertex_t>(leiden_to_louvain_map.first.size());
if (graph_view_t::is_multi_gpu) {
nr_unique_leiden = host_scalar_allreduce(
Expand Down Expand Up @@ -586,7 +594,7 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
detail::timer_display<graph_view_t::is_multi_gpu>(handle, hr_timer, std::cout);
#endif

return std::make_pair(std::move(dendrogram), best_modularity);
return std::make_pair(std::move(dendrogram), final_Q);
}

template <typename vertex_t, bool multi_gpu>
Expand Down
32 changes: 32 additions & 0 deletions python/cugraph/cugraph/tests/structure/test_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from cudf.testing import assert_series_equal
from cudf.testing.testing import assert_frame_equal
from cugraph.structure.symmetrize import symmetrize
from cugraph.datasets import karate_asymmetric

# MG
import dask_cudf
Expand Down Expand Up @@ -204,6 +205,37 @@ def test_add_adj_list_to_edge_list(graph_file):
compare_series(destinations_cu, destinations_exp)


@pytest.mark.sg
def test_create_undirected_graph_from_asymmetric_adj_list():
    # Build a CSR adjacency list from the asymmetric karate dataset.
    df = utils.read_csv_for_nx(karate_asymmetric.get_path())
    num_verts = max(max(df["0"]), max(df["1"])) + 1
    csr = scipy.sparse.csr_matrix(
        (df.weight, (df["0"], df["1"])), shape=(num_verts, num_verts)
    )

    offsets = cudf.Series(csr.indptr)
    indices = cudf.Series(csr.indices)

    G = cugraph.Graph(directed=False)

    # If an undirected graph is created with 'symmetrize' set to False, the
    # edgelist provided by the user must be symmetric; this input is not,
    # so graph creation must fail.
    with pytest.raises(Exception):
        G.from_cudf_adjlist(offsets, indices, None, symmetrize=False)

    # With symmetrize=True the same asymmetric input must be accepted.
    G = cugraph.Graph(directed=False)
    G.from_cudf_adjlist(offsets, indices, None, symmetrize=True)

    # FIXME: Since we have no mechanism to access the symmetrized edgelist
    # from the graph_view_t, assert that the edgelist size is unchanged. Once
    # 'decompress_to_edgelist' is exposed, consider asserting
    # G.number_of_edges() == 2 * len(karate_asymmetric.get_edgelist()) instead.
    assert G.number_of_edges() == len(karate_asymmetric.get_edgelist())

    # FIXME: Once 'decompress_to_edgelist' is exposed to the python API,
    # ensure that the derived edgelist is symmetric when symmetrize=True.


# Test
@pytest.mark.sg
@pytest.mark.parametrize("graph_file", utils.DATASETS)
Expand Down
Loading

0 comments on commit 5b466fa

Please sign in to comment.