Skip to content

Commit

Permalink
Merge pull request #4137 from rapidsai/branch-24.02
Browse files Browse the repository at this point in the history
Forward-merge branch-24.02 to branch-24.04
  • Loading branch information
GPUtester authored Feb 2, 2024
2 parents 26efc6d + 3d52f17 commit b4f7a48
Show file tree
Hide file tree
Showing 16 changed files with 636 additions and 34 deletions.
366 changes: 357 additions & 9 deletions benchmarks/nx-cugraph/pytest-based/bench_algos.py

Large diffs are not rendered by default.

46 changes: 46 additions & 0 deletions benchmarks/nx-cugraph/pytest-based/run-2402.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/bash
#
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Runs benchmarks for the 24.02 algos.
# Pass either a or b or both. This is useful for separating batches of runs on different GPUs:
# CUDA_VISIBLE_DEVICES=1 run-2402.sh b

mkdir -p logs

# benches="$benches ..." pattern is easy to comment out individual runs
benches=

while [[ $1 != "" ]]; do
if [[ $1 == "a" ]]; then
benches="$benches bench_ancestors"
benches="$benches bench_average_clustering"
benches="$benches bench_generic_bfs_edges"
benches="$benches bench_bfs_edges"
benches="$benches bench_bfs_layers"
benches="$benches bench_bfs_predecessors"
benches="$benches bench_bfs_successors"
benches="$benches bench_bfs_tree"
benches="$benches bench_clustering"
benches="$benches bench_core_number"
benches="$benches bench_descendants"
elif [[ $1 == "b" ]]; then
benches="$benches bench_descendants_at_distance"
benches="$benches bench_is_bipartite"
benches="$benches bench_is_strongly_connected"
benches="$benches bench_is_weakly_connected"
benches="$benches bench_number_strongly_connected_components"
benches="$benches bench_number_weakly_connected_components"
benches="$benches bench_overall_reciprocity"
benches="$benches bench_reciprocity"
benches="$benches bench_strongly_connected_components"
benches="$benches bench_transitivity"
benches="$benches bench_triangles"
benches="$benches bench_weakly_connected_components"
fi
shift
done

for bench in $benches; do
pytest -sv -k "soc-livejournal1" "bench_algos.py::$bench" 2>&1 | tee "logs/${bench}.log"
done
21 changes: 21 additions & 0 deletions ci/test_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,33 @@ popd

rapids-logger "pytest networkx using nx-cugraph backend"
pushd python/nx-cugraph
# Use editable install to make coverage work
pip install -e . --no-deps
./run_nx_tests.sh
# run_nx_tests.sh outputs coverage data, so check that total coverage is >0.0%
# in case nx-cugraph failed to load but fallback mode allowed the run to pass.
_coverage=$(coverage report|grep "^TOTAL")
echo "nx-cugraph coverage from networkx tests: $_coverage"
echo $_coverage | awk '{ if ($NF == "0.0%") exit 1 }'
# Ensure all algorithms were called by comparing covered lines to function lines.
# Run our tests again (they're fast enough) to add their coverage, then create coverage.json
pytest \
--pyargs nx_cugraph \
--config-file=./pyproject.toml \
--cov-config=./pyproject.toml \
--cov=nx_cugraph \
--cov-append \
--cov-report=
coverage report \
--include="*/nx_cugraph/algorithms/*" \
--omit=__init__.py \
--show-missing \
--rcfile=./pyproject.toml
coverage json --rcfile=./pyproject.toml
python -m nx_cugraph.tests.ensure_algos_covered
# Exercise (and show results of) scripts that show implemented networkx algorithms
python -m nx_cugraph.scripts.print_tree --dispatch-name --plc --incomplete --different
python -m nx_cugraph.scripts.print_table
popd

rapids-logger "pytest cugraph-service (single GPU)"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
)
from cugraph.dask.common.mg_utils import run_gc_on_dask_cluster
import cugraph.dask.comms.comms as Comms
from cugraph.structure.symmetrize import _memory_efficient_drop_duplicates


class simpleDistributedGraphImpl:
Expand Down Expand Up @@ -95,6 +96,7 @@ def _make_plc_graph(
weight_type,
edge_id_type,
edge_type_id,
drop_multi_edges,
):
weights = None
edge_ids = None
Expand Down Expand Up @@ -149,6 +151,7 @@ def _make_plc_graph(
num_arrays=num_arrays,
store_transposed=store_transposed,
do_expensive_check=False,
drop_multi_edges=drop_multi_edges,
)
del edata_x
gc.collect()
Expand Down Expand Up @@ -267,7 +270,7 @@ def __from_edgelist(
input_ddf,
source,
destination,
multi=self.properties.multi_edge,
multi=True, # Deprecated parameter
symmetrize=not self.properties.directed,
)
value_col = None
Expand All @@ -277,7 +280,7 @@ def __from_edgelist(
source,
destination,
value_col_names,
multi=self.properties.multi_edge,
multi=True, # Deprecated parameter
symmetrize=not self.properties.directed,
)

Expand Down Expand Up @@ -364,6 +367,7 @@ def __from_edgelist(
self.weight_type,
self.edge_id_type,
self.edge_type_id_type,
not self.properties.multi_edge,
)
for w, edata in persisted_keys_d.items()
}
Expand Down Expand Up @@ -455,6 +459,15 @@ def view_edge_list(self):
else:
is_multi_column = True

if not self.properties.multi_edge:
# Drop parallel edges for non MultiGraph
# FIXME: Drop multi edges with the CAPI instead.
_client = default_client()
workers = _client.scheduler_info()["workers"]
edgelist_df = _memory_efficient_drop_duplicates(
edgelist_df, [srcCol, dstCol], len(workers)
)

edgelist_df[srcCol], edgelist_df[dstCol] = edgelist_df[
[srcCol, dstCol]
].min(axis=1), edgelist_df[[srcCol, dstCol]].max(axis=1)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
# Copyright (c) 2021-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -264,7 +264,7 @@ def __from_edgelist(
source,
destination,
edge_attr,
multi=self.properties.multi_edge,
multi=self.properties.multi_edge, # Deprecated parameter
symmetrize=not self.properties.directed,
)

Expand All @@ -279,7 +279,7 @@ def __from_edgelist(
elist,
source,
destination,
multi=self.properties.multi_edge,
multi=self.properties.multi_edge, # Deprecated parameter
symmetrize=not self.properties.directed,
)

Expand All @@ -298,7 +298,10 @@ def __from_edgelist(
self._replicate_edgelist()

self._make_plc_graph(
value_col=value_col, store_transposed=store_transposed, renumber=renumber
value_col=value_col,
store_transposed=store_transposed,
renumber=renumber,
drop_multi_edges=not self.properties.multi_edge,
)

def to_pandas_edgelist(
Expand Down Expand Up @@ -477,13 +480,15 @@ def view_edge_list(self):
edgelist_df[simpleGraphImpl.srcCol]
<= edgelist_df[simpleGraphImpl.dstCol]
]

elif not use_initial_input_df and self.properties.renumbered:
# Do not unrenumber the vertices if the initial input df was used
if not self.properties.directed:
edgelist_df = edgelist_df[
edgelist_df[simpleGraphImpl.srcCol]
<= edgelist_df[simpleGraphImpl.dstCol]
]

edgelist_df = self.renumber_map.unrenumber(
edgelist_df, simpleGraphImpl.srcCol
)
Expand Down Expand Up @@ -1084,6 +1089,7 @@ def _make_plc_graph(
value_col: Dict[str, cudf.DataFrame] = None,
store_transposed: bool = False,
renumber: bool = True,
drop_multi_edges: bool = False,
):
"""
Parameters
Expand All @@ -1100,6 +1106,8 @@ def _make_plc_graph(
Whether to renumber the vertices of the graph.
Required if inputted vertex ids are not of
int32 or int64 type.
drop_multi_edges: bool (default=False)
Whether to drop multi edges
"""

if value_col is None:
Expand Down Expand Up @@ -1163,6 +1171,7 @@ def _make_plc_graph(
renumber=renumber,
do_expensive_check=True,
input_array_format=input_array_format,
drop_multi_edges=drop_multi_edges,
)

def to_directed(self, DiG, store_transposed=False):
Expand Down
37 changes: 34 additions & 3 deletions python/cugraph/cugraph/structure/symmetrize.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand All @@ -15,6 +15,7 @@
import cudf
import dask_cudf
from dask.distributed import default_client
import warnings


def symmetrize_df(
Expand Down Expand Up @@ -54,6 +55,11 @@ def symmetrize_df(
Name of the column in the data frame containing the weight ids
multi : bool, optional (default=False)
[Deprecated, Multi will be removed in future version, and the removal
of multi edges will no longer be supported from 'symmetrize'.
Multi edges will be removed upon creation of graph instance directly
based on if the graph is `curgaph.MultiGraph` or `cugraph.Graph`.]
Set to True if graph is a Multi(Di)Graph. This allows multiple
edges instead of dropping them.
Expand Down Expand Up @@ -84,6 +90,12 @@ def symmetrize_df(
if multi:
return result
else:
warnings.warn(
"Multi is deprecated and the removal of multi edges will no longer be "
"supported from 'symmetrize'. Multi edges will be removed upon creation "
"of graph instance.",
FutureWarning,
)
vertex_col_name = src_name + dst_name
result = result.groupby(by=[*vertex_col_name], as_index=False).min()
return result
Expand Down Expand Up @@ -128,6 +140,11 @@ def symmetrize_ddf(
Name of the column in the data frame containing the weight ids
multi : bool, optional (default=False)
[Deprecated, Multi will be removed in future version, and the removal
of multi edges will no longer be supported from 'symmetrize'.
Multi edges will be removed upon creation of graph instance directly
based on if the graph is `curgaph.MultiGraph` or `cugraph.Graph`.]
Set to True if graph is a Multi(Di)Graph. This allows multiple
edges instead of dropping them.
Expand Down Expand Up @@ -165,8 +182,15 @@ def symmetrize_ddf(
else:
result = ddf
if multi:
result = result.reset_index(drop=True).repartition(npartitions=len(workers) * 2)
return result
else:
warnings.warn(
"Multi is deprecated and the removal of multi edges will no longer be "
"supported from 'symmetrize'. Multi edges will be removed upon creation "
"of graph instance.",
FutureWarning,
)
vertex_col_name = src_name + dst_name
result = _memory_efficient_drop_duplicates(
result, vertex_col_name, len(workers)
Expand All @@ -181,6 +205,7 @@ def symmetrize(
value_col_name=None,
multi=False,
symmetrize=True,
do_expensive_check=False,
):
"""
Take a dataframe of source destination pairs along with associated
Expand Down Expand Up @@ -208,6 +233,11 @@ def symmetrize(
weights column name.
multi : bool, optional (default=False)
[Deprecated, Multi will be removed in future version, and the removal
of multi edges will no longer be supported from 'symmetrize'.
Multi edges will be removed upon creation of graph instance directly
based on if the graph is `curgaph.MultiGraph` or `cugraph.Graph`.]
Set to True if graph is a Multi(Di)Graph. This allows multiple
edges instead of dropping them.
Expand All @@ -234,8 +264,9 @@ def symmetrize(
if "edge_id" in input_df.columns and symmetrize:
raise ValueError("Edge IDs are not supported on undirected graphs")

csg.null_check(input_df[source_col_name])
csg.null_check(input_df[dest_col_name])
if do_expensive_check: # FIXME: Optimize this check as it is currently expensive
csg.null_check(input_df[source_col_name])
csg.null_check(input_df[dest_col_name])

if isinstance(input_df, dask_cudf.DataFrame):
output_df = symmetrize_ddf(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -26,6 +26,7 @@
from cugraph.testing import UNDIRECTED_DATASETS
from cugraph.dask import uniform_neighbor_sample
from cugraph.dask.common.mg_utils import is_single_gpu
from cugraph.structure.symmetrize import _memory_efficient_drop_duplicates
from cugraph.datasets import email_Eu_core, small_tree
from pylibcugraph.testing.utils import gen_fixture_params_product

Expand Down Expand Up @@ -135,6 +136,14 @@ def test_mg_uniform_neighbor_sample_simple(dask_client, input_combo):
dg = input_combo["MGGraph"]

input_df = dg.input_df
# Drop parallel edges for non MultiGraph
# FIXME: Drop multi edges with the CAPI instead.
vertex_col_name = ["src", "dst"]
workers = dask_client.scheduler_info()["workers"]
input_df = _memory_efficient_drop_duplicates(
input_df, vertex_col_name, len(workers)
)

result_nbr = uniform_neighbor_sample(
dg,
input_combo["start_list"],
Expand Down
2 changes: 1 addition & 1 deletion python/nx-cugraph/lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ repos:
- id: mixed-line-ending
- id: trailing-whitespace
- repo: https://github.com/abravalheri/validate-pyproject
rev: v0.15
rev: v0.16
hooks:
- id: validate-pyproject
name: Validate pyproject.toml
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def generic_bfs_edges(G, source, neighbors=None, depth_limit=None, sort_neighbor
raise NotImplementedError(
"sort_neighbors argument in generic_bfs_edges is not currently supported"
)
return bfs_edges(source, depth_limit=depth_limit)
return bfs_edges(G, source, depth_limit=depth_limit)


@generic_bfs_edges._can_run
Expand Down
12 changes: 6 additions & 6 deletions python/nx-cugraph/nx_cugraph/classes/digraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,9 @@ def to_undirected(self, reciprocal=False, as_view=False):
key: val[indices].copy() for key, val in self.edge_masks.items()
}
else:
src_indices, dst_indices = cp.divmod(
src_dst_indices_new, N, dtype=index_dtype
)
src_indices, dst_indices = cp.divmod(src_dst_indices_new, N)
src_indices = src_indices.astype(index_dtype)
dst_indices = dst_indices.astype(index_dtype)
else:
src_dst_indices_old_T = self.src_indices + N * self.dst_indices.astype(
np.int64
Expand Down Expand Up @@ -116,9 +116,9 @@ def to_undirected(self, reciprocal=False, as_view=False):
src_dst_indices_new = cp.union1d(
src_dst_indices_old, src_dst_indices_old_T
)
src_indices, dst_indices = cp.divmod(
src_dst_indices_new, N, dtype=index_dtype
)
src_indices, dst_indices = cp.divmod(src_dst_indices_new, N)
src_indices = src_indices.astype(index_dtype)
dst_indices = dst_indices.astype(index_dtype)

if self.edge_values:
recip_indices = cp.lexsort(cp.vstack((src_indices, dst_indices)))
Expand Down
Loading

0 comments on commit b4f7a48

Please sign in to comment.