Skip to content

Commit

Permalink
Merge branch 'branch-24.02' into branch-24.02-nx_cugraph_readme_update
Browse files Browse the repository at this point in the history
  • Loading branch information
eriknw committed Feb 2, 2024
2 parents 5805292 + 3d52f17 commit e32918f
Show file tree
Hide file tree
Showing 19 changed files with 678 additions and 46 deletions.
366 changes: 357 additions & 9 deletions benchmarks/nx-cugraph/pytest-based/bench_algos.py

Large diffs are not rendered by default.

46 changes: 46 additions & 0 deletions benchmarks/nx-cugraph/pytest-based/run-2402.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/bash
#
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Runs benchmarks for the 24.02 algos.
# Pass either a or b or both. This is useful for separating batches of runs on different GPUs:
# CUDA_VISIBLE_DEVICES=1 run-2402.sh b
#
# Run this from the directory that contains bench_algos.py: both the pytest
# target and the logs/ output directory are resolved relative to the current
# working directory.
#
# Exit status: 0 if every selected benchmark passed, 1 if any pytest run
# failed or if no benchmark batch was selected.

usage() {
    echo "usage: $(basename "$0") [a] [b]" >&2
    echo "  a, b: benchmark batches to run (pass one or both)" >&2
}

# One array element per line keeps it easy to comment out individual runs.
benches=()

for batch in "$@"; do
    case "$batch" in
        a)
            benches+=(
                bench_ancestors
                bench_average_clustering
                bench_generic_bfs_edges
                bench_bfs_edges
                bench_bfs_layers
                bench_bfs_predecessors
                bench_bfs_successors
                bench_bfs_tree
                bench_clustering
                bench_core_number
                bench_descendants
            )
            ;;
        b)
            benches+=(
                bench_descendants_at_distance
                bench_is_bipartite
                bench_is_strongly_connected
                bench_is_weakly_connected
                bench_number_strongly_connected_components
                bench_number_weakly_connected_components
                bench_overall_reciprocity
                bench_reciprocity
                bench_strongly_connected_components
                bench_transitivity
                bench_triangles
                bench_weakly_connected_components
            )
            ;;
        *)
            # Unknown batches are reported but do not abort the run, so the
            # recognized batches on the same command line still execute.
            echo "warning: ignoring unknown batch '$batch' (expected a or b)" >&2
            ;;
    esac
done

# Nothing selected: show usage instead of silently doing nothing.
if [[ ${#benches[@]} -eq 0 ]]; then
    usage
    exit 1
fi

mkdir -p logs

status=0
for bench in "${benches[@]}"; do
    pytest -sv -k "soc-livejournal1" "bench_algos.py::$bench" 2>&1 | tee "logs/${bench}.log"
    # The pipeline's own status is tee's; PIPESTATUS[0] recovers pytest's.
    # Keep going after a failure so every benchmark still gets a log.
    if [[ ${PIPESTATUS[0]} -ne 0 ]]; then
        status=1
    fi
done

exit "$status"
35 changes: 33 additions & 2 deletions ci/test_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,16 @@ pytest \
tests
popd

# FIXME: TEMPORARILY disable single-GPU "MG" testing
# Test runs that include tests that use dask require
# --import-mode=append. Those tests start a LocalCUDACluster that inherits
# changes from pytest's modifications to PYTHONPATH (which defaults to
# prepending source tree paths to PYTHONPATH). This causes the
# LocalCUDACluster subprocess to import cugraph from the source tree instead of
# the install location, and in most cases, the source tree does not have
# extensions built in-place and will result in ImportErrors.
#
# FIXME: TEMPORARILY disable MG PropertyGraph tests (experimental) and
# bulk sampler IO tests (hangs in CI)
rapids-logger "pytest cugraph"
pushd python/cugraph/cugraph
DASK_WORKER_DEVICES="0" \
Expand All @@ -72,14 +81,15 @@ DASK_DISTRIBUTED__COMM__TIMEOUTS__CONNECT="1000s" \
DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT="1000s" \
pytest \
-v \
--import-mode=append \
--benchmark-disable \
--cache-clear \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cugraph.xml" \
--cov-config=../../.coveragerc \
--cov=cugraph \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cugraph-coverage.xml" \
--cov-report=term \
-k "not test_property_graph_mg" \
-k "not test_property_graph_mg and not test_bulk_sampler_io" \
tests
popd

Expand Down Expand Up @@ -110,12 +120,33 @@ popd

rapids-logger "pytest networkx using nx-cugraph backend"
pushd python/nx-cugraph
# Use editable install to make coverage work
pip install -e . --no-deps
./run_nx_tests.sh
# run_nx_tests.sh outputs coverage data, so check that total coverage is >0.0%
# in case nx-cugraph failed to load but fallback mode allowed the run to pass.
_coverage=$(coverage report|grep "^TOTAL")
echo "nx-cugraph coverage from networkx tests: $_coverage"
echo $_coverage | awk '{ if ($NF == "0.0%") exit 1 }'
# Ensure all algorithms were called by comparing covered lines to function lines.
# Run our tests again (they're fast enough) to add their coverage, then create coverage.json
pytest \
--pyargs nx_cugraph \
--config-file=./pyproject.toml \
--cov-config=./pyproject.toml \
--cov=nx_cugraph \
--cov-append \
--cov-report=
coverage report \
--include="*/nx_cugraph/algorithms/*" \
--omit=__init__.py \
--show-missing \
--rcfile=./pyproject.toml
coverage json --rcfile=./pyproject.toml
python -m nx_cugraph.tests.ensure_algos_covered
# Exercise (and show results of) scripts that show implemented networkx algorithms
python -m nx_cugraph.scripts.print_tree --dispatch-name --plc --incomplete --different
python -m nx_cugraph.scripts.print_table
popd

rapids-logger "pytest cugraph-service (single GPU)"
Expand Down
17 changes: 14 additions & 3 deletions ci/test_wheel.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

set -eoxu pipefail

Expand All @@ -21,10 +21,21 @@ arch=$(uname -m)
if [[ "${arch}" == "aarch64" && ${RAPIDS_BUILD_TYPE} == "pull-request" ]]; then
python ./ci/wheel_smoke_test_${package_name}.py
else
# FIXME: TEMPORARILY disable single-GPU "MG" testing
# Test runs that include tests that use dask require
# --import-mode=append. See test_python.sh for details.
# FIXME: Adding PY_IGNORE_IMPORTMISMATCH=1 to work around the conftest.py
# import mismatch error seen by nx-cugraph after using pytest 8 and
# --import-mode=append.
RAPIDS_DATASET_ROOT_DIR=`pwd`/datasets \
PY_IGNORE_IMPORTMISMATCH=1 \
DASK_WORKER_DEVICES="0" \
DASK_DISTRIBUTED__SCHEDULER__WORKER_TTL="1000s" \
DASK_DISTRIBUTED__COMM__TIMEOUTS__CONNECT="1000s" \
DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT="1000s" \
python -m pytest ./python/${package_name}/${python_package_name}/tests
python -m pytest \
-v \
--import-mode=append \
--benchmark-disable \
-k "not test_property_graph_mg and not test_bulk_sampler_io" \
./python/${package_name}/${python_package_name}/tests
fi
3 changes: 2 additions & 1 deletion cpp/cmake/thirdparty/get_raft.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#=============================================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -52,6 +52,7 @@ function(find_and_configure_raft)
"RAFT_COMPILE_LIBRARY ${PKG_COMPILE_RAFT_LIB}"
"BUILD_TESTS OFF"
"BUILD_BENCH OFF"
"BUILD_CAGRA_HNSWLIB OFF"
)

if(raft_ADDED)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
)
from cugraph.dask.common.mg_utils import run_gc_on_dask_cluster
import cugraph.dask.comms.comms as Comms
from cugraph.structure.symmetrize import _memory_efficient_drop_duplicates


class simpleDistributedGraphImpl:
Expand Down Expand Up @@ -95,6 +96,7 @@ def _make_plc_graph(
weight_type,
edge_id_type,
edge_type_id,
drop_multi_edges,
):
weights = None
edge_ids = None
Expand Down Expand Up @@ -149,6 +151,7 @@ def _make_plc_graph(
num_arrays=num_arrays,
store_transposed=store_transposed,
do_expensive_check=False,
drop_multi_edges=drop_multi_edges,
)
del edata_x
gc.collect()
Expand Down Expand Up @@ -267,7 +270,7 @@ def __from_edgelist(
input_ddf,
source,
destination,
multi=self.properties.multi_edge,
multi=True, # Deprecated parameter
symmetrize=not self.properties.directed,
)
value_col = None
Expand All @@ -277,7 +280,7 @@ def __from_edgelist(
source,
destination,
value_col_names,
multi=self.properties.multi_edge,
multi=True, # Deprecated parameter
symmetrize=not self.properties.directed,
)

Expand Down Expand Up @@ -364,6 +367,7 @@ def __from_edgelist(
self.weight_type,
self.edge_id_type,
self.edge_type_id_type,
not self.properties.multi_edge,
)
for w, edata in persisted_keys_d.items()
}
Expand Down Expand Up @@ -455,6 +459,15 @@ def view_edge_list(self):
else:
is_multi_column = True

if not self.properties.multi_edge:
# Drop parallel edges for non MultiGraph
# FIXME: Drop multi edges with the CAPI instead.
_client = default_client()
workers = _client.scheduler_info()["workers"]
edgelist_df = _memory_efficient_drop_duplicates(
edgelist_df, [srcCol, dstCol], len(workers)
)

edgelist_df[srcCol], edgelist_df[dstCol] = edgelist_df[
[srcCol, dstCol]
].min(axis=1), edgelist_df[[srcCol, dstCol]].max(axis=1)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
# Copyright (c) 2021-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -264,7 +264,7 @@ def __from_edgelist(
source,
destination,
edge_attr,
multi=self.properties.multi_edge,
multi=self.properties.multi_edge, # Deprecated parameter
symmetrize=not self.properties.directed,
)

Expand All @@ -279,7 +279,7 @@ def __from_edgelist(
elist,
source,
destination,
multi=self.properties.multi_edge,
multi=self.properties.multi_edge, # Deprecated parameter
symmetrize=not self.properties.directed,
)

Expand All @@ -298,7 +298,10 @@ def __from_edgelist(
self._replicate_edgelist()

self._make_plc_graph(
value_col=value_col, store_transposed=store_transposed, renumber=renumber
value_col=value_col,
store_transposed=store_transposed,
renumber=renumber,
drop_multi_edges=not self.properties.multi_edge,
)

def to_pandas_edgelist(
Expand Down Expand Up @@ -477,13 +480,15 @@ def view_edge_list(self):
edgelist_df[simpleGraphImpl.srcCol]
<= edgelist_df[simpleGraphImpl.dstCol]
]

elif not use_initial_input_df and self.properties.renumbered:
# Do not unrenumber the vertices if the initial input df was used
if not self.properties.directed:
edgelist_df = edgelist_df[
edgelist_df[simpleGraphImpl.srcCol]
<= edgelist_df[simpleGraphImpl.dstCol]
]

edgelist_df = self.renumber_map.unrenumber(
edgelist_df, simpleGraphImpl.srcCol
)
Expand Down Expand Up @@ -1084,6 +1089,7 @@ def _make_plc_graph(
value_col: Dict[str, cudf.DataFrame] = None,
store_transposed: bool = False,
renumber: bool = True,
drop_multi_edges: bool = False,
):
"""
Parameters
Expand All @@ -1100,6 +1106,8 @@ def _make_plc_graph(
Whether to renumber the vertices of the graph.
Required if inputted vertex ids are not of
int32 or int64 type.
drop_multi_edges: bool (default=False)
Whether to drop multi edges
"""

if value_col is None:
Expand Down Expand Up @@ -1163,6 +1171,7 @@ def _make_plc_graph(
renumber=renumber,
do_expensive_check=True,
input_array_format=input_array_format,
drop_multi_edges=drop_multi_edges,
)

def to_directed(self, DiG, store_transposed=False):
Expand Down
Loading

0 comments on commit e32918f

Please sign in to comment.