Commit 38a1755

Merge branch 'plc-eid-lookup' of https://github.com/alexbarghi-nv/cugraph into plc-eid-lookup

alexbarghi-nv committed Oct 18, 2024
2 parents 340a488 + f3e69a5
Showing 83 changed files with 279 additions and 582 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -68,7 +68,7 @@ repos:
types: [python]
language: python
pass_filenames: false
additional_dependencies: ["networkx>=3.3"]
additional_dependencies: ["networkx>=3.4"]
- repo: local
hooks:
- id: nx-cugraph-readme-update
@@ -78,4 +78,4 @@ repos:
types_or: [python, markdown]
language: python
pass_filenames: false
additional_dependencies: ["networkx>=3.3"]
additional_dependencies: ["networkx>=3.4"]
4 changes: 3 additions & 1 deletion benchmarks/nx-cugraph/pytest-based/README.md
@@ -21,7 +21,9 @@ Our current benchmarks provide the following datasets:
#### 1. `run-main-benchmarks.sh`
This script allows users to run a small set of commonly used algorithms across multiple datasets and backends. All results are stored in a sub-directory (`logs/`), and output files are named based on the combination of parameters for that benchmark.

NOTE: If running with all algorithms and datasets using NetworkX without an accelerated backend, this script may take a few hours to finish running.
NOTE:
- If running with all algorithms and datasets using NetworkX without an accelerated backend, this script may take a few hours to finish running.
- The `betweenness_centrality` benchmark runs with k values `[10, 20, 50, 100, 500, 1000]` by default. You can restrict the run to specific k values by editing `bc_k_values` (line 46), which is passed to pytest as a [keyword expression](https://docs.pytest.org/en/6.2.x/usage.html#specifying-tests-selecting-tests); see the example below.
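  For example, a hypothetical setting that skips the two largest k values (pytest `-k` matches substrings of test ids such as `k=500`):

  ```bash
  # in run-main-benchmarks.sh
  bc_k_values="and not 500 and not 1000"
  ```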

**Usage:**
- Run with `--cpu-only`:
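  ```bash
  # example invocation (sketch); run from benchmarks/nx-cugraph/pytest-based
  ./run-main-benchmarks.sh --cpu-only
  ```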
53 changes: 38 additions & 15 deletions benchmarks/nx-cugraph/pytest-based/bench_algos.py
@@ -37,6 +37,40 @@
iterations = 1
warmup_rounds = 1

# FIXME: Add this to cugraph.datasets. This is done here so these benchmarks
# can be run without requiring an updated cugraph install. This temporarily
# adds a dataset based on an Amazon product co-purchasing network.
amazon0302_metadata = """
name: amazon0302
description:
Network was collected by crawling the Amazon website. It is based on the "Customers Who Bought This Item Also Bought" feature of the Amazon website. If a product i is frequently co-purchased with product j, the graph contains a directed edge from i to j. The data was collected on March 2, 2003.
author: J. Leskovec, L. Adamic and B. Huberman
refs: J. Leskovec, L. Adamic and B. Huberman. The Dynamics of Viral Marketing. ACM Transactions on the Web (ACM TWEB), 1(1), 2007.
delim: "\t"
header: 3
col_names:
- FromNodeId
- ToNodeId
col_types:
- int32
- int32
has_loop: false
is_directed: true
is_multigraph: false
is_symmetric: false
number_of_edges: 1234877
number_of_nodes: 262111
url: https://snap.stanford.edu/data/amazon0302.txt.gz
"""
amazon0302_metadata_file_name = datasets.default_download_dir.path / "amazon0302.yaml"
if not amazon0302_metadata_file_name.exists():
amazon0302_metadata_file_name.parent.mkdir(parents=True, exist_ok=True)
with open(amazon0302_metadata_file_name, "w") as f:
f.write(amazon0302_metadata)

amazon0302_dataset = datasets.Dataset(amazon0302_metadata_file_name)
amazon0302_dataset.metadata["file_type"] = ".gz"
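# Example usage (sketch): once registered, amazon0302_dataset can be consumed
# like any built-in cugraph.datasets entry, as nx_graph_from_dataset() does below:
#   edgelist = amazon0302_dataset.get_edgelist(download=True, reader="pandas")
#   G = nx.from_pandas_edgelist(
#       edgelist, source="FromNodeId", target="ToNodeId", create_using=nx.DiGraph)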

dataset_param_values = [
# name: karate, nodes: 34, edges: 156
pytest.param(datasets.karate, marks=[pytest.mark.small, pytest.mark.undirected]),
@@ -46,6 +80,8 @@
pytest.param(
datasets.email_Eu_core, marks=[pytest.mark.small, pytest.mark.directed]
),
# name: amazon0302, nodes: 262111, edges: 1234877
pytest.param(amazon0302_dataset, marks=[pytest.mark.medium, pytest.mark.directed]),
# name: cit-Patents, nodes: 3774768, edges: 16518948
pytest.param(
datasets.cit_patents, marks=[pytest.mark.medium, pytest.mark.directed]
@@ -113,19 +149,7 @@ def nx_graph_from_dataset(dataset_obj):
"""
create_using = nx.DiGraph if dataset_obj.metadata["is_directed"] else nx.Graph
names = dataset_obj.metadata["col_names"]
dtypes = dataset_obj.metadata["col_types"]
if isinstance(dataset_obj.metadata["header"], int):
header = dataset_obj.metadata["header"]
else:
header = None

pandas_edgelist = pd.read_csv(
dataset_obj.get_path(),
delimiter=dataset_obj.metadata["delim"],
names=names,
dtype=dict(zip(names, dtypes)),
header=header,
)
pandas_edgelist = dataset_obj.get_edgelist(download=True, reader="pandas")
G = nx.from_pandas_edgelist(
pandas_edgelist, source=names[0], target=names[1], create_using=create_using
)
@@ -272,7 +296,7 @@ def bench_from_networkx(benchmark, graph_obj):

# normalized_param_values = [True, False]
normalized_param_values = [True]
k_param_values = [10, 100, 1000]
k_param_values = [10, 20, 50, 100, 500, 1000]
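# each k yields a pytest id like "k=10" (via the ids lambda below), which is what
# run-main-benchmarks.sh filters on with pytest -k / bc_k_values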


@pytest.mark.parametrize(
@@ -281,7 +305,6 @@ def bench_from_networkx(benchmark, graph_obj):
@pytest.mark.parametrize("k", k_param_values, ids=lambda k: f"{k=}")
def bench_betweenness_centrality(benchmark, graph_obj, backend_wrapper, normalized, k):
G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)

if k > G.number_of_nodes():
pytest.skip(reason=f"{k=} > {G.number_of_nodes()=}")

@@ -166,6 +166,7 @@ def get_system_info():
ordered_datasets = [
"netscience",
"email_Eu_core",
"amazon0302",
"cit-patents",
"hollywood",
"soc-livejournal1",
@@ -174,6 +175,7 @@ def get_system_info():
dataset_meta = {
"netscience": ["1,461", "5,484", "Yes"],
"email_Eu_core": ["1,005", "25,571", "Yes"],
"amazon0302": ["262,111", "1,234,877", "Yes"],
"cit-patents": ["3,774,768", "16,518,948", "Yes"],
"hollywood": ["1,139,905", "57,515,616", "No"],
"soc-livejournal1": ["4,847,571", "68,993,773", "Yes"],
35 changes: 0 additions & 35 deletions benchmarks/nx-cugraph/pytest-based/get_graph_bench_dataset.py

This file was deleted.

18 changes: 12 additions & 6 deletions benchmarks/nx-cugraph/pytest-based/run-main-benchmarks.sh
@@ -14,7 +14,7 @@


# location to store datasets used for benchmarking
export RAPIDS_DATASET_ROOT_DIR=/datasets/cugraph
export RAPIDS_DATASET_ROOT_DIR=${RAPIDS_DATASET_ROOT_DIR:-/datasets/cugraph}
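# (the ${VAR:-default} expansion preserves any value already exported by the caller)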
mkdir -p logs

# list of algos, datasets, and back-ends to use in combinations
@@ -30,6 +30,7 @@ algos="
datasets="
netscience
email_Eu_core
amazon0302
cit-patents
hollywood
soc-livejournal
@@ -40,6 +41,11 @@ backends="
None
cugraph-preconverted
"

# edit this directly to pass a pytest -k filter for the betweenness centrality k values,
# e.g. bc_k_values="and not 100 and not 1000"
bc_k_values=""

# check for --cpu-only or --gpu-only args
if [[ "$#" -eq 1 ]]; then
case $1 in
@@ -58,15 +64,15 @@ fi

for algo in $algos; do
for dataset in $datasets; do
# this script can be used to download benchmarking datasets by name via cugraph.datasets
python get_graph_bench_dataset.py $dataset
for backend in $backends; do
name="${backend}__${algo}__${dataset}"
echo "Running: $backend, $dataset, bench_$algo"
# command to preproduce test
# echo "RUNNING: \"pytest -sv -k \"$backend and $dataset and bench_$algo and not 1000\" --benchmark-json=\"logs/${name}.json\" bench_algos.py"

# uncomment to get command for reproducing test
# echo "RUNNING: \"pytest -sv -k \"$backend and $dataset and bench_$algo $bc_k_values\" --benchmark-json=\"logs/${name}.json\" bench_algos.py"

pytest -sv \
-k "$backend and $dataset and bench_$algo and not 1000" \
-k "$backend and $dataset and bench_$algo $bc_k_values" \
--benchmark-json="logs/${name}.json" \
bench_algos.py 2>&1 | tee "logs/${name}.out"
done
1 change: 0 additions & 1 deletion ci/test_wheel.sh
@@ -4,7 +4,6 @@
set -eoxu pipefail

package_name=$1
package_dir=$2

python_package_name=$(echo ${package_name}|sed 's/-/_/g')
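# (the sed above maps dashes to underscores, matching wheel filename conventions)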

25 changes: 12 additions & 13 deletions ci/test_wheel_cugraph-dgl.sh
@@ -4,24 +4,16 @@
set -eoxu pipefail

package_name="cugraph-dgl"
package_dir="python/cugraph-dgl"

python_package_name=$(echo ${package_name}|sed 's/-/_/g')

mkdir -p ./dist
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"

# Download wheels built during this job.
# Download the pylibcugraph, cugraph, and cugraph-dgl wheels built in the previous step
RAPIDS_PY_WHEEL_NAME="pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-deps
RAPIDS_PY_WHEEL_NAME="cugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-deps
python -m pip install ./local-deps/*.whl

# use 'ls' to expand wildcard before adding `[extra]` requires for pip
RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist
# pip creates wheels using python package names
python -m pip install $(ls ./dist/${python_package_name}*.whl)[test]


# determine pytorch and DGL sources
PKG_CUDA_VER="$(echo ${CUDA_VERSION} | cut -d '.' -f1,2 | tr -d '.')"
PKG_CUDA_VER_MAJOR=${PKG_CUDA_VER:0:2}
if [[ "${PKG_CUDA_VER_MAJOR}" == "12" ]]; then
@@ -32,8 +24,15 @@ fi
PYTORCH_URL="https://download.pytorch.org/whl/cu${PYTORCH_CUDA_VER}"
DGL_URL="https://data.dgl.ai/wheels/torch-2.3/cu${PYTORCH_CUDA_VER}/repo.html"

rapids-logger "Installing PyTorch and DGL"
rapids-retry python -m pip install torch==2.3.0 --index-url ${PYTORCH_URL}
rapids-retry python -m pip install dgl==2.4.0 --find-links ${DGL_URL}
# echo to expand wildcard before adding `[extra]` requires for pip
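# installing the local wheels and the torch/DGL pins in a single pip invocation
# lets the resolver pick mutually compatible versions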
python -m pip install \
-v \
--extra-index-url "${PYTORCH_URL}" \
--find-links "${DGL_URL}" \
"$(echo ./local-deps/pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
"$(echo ./local-deps/cugraph_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
"$(echo ./dist/cugraph_dgl_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
'dgl==2.4.0' \
'torch>=2.3.0,<2.4'
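# (torch is pinned below 2.4 to match the DGL wheels published for torch 2.3; see DGL_URL)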

python -m pytest python/cugraph-dgl/tests
19 changes: 9 additions & 10 deletions ci/test_wheel_cugraph-equivariant.sh
@@ -4,19 +4,14 @@
set -eoxu pipefail

package_name="cugraph-equivariant"
package_dir="python/cugraph-equivariant"

python_package_name=$(echo ${package_name}|sed 's/-/_/g')

mkdir -p ./dist
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"

# use 'ls' to expand wildcard before adding `[extra]` requires for pip
# Download the cugraph-equivariant wheel built in the previous step
RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist
# pip creates wheels using python package names
python -m pip install $(ls ./dist/${python_package_name}*.whl)[test]


# determine pytorch source
PKG_CUDA_VER="$(echo ${CUDA_VERSION} | cut -d '.' -f1,2 | tr -d '.')"
PKG_CUDA_VER_MAJOR=${PKG_CUDA_VER:0:2}
if [[ "${PKG_CUDA_VER_MAJOR}" == "12" ]]; then
@@ -26,8 +21,12 @@ else
fi
PYTORCH_URL="https://download.pytorch.org/whl/cu${PYTORCH_CUDA_VER}"

rapids-logger "Installing PyTorch and e3nn"
rapids-retry python -m pip install torch --index-url ${PYTORCH_URL}
rapids-retry python -m pip install e3nn
# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install \
-v \
--extra-index-url "${PYTORCH_URL}" \
"$(echo ./dist/cugraph_equivariant_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
'e3nn' \
'torch>=2.3.0,<2.4'

python -m pytest python/cugraph-equivariant/cugraph_equivariant/tests
47 changes: 23 additions & 24 deletions ci/test_wheel_cugraph-pyg.sh
@@ -4,45 +4,44 @@
set -eoxu pipefail

package_name="cugraph-pyg"
package_dir="python/cugraph-pyg"

python_package_name=$(echo ${package_name}|sed 's/-/_/g')

mkdir -p ./dist
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"

# Download wheels built during this job.
# Download the pylibcugraph, cugraph, and cugraph-pyg wheels built in the previous step
RAPIDS_PY_WHEEL_NAME="pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-deps
RAPIDS_PY_WHEEL_NAME="cugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-deps
python -m pip install ./local-deps/*.whl

# use 'ls' to expand wildcard before adding `[extra]` requires for pip
RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist
# pip creates wheels using python package names
python -m pip install $(ls ./dist/${python_package_name}*.whl)[test]

# RAPIDS_DATASET_ROOT_DIR is used by test scripts
export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)"

# Used to skip certain examples in CI due to memory limitations
export CI_RUN=1

# determine pytorch and pyg sources
if [[ "${CUDA_VERSION}" == "11.8.0" ]]; then
PYTORCH_URL="https://download.pytorch.org/whl/cu118"
PYG_URL="https://data.pyg.org/whl/torch-2.3.0+cu118.html"
else
PYTORCH_URL="https://download.pytorch.org/whl/cu121"
PYG_URL="https://data.pyg.org/whl/torch-2.3.0+cu121.html"
fi
rapids-logger "Installing PyTorch and PyG dependencies"
rapids-retry python -m pip install torch==2.3.0 --index-url ${PYTORCH_URL}
rapids-retry python -m pip install "torch-geometric>=2.5,<2.6"
rapids-retry python -m pip install \
ogb \
pyg_lib \
torch_scatter \
torch_sparse \
-f ${PYG_URL}

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install \
-v \
--extra-index-url "${PYTORCH_URL}" \
--find-links "${PYG_URL}" \
"$(echo ./local-deps/pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
"$(echo ./local-deps/cugraph_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
"$(echo ./dist/cugraph_pyg_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
'ogb' \
'pyg_lib' \
'torch>=2.3.0,<2.4' \
'torch-geometric>=2.5,<2.6' \
'torch_scatter' \
'torch_sparse'

# RAPIDS_DATASET_ROOT_DIR is used by test scripts
export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)"

# Used to skip certain examples in CI due to memory limitations
export CI_RUN=1

rapids-logger "pytest cugraph-pyg (single GPU)"
pushd python/cugraph-pyg/cugraph_pyg
4 changes: 2 additions & 2 deletions ci/test_wheel_cugraph.sh
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

set -eoxu pipefail

@@ -8,4 +8,4 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-pylibcugraph-dep
python -m pip install --no-deps ./local-pylibcugraph-dep/pylibcugraph*.whl

./ci/test_wheel.sh cugraph python/cugraph
./ci/test_wheel.sh cugraph
2 changes: 1 addition & 1 deletion ci/test_wheel_nx-cugraph.sh
@@ -8,4 +8,4 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-deps
python -m pip install ./local-deps/*.whl

./ci/test_wheel.sh nx-cugraph python/nx-cugraph
./ci/test_wheel.sh nx-cugraph