Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-24.04' into branch-24.0…
Browse files Browse the repository at this point in the history
…4_enforce-dtype-match
  • Loading branch information
jnke2016 committed Mar 10, 2024
2 parents 21971f0 + 47119c3 commit c900864
Show file tree
Hide file tree
Showing 676 changed files with 9,427 additions and 5,018 deletions.
11 changes: 10 additions & 1 deletion .devcontainer/cuda11.8-pip/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,26 @@
"args": {
"CUDA": "11.8",
"PYTHON_PACKAGE_MANAGER": "pip",
"BASE": "rapidsai/devcontainers:24.04-cpp-llvm16-cuda11.8-ubuntu22.04"
"BASE": "rapidsai/devcontainers:24.04-cpp-cuda11.8-ubuntu22.04"
}
},
"hostRequirements": {"gpu": "optional"},
"features": {
"ghcr.io/rapidsai/devcontainers/features/ucx:24.4": {
"version": "1.14.1"
},
"ghcr.io/rapidsai/devcontainers/features/cuda:24.4": {
"version": "11.8",
"installcuBLAS": true,
"installcuSOLVER": true,
"installcuRAND": true,
"installcuSPARSE": true
},
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.4": {}
},
"overrideFeatureInstallOrder": [
"ghcr.io/rapidsai/devcontainers/features/ucx",
"ghcr.io/rapidsai/devcontainers/features/cuda",
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
],
"initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda11.8-venvs}"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"context": "${localWorkspaceFolder}/.devcontainer",
"dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
"args": {
"CUDA": "12.0",
"CUDA": "12.2",
"PYTHON_PACKAGE_MANAGER": "conda",
"BASE": "rapidsai/devcontainers:24.04-cpp-mambaforge-ubuntu22.04"
}
Expand All @@ -15,7 +15,7 @@
"overrideFeatureInstallOrder": [
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
],
"initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.0-envs}"],
"initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.2-envs}"],
"postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
"workspaceFolder": "/home/coder",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cugraph,type=bind,consistency=consistent",
Expand All @@ -24,7 +24,7 @@
"source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.0-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
"source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.2-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,39 @@
"context": "${localWorkspaceFolder}/.devcontainer",
"dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
"args": {
"CUDA": "12.0",
"CUDA": "12.2",
"PYTHON_PACKAGE_MANAGER": "pip",
"BASE": "rapidsai/devcontainers:24.04-cpp-llvm16-cuda12.0-ubuntu22.04"
"BASE": "rapidsai/devcontainers:24.04-cpp-cuda12.2-ubuntu22.04"
}
},
"hostRequirements": {"gpu": "optional"},
"features": {
"ghcr.io/rapidsai/devcontainers/features/ucx:24.4": {
"version": "1.14.1"
},
"ghcr.io/rapidsai/devcontainers/features/cuda:24.4": {
"version": "12.2",
"installcuBLAS": true,
"installcuSOLVER": true,
"installcuRAND": true,
"installcuSPARSE": true
},
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.4": {}
},
"overrideFeatureInstallOrder": [
"ghcr.io/rapidsai/devcontainers/features/ucx",
"ghcr.io/rapidsai/devcontainers/features/cuda",
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
],
"initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.0-venvs}"],
"initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.2-venvs}"],
"postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
"workspaceFolder": "/home/coder",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cugraph,type=bind,consistency=consistent",
"mounts": [
"source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.0-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
"source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.2-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
Expand Down
2 changes: 1 addition & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# Ignore cmake builds from local machine that might have occured before attempting Docker build. Including these files will cause CMake cache conflict issues
/cpp/build
/cpp/build
1 change: 1 addition & 0 deletions .github/ops-bot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ branch_checker: true
label_checker: true
release_drafter: true
recently_updated: true
forward_merger: true
2 changes: 1 addition & 1 deletion .github/workflows/add-to-project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
issues:
types:
- opened

pull_request_target:
types:
- opened
Expand Down
6 changes: 4 additions & 2 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ jobs:
with:
build_type: pull-request
script: ci/test_wheel_cugraph-pyg.sh
matrix_filter: map(select(.ARCH == "amd64" and .CUDA_VER == "11.8.0"))
matrix_filter: map(select(.ARCH == "amd64"))
wheel-build-cugraph-equivariant:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
Expand All @@ -179,8 +179,10 @@ jobs:
matrix_filter: map(select(.ARCH == "amd64"))
devcontainer:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.04
uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@fix/devcontainer-json-location
with:
arch: '["amd64"]'
cuda: '["12.2"]'
node_type: cpu32
extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY
build_command: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ jobs:
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
script: ci/test_wheel_cugraph-pyg.sh
matrix_filter: map(select(.ARCH == "amd64" and .CUDA_VER == "11.8.0"))
matrix_filter: map(select(.ARCH == "amd64"))
wheel-tests-cugraph-equivariant:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ datasets/*
!datasets/karate-disjoint.csv
!datasets/netscience.csv

# nx-cugraph side effects
python/nx-cugraph/objects.inv

.pydevproject

Expand Down
7 changes: 5 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,21 @@
exclude: '^thirdparty'
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v4.5.0
hooks:
- id: check-added-large-files
- id: debug-statements
- id: end-of-file-fixer
- id: mixed-line-ending
- id: trailing-whitespace
- repo: https://github.com/psf/black
rev: 22.10.0
hooks:
- id: black
language_version: python3
args: [--target-version=py38]
args: [--target-version=py39]
files: ^(python/.*|benchmarks/.*)$
exclude: ^python/nx-cugraph/
- repo: https://github.com/PyCQA/flake8
rev: 6.0.0
hooks:
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/cugraph-dgl/pytest-based/README.MD
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
## Run Benchmarks
## Run Benchmarks

#### SG
#### SG
```
pytest bench_cugraph_dgl_uniform_neighbor_sample.py -k "SG and fanout_10_25 and rmat_24_4" --benchmark-save='1_rmat_24_4.json'
```
#### MG
#### MG

```
DASK_NUM_WORKERS=2 pytest bench_cugraph_dgl_uniform_neighbor_sample.py -k "MG and fanout_10_25 and rmat_24_16" --benchmark-save='2_rmat_24_8.json'
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -39,7 +39,7 @@
uniform_neighbor_sample,
)
from cugraph.generators import rmat
from cugraph.experimental import datasets
from cugraph import datasets
from cugraph.dask import uniform_neighbor_sample as uniform_neighbor_sample_mg

from cugraph_benchmarking import params
Expand Down
16 changes: 8 additions & 8 deletions benchmarks/cugraph/standalone/bulk_sampling/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,21 @@ Required:
the samples will be written to a new folder in /home/samples that
contains information about the sampling run as well as the time
of the run.

--dataset_root
The folder where datasets are stored. Uses the format described
in the input format section.

--datasets
Comma-separated list of datasets; can specify ogb or rmat (i.e. ogb_papers100M[2],rmat_22_16).
For ogb datasets, can provide replication factor using brackets.
Will attempt to read from dataset_root/<datset_name>.

Optional:
--fanouts
Comma-separated list of fanout values (i.e. [10, 25]).
The default fanout is [10, 25].

--batch_sizes
Comma-separated list of batch sizes (i.e. 500, 1000).
Defaults to "512,1024"
Expand All @@ -39,7 +39,7 @@ Optional:
Comma-separated list of seeds per call. Controls the number of input seed vertices processed
in a single sampling call.
Defaults to 524288

--reverse_edges
Whether to reverse the edges of the input edgelist. Should be set to False for PyG and True for DGL.
Defaults to False (PyG).
Expand All @@ -52,8 +52,8 @@ Optional:
--random_seed
Seed for random number generation.
Defaults to '62'


### Input Format
The script expects its input data in the following format:
```
Expand Down Expand Up @@ -159,4 +159,4 @@ GPUs per node is currently unsupported by this script but should be possible in

### Output
The results of training will be outputted to the logs directory with an `output.txt` file for each worker.
These will be overwritten upon each run. Accuracy is only reported on rank 0.
These will be overwritten upon each run. Accuracy is only reported on rank 0.
6 changes: 3 additions & 3 deletions benchmarks/cugraph/standalone/bulk_sampling/run_sampling.sh
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ handleTimeout 120 python ${MG_UTILS_DIR}/wait_for_workers.py \

DASK_STARTUP_ERRORCODE=$LAST_EXITCODE

echo $SLURM_NODEID
echo $SLURM_NODEID
if [[ $SLURM_NODEID == 0 ]]; then
echo "Launching Python Script"
python ${SCRIPTS_DIR}/cugraph_bulk_sampling.py \
Expand All @@ -78,7 +78,7 @@ if [[ $SLURM_NODEID == 0 ]]; then
--batch_sizes $BATCH_SIZE \
--seeds_per_call_opts "524288" \
--num_epochs $NUM_EPOCHS \
--random_seed 42
--random_seed 42

echo "DONE" > ${SAMPLES_DIR}/status.txt
fi
Expand Down Expand Up @@ -108,4 +108,4 @@ sleep 2

if [[ $SLURM_NODEID == 0 ]]; then
rm ${SAMPLES_DIR}/status.txt
fi
fi
3 changes: 1 addition & 2 deletions benchmarks/cugraph/standalone/bulk_sampling/run_train_job.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#SBATCH -p luna
#SBATCH -J datascience_rapids_cugraphgnn-papers:bulkSamplingPyG
#SBATCH -N 1
#SBATCH -t 00:25:00
#SBATCH -t 00:25:00

CONTAINER_IMAGE=${CONTAINER_IMAGE:="please_specify_container"}
SCRIPTS_DIR=$(pwd)
Expand Down Expand Up @@ -81,4 +81,3 @@ srun \
--fanout $FANOUT \
--replication_factor $REPLICATION_FACTOR \
--num_epochs $NUM_EPOCHS

2 changes: 1 addition & 1 deletion benchmarks/dgl/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ pytest dgl_benchmark.py::bench_dgl_pure_gpu
## For UVA Benchmarks
```
pytest dgl_benchmark.py::bench_dgl_uva
```
```
10 changes: 5 additions & 5 deletions benchmarks/shared/build_cugraph_ucx/README.MD
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ docker build -f cugraph_ucx.dockerfile . -t cugraph_ucx
docker run --privileged -it --gpus=all --net=host cugraph_ucx /bin/bash

#### Client Bandwidth Test
python3 test_client_bandwidth.py
python3 test_client_bandwidth.py

```bash
(base) root@exp02:/home# python3 test_client_bandwidth.py
(base) root@exp02:/home# python3 test_client_bandwidth.py
2022-12-19 13:31:30,867 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize
2022-12-19 13:31:30,867 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize
2022-12-19 13:31:30,891 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize
Expand All @@ -30,8 +30,8 @@ Bandwidth = 5.2037 gb/s
#### Sampling Test
python3 test_cugraph_sampling.py
```bash
test_client_bandwidth.py test_cugraph_sampling.py
(base) root@exp02:/home# python3 test_cugraph_sampling.py
test_client_bandwidth.py test_cugraph_sampling.py
(base) root@exp02:/home# python3 test_cugraph_sampling.py
[1671456769.722931] [exp02:93 :0] parser.c:1989 UCX WARN unused environment variable: UCX_MEMTYPE_CACHE (maybe: UCX_MEMTYPE_CACHE?)
[1671456769.722931] [exp02:93 :0] parser.c:1989 UCX WARN (set UCX_WARN_UNUSED_ENV_VARS=n to suppress this warning)
2022-12-19 13:32:56,228 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize
Expand All @@ -54,4 +54,4 @@ Sampling 1,000 took = 69.15879249572754 ms
Sampling 10,000 took = 89.63620662689209 ms
Sampling 100,000 took = 135.9888792037964 ms
----------------------------------------Completed Test----------------------------------------
```
```
4 changes: 2 additions & 2 deletions benchmarks/shared/build_cugraph_ucx/build-ucx.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0
set -ex

Expand All @@ -16,4 +16,4 @@ mkdir build-linux && cd build-linux
--enable-mt --enable-numa --with-gnu-ld --with-rdmacm --with-verbs \
--with-cuda=${CUDA_HOME} \
${CONFIGURE_ARGS}
make -j install
make -j install
2 changes: 1 addition & 1 deletion benchmarks/shared/build_cugraph_ucx/cugraph_ucx.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ RUN gpuci_mamba_retry install -y -c pytorch -c rapidsai-nightly -c rapidsai -c c
tqdm


# Build ucx from source with IB support
# Build ucx from source with IB support
# on 1.14.x
RUN conda remove --force -y ucx ucx-proc

Expand Down
2 changes: 1 addition & 1 deletion ci/build_docs.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

set -euo pipefail

Expand Down
4 changes: 2 additions & 2 deletions ci/build_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@ cd "${package_dir}"

python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check

# pure-python packages should not have auditwheel run on them.
# pure-python packages should be marked as pure, and not have auditwheel run on them.
if [[ ${package_name} == "nx-cugraph" ]] || \
[[ ${package_name} == "cugraph-dgl" ]] || \
[[ ${package_name} == "cugraph-pyg" ]] || \
[[ ${package_name} == "cugraph-equivariant" ]]; then
RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 dist
RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 dist
else
mkdir -p final_dist
python -m auditwheel repair -w final_dist dist/*
Expand Down
14 changes: 14 additions & 0 deletions ci/run_ctests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.

set -euo pipefail

# Support customizing the ctests' install location
cd "${INSTALL_PREFIX:-${CONDA_PREFIX:-/usr}}/bin/gtests/libcugraph/"
ctest --output-on-failure --no-tests=error "$@"

if [ -d "${INSTALL_PREFIX:-${CONDA_PREFIX:-/usr}}/bin/gtests/libcugraph_c/" ]; then
# Support customizing the ctests' install location
cd "${INSTALL_PREFIX:-${CONDA_PREFIX:-/usr}}/bin/gtests/libcugraph_c/"
ctest --output-on-failure --no-tests=error "$@"
fi
Loading

0 comments on commit c900864

Please sign in to comment.