From 3789b70e1d6c40971d1d74249ccbd5221e707046 Mon Sep 17 00:00:00 2001 From: Alex Barghi <105237337+alexbarghi-nv@users.noreply.github.com> Date: Sun, 6 Oct 2024 20:55:08 -0400 Subject: [PATCH] Constrain versions of PyTorch and CI artifacts in CI Runs, upgrade to dgl 2.4 (#4690) We were pulling the wrong packages because the PyTorch version constraint wasn't tight enough. Hopefully these sorts of issues will be resolved in the `cugraph-gnn` repository going forward, where we can pin a specific pytorch version for testing. Authors: - Alex Barghi (https://github.com/alexbarghi-nv) - James Lamb (https://github.com/jameslamb) Approvers: - Ray Douglass (https://github.com/raydouglass) - https://github.com/jakirkham - Brad Rees (https://github.com/BradReesWork) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4690 --- ci/build_docs.sh | 34 ++++++++-------- ci/build_python.sh | 3 +- ci/test_cpp.sh | 6 ++- ci/test_notebooks.sh | 6 ++- ci/test_python.sh | 39 ++++++++----------- ci/test_wheel_cugraph-dgl.sh | 4 +- .../all_cuda-118_arch-x86_64.yaml | 3 +- .../all_cuda-125_arch-x86_64.yaml | 3 +- conda/recipes/cugraph-dgl/meta.yaml | 2 +- conda/recipes/cugraph-pyg/meta.yaml | 2 +- dependencies.yaml | 7 ++-- .../source/graph_support/DGL_support.md | 9 +++-- .../wholegraph/installation/container.md | 3 +- python/cugraph-dgl/README.md | 9 +++-- .../conda/cugraph_dgl_dev_cuda-118.yaml | 5 +-- .../conda/cugraph_pyg_dev_cuda-118.yaml | 5 +-- 16 files changed, 72 insertions(+), 68 deletions(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 55235c6ebb9..01c573c96ca 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -6,6 +6,10 @@ set -euo pipefail rapids-logger "Create test conda environment" . /opt/conda/etc/profile.d/conda.sh +export RAPIDS_VERSION="$(rapids-version)" +export RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" +export RAPIDS_VERSION_NUMBER="$RAPIDS_VERSION_MAJOR_MINOR" + rapids-dependency-file-generator \ --output conda \ --file-key docs \ @@ -22,35 +26,31 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then CONDA_CUDA_VERSION="11.8" - DGL_CHANNEL="dglteam/label/cu118" + DGL_CHANNEL="dglteam/label/th23_cu118" else CONDA_CUDA_VERSION="12.1" - DGL_CHANNEL="dglteam/label/cu121" + DGL_CHANNEL="dglteam/label/th23_cu121" fi rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ --channel conda-forge \ - --channel pyg \ --channel nvidia \ --channel "${DGL_CHANNEL}" \ - libcugraph \ - pylibcugraph \ - cugraph \ - cugraph-pyg \ - cugraph-dgl \ - cugraph-service-server \ - cugraph-service-client \ - libcugraph_etl \ - pylibcugraphops \ - pylibwholegraph \ - pytorch \ + "libcugraph=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "pylibcugraph=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "cugraph=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "cugraph-pyg=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "cugraph-dgl=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "cugraph-service-server=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "cugraph-service-client=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "libcugraph_etl=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "pylibcugraphops=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "pylibwholegraph=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "pytorch>=2.3,<2.4" \ "cuda-version=${CONDA_CUDA_VERSION}" -export RAPIDS_VERSION="$(rapids-version)" -export RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" -export RAPIDS_VERSION_NUMBER="$RAPIDS_VERSION_MAJOR_MINOR" export RAPIDS_DOCS_DIR="$(mktemp -d)" for PROJECT in libcugraphops libwholegraph; do diff --git a/ci/build_python.sh b/ci/build_python.sh index 1ebc38b058b..c94cc2a0fce 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -61,7 +61,6 @@ if [[ ${RAPIDS_CUDA_MAJOR} == "11" ]]; then --no-test \ --channel "${CPP_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ - --channel pyg \ --channel pytorch \ --channel pytorch-nightly \ conda/recipes/cugraph-pyg @@ -71,7 +70,7 @@ if [[ ${RAPIDS_CUDA_MAJOR} == "11" ]]; then --no-test \ --channel "${CPP_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ - --channel dglteam \ + --channel dglteam/label/th23_cu118 \ --channel pytorch \ --channel pytorch-nightly \ conda/recipes/cugraph-dgl diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index 6c14870164e..fb9ab1f5e4e 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -8,6 +8,8 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ . /opt/conda/etc/profile.d/conda.sh +RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" + rapids-logger "Generate C++ testing dependencies" rapids-dependency-file-generator \ --output conda \ @@ -30,7 +32,9 @@ rapids-print-env rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ - libcugraph libcugraph_etl libcugraph-tests + "libcugraph=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "libcugraph_etl=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "libcugraph-tests=${RAPIDS_VERSION_MAJOR_MINOR}.*" rapids-logger "Check GPU usage" nvidia-smi diff --git a/ci/test_notebooks.sh b/ci/test_notebooks.sh index 31ec56074f0..b22671b48dc 100755 --- a/ci/test_notebooks.sh +++ b/ci/test_notebooks.sh @@ -5,6 +5,8 @@ set -Eeuo pipefail . /opt/conda/etc/profile.d/conda.sh +RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" + rapids-logger "Generate notebook testing dependencies" rapids-dependency-file-generator \ --output conda \ @@ -27,7 +29,9 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ - libcugraph pylibcugraph cugraph + "libcugraph=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "pylibcugraph=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "cugraph=${RAPIDS_VERSION_MAJOR_MINOR}.*" NBTEST="$(realpath "$(dirname "$0")/utils/nbtest.sh")" NOTEBOOK_LIST="$(realpath "$(dirname "$0")/notebook_list.py")" diff --git a/ci/test_python.sh b/ci/test_python.sh index f21a06cf061..29b4c7be190 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -8,6 +8,8 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ . /opt/conda/etc/profile.d/conda.sh +RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" + rapids-logger "Generate Python testing dependencies" rapids-dependency-file-generator \ --output conda \ @@ -34,12 +36,12 @@ rapids-print-env rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ - libcugraph \ - pylibcugraph \ - cugraph \ - nx-cugraph \ - cugraph-service-server \ - cugraph-service-client + "libcugraph=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "pylibcugraph=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "cugraph=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "nx-cugraph=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "cugraph-service-server=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "cugraph-service-client=${RAPIDS_VERSION_MAJOR_MINOR}.*" rapids-logger "Check GPU usage" nvidia-smi @@ -151,14 +153,13 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ --channel conda-forge \ - --channel dglteam/label/cu118 \ + --channel dglteam/label/th23_cu118 \ --channel nvidia \ - libcugraph \ - pylibcugraph \ - pylibcugraphops \ - cugraph \ - cugraph-dgl \ - 'dgl>=1.1.0.cu*,<=2.0.0.cu*' \ + "libcugraph=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "pylibcugraph=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "pylibcugraphops=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "cugraph=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "cugraph-dgl=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ 'pytorch>=2.3,<2.4' \ 'cuda-version=11.8' @@ -208,16 +209,10 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ - --channel pyg \ - "cugraph-pyg" \ + "cugraph-pyg=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ + "pytorch>=2.3,<2.4" \ "ogb" - pip install \ - pyg_lib \ - torch_scatter \ - torch_sparse \ - -f ${PYG_URL} - rapids-print-env rapids-logger "pytest cugraph_pyg (single GPU)" @@ -253,7 +248,7 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then --channel "${PYTHON_CHANNEL}" \ --channel conda-forge \ --channel nvidia \ - cugraph-equivariant + "cugraph-equivariant=${RAPIDS_VERSION_MAJOR_MINOR}.*" pip install e3nn==0.5.1 rapids-print-env diff --git a/ci/test_wheel_cugraph-dgl.sh b/ci/test_wheel_cugraph-dgl.sh index 9b79cb17fe4..688c58026bd 100755 --- a/ci/test_wheel_cugraph-dgl.sh +++ b/ci/test_wheel_cugraph-dgl.sh @@ -30,10 +30,10 @@ else PYTORCH_CUDA_VER=$PKG_CUDA_VER fi PYTORCH_URL="https://download.pytorch.org/whl/cu${PYTORCH_CUDA_VER}" -DGL_URL="https://data.dgl.ai/wheels/cu${PYTORCH_CUDA_VER}/repo.html" +DGL_URL="https://data.dgl.ai/wheels/torch-2.3/cu${PYTORCH_CUDA_VER}/repo.html" rapids-logger "Installing PyTorch and DGL" rapids-retry python -m pip install torch==2.3.0 --index-url ${PYTORCH_URL} -rapids-retry python -m pip install dgl==2.0.0 --find-links ${DGL_URL} +rapids-retry python -m pip install dgl==2.4.0 --find-links ${DGL_URL} python -m pytest python/cugraph-dgl/tests diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 7ae576e8288..239da7eb724 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -4,8 +4,7 @@ channels: - rapidsai - rapidsai-nightly - dask/label/dev -- pyg -- dglteam/label/cu118 +- dglteam/label/th23_cu118 - conda-forge - nvidia dependencies: diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 1fb04cae081..c975d891a1b 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -4,8 +4,7 @@ channels: - rapidsai - rapidsai-nightly - dask/label/dev -- pyg -- dglteam/label/cu118 +- dglteam/label/th23_cu118 - conda-forge - nvidia dependencies: diff --git a/conda/recipes/cugraph-dgl/meta.yaml b/conda/recipes/cugraph-dgl/meta.yaml index c80ca6890a8..0383fc8adf8 100644 --- a/conda/recipes/cugraph-dgl/meta.yaml +++ b/conda/recipes/cugraph-dgl/meta.yaml @@ -25,7 +25,7 @@ requirements: - setuptools>=61.0.0 run: - cugraph ={{ version }} - - dgl >=1.1.0.cu* + - dgl >=2.4.0.th23.cu* - numba >=0.57 - numpy >=1.23,<3.0a0 - pylibcugraphops ={{ minor_version }} diff --git a/conda/recipes/cugraph-pyg/meta.yaml b/conda/recipes/cugraph-pyg/meta.yaml index 38d4a3d7d15..7d3e503e23a 100644 --- a/conda/recipes/cugraph-pyg/meta.yaml +++ b/conda/recipes/cugraph-pyg/meta.yaml @@ -36,7 +36,7 @@ requirements: - cugraph ={{ version }} - pylibcugraphops ={{ minor_version }} - tensordict >=0.1.2 - - pyg >=2.5,<2.6 + - pytorch_geometric >=2.5,<2.6 tests: imports: diff --git a/dependencies.yaml b/dependencies.yaml index 4da61cb00ad..b2f03ed3b9f 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -323,8 +323,7 @@ channels: - rapidsai - rapidsai-nightly - dask/label/dev - - pyg - - dglteam/label/cu118 + - dglteam/label/th23_cu118 - conda-forge - nvidia dependencies: @@ -700,7 +699,7 @@ dependencies: - &pytorch_conda pytorch>=2.3,<2.4.0a0 - pytorch-cuda==11.8 - &tensordict tensordict>=0.1.2 - - dgl>=1.1.0.cu* + - dgl>=2.4.0.cu* cugraph_pyg_dev: common: - output_types: [conda] @@ -709,7 +708,7 @@ dependencies: - *pytorch_conda - pytorch-cuda==11.8 - *tensordict - - pyg>=2.5,<2.6 + - pytorch_geometric>=2.5,<2.6 depends_on_pytorch: common: diff --git a/docs/cugraph/source/graph_support/DGL_support.md b/docs/cugraph/source/graph_support/DGL_support.md index ba9a28e3170..7d32a9efe37 100644 --- a/docs/cugraph/source/graph_support/DGL_support.md +++ b/docs/cugraph/source/graph_support/DGL_support.md @@ -8,9 +8,12 @@ Install and update cugraph-dgl and the required dependencies using the command: -``` -conda install mamba -n base -c conda-forge -mamba install cugraph-dgl -c rapidsai-nightly -c rapidsai -c pytorch -c conda-forge -c nvidia -c dglteam +```shell +# CUDA 11 +conda install -c rapidsai -c pytorch -c conda-forge -c nvidia -c dglteam/label/th23_cu118 cugraph-dgl + +# CUDA 12 +conda install -c rapidsai -c pytorch -c conda-forge -c nvidia -c dglteam/label/th23_cu121 cugraph-dgl ``` ## Build from Source diff --git a/docs/cugraph/source/wholegraph/installation/container.md b/docs/cugraph/source/wholegraph/installation/container.md index 3a2c627c56a..6aac53cf88f 100644 --- a/docs/cugraph/source/wholegraph/installation/container.md +++ b/docs/cugraph/source/wholegraph/installation/container.md @@ -24,6 +24,7 @@ RUN pip3 install Cython setuputils3 scikit-build nanobind pytest-forked pytest To run GNN applications, you may also need cuGraphOps, DGL and/or PyG libraries to run the GNN layers. You may refer to [DGL](https://www.dgl.ai/pages/start.html) or [PyG](https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html) For example, to install DGL, you may need to add: + ```dockerfile -RUN pip3 install dgl -f https://data.dgl.ai/wheels/cu118/repo.html +RUN pip3 install dgl -f https://data.dgl.ai/wheels/torch-2.3/cu118/repo.html ``` diff --git a/python/cugraph-dgl/README.md b/python/cugraph-dgl/README.md index ac4cb2f6253..013d4fe5e2e 100644 --- a/python/cugraph-dgl/README.md +++ b/python/cugraph-dgl/README.md @@ -8,9 +8,12 @@ Install and update cugraph-dgl and the required dependencies using the command: -``` -conda install mamba -n base -c conda-forge -mamba install cugraph-dgl -c rapidsai-nightly -c rapidsai -c pytorch -c conda-forge -c nvidia -c dglteam +```shell +# CUDA 11 +conda install -c rapidsai -c pytorch -c conda-forge -c nvidia -c dglteam/label/th23_cu118 cugraph-dgl + +# CUDA 12 +conda install -c rapidsai -c pytorch -c conda-forge -c nvidia -c dglteam/label/th23_cu121 cugraph-dgl ``` ## Build from Source diff --git a/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml b/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml index bbb6a5082f6..3f30e521218 100644 --- a/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml +++ b/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml @@ -4,13 +4,12 @@ channels: - rapidsai - rapidsai-nightly - dask/label/dev -- pyg -- dglteam/label/cu118 +- dglteam/label/th23_cu118 - conda-forge - nvidia dependencies: - cugraph==24.10.*,>=0.0.0a0 -- dgl>=1.1.0.cu* +- dgl>=2.4.0.cu* - pandas - pre-commit - pylibcugraphops==24.10.*,>=0.0.0a0 diff --git a/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml b/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml index d9afd52b9b7..6b798f3dc88 100644 --- a/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml +++ b/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml @@ -4,15 +4,13 @@ channels: - rapidsai - rapidsai-nightly - dask/label/dev -- pyg -- dglteam/label/cu118 +- dglteam/label/th23_cu118 - conda-forge - nvidia dependencies: - cugraph==24.10.*,>=0.0.0a0 - pandas - pre-commit -- pyg>=2.5,<2.6 - pylibcugraphops==24.10.*,>=0.0.0a0 - pytest - pytest-benchmark @@ -20,6 +18,7 @@ dependencies: - pytest-xdist - pytorch-cuda==11.8 - pytorch>=2.3,<2.4.0a0 +- pytorch_geometric>=2.5,<2.6 - scipy - tensordict>=0.1.2 name: cugraph_pyg_dev_cuda-118