From 26fffb992bfca28b2ea5a0ebea6247770e9a2ac6 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 9 Oct 2024 09:22:36 -0500 Subject: [PATCH 1/6] make package installations in CI stricter --- ci/build_docs.sh | 6 ++++-- ci/build_python.sh | 2 -- ci/test_cpp.sh | 7 ++++--- ci/test_python.sh | 6 +++--- ci/test_wheel.sh | 21 ++++++++++++++------- 5 files changed, 25 insertions(+), 17 deletions(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 453bd39c0..d87b9ee87 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -6,6 +6,9 @@ set -euo pipefail rapids-logger "Create test conda environment" . /opt/conda/etc/profile.d/conda.sh +RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" +export RAPIDS_VERSION_NUMBER="${RAPIDS_VERSION_MAJOR_MINOR}" + rapids-dependency-file-generator \ --output conda \ --file-key docs \ @@ -22,12 +25,11 @@ rapids-print-env rapids-logger "Downloading artifacts from previous jobs" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) -export RAPIDS_VERSION_NUMBER="24.12" export RAPIDS_DOCS_DIR="$(mktemp -d)" rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ - libwholegraph + "libwholegraph=${RAPIDS_VERSION_MAJOR_MINOR}" rapids-logger "Build Doxygen docs" pushd cpp diff --git a/ci/build_python.sh b/ci/build_python.sh index 1a3812b36..505cf8c94 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -13,8 +13,6 @@ export CMAKE_GENERATOR=Ninja rapids-print-env -PACKAGES="libwholegraph" - CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) rapids-generate-version > ./VERSION diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index 49f76376a..d08930030 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -8,6 +8,8 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ . /opt/conda/etc/profile.d/conda.sh +RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" + rapids-logger "Generate C++ testing dependencies" rapids-dependency-file-generator \ --output conda \ @@ -27,11 +29,10 @@ mkdir -p "${RAPIDS_TESTS_DIR}" rapids-print-env -PACKAGES="libwholegraph libwholegraph-tests" - rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ - "${PACKAGES}" + "libwholegraph=${RAPIDS_MAJOR_MINOR_VERSION}" \ + "libwholegraph-tests=${RAPIDS_MAJOR_MINOR_VERSION}" rapids-logger "Check GPU usage" nvidia-smi diff --git a/ci/test_python.sh b/ci/test_python.sh index 80bc3513f..f5818005c 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -8,6 +8,8 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ . /opt/conda/etc/profile.d/conda.sh +RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" + ARCH=$(arch) EXITCODE=0 @@ -44,13 +46,11 @@ mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}" rapids-print-env -PACKAGES="pylibwholegraph" - rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ 'mkl<2024.1.0' \ - "${PACKAGES}" + "pylibwholegraph=${RAPIDS_MAJOR_MINOR_VERSION}" rapids-logger "Check GPU usage" nvidia-smi diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh index 9fdeec250..00cbdbb11 100755 --- a/ci/test_wheel.sh +++ b/ci/test_wheel.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. set -e # abort the script on error set -o pipefail # piped commands propagate their error @@ -7,11 +7,9 @@ set -E # ERR traps are inherited by subcommands mkdir -p ./dist RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="pylibwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist - -# echo to expand wildcard before adding `[extra]` requires for pip -python -m pip install $(echo ./dist/pylibwholegraph*.whl) +RAPIDS_PY_WHEEL_NAME="pylibwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist +# determine pytorch source PKG_CUDA_VER="$(echo ${CUDA_VERSION} | cut -d '.' -f1,2 | tr -d '.')" PKG_CUDA_VER_MAJOR=${PKG_CUDA_VER:0:2} if [[ "${PKG_CUDA_VER_MAJOR}" == "12" ]]; then @@ -19,13 +17,22 @@ if [[ "${PKG_CUDA_VER_MAJOR}" == "12" ]]; then else INDEX_URL="https://download.pytorch.org/whl/cu${PKG_CUDA_VER}" fi + +# echo to expand wildcard before adding `[extra]` requires for pip +python -m pip install \ + -v \ + --extra-index-url "${INDEX_URL}" \ + "$(echo ./dist/pylibwholegraph*.whl)" \ + numpy \ + pytest \ + pytest-forked \ + 'torch>=2.0.0a0' + RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"} mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}" rapids-logger "Installing PyTorch" -rapids-retry python -m pip install --pre torch --index-url ${INDEX_URL} -rapids-retry python -m pip install pytest pytest-forked numpy rapids-logger "pytest pylibwholegraph" cd python/pylibwholegraph/pylibwholegraph/tests python -m pytest \ From 131174f342ea4cafaa5d8dc433a910c50bfce3f2 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 9 Oct 2024 09:24:10 -0500 Subject: [PATCH 2/6] update-version.sh --- ci/release/update-version.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 3b66b2304..2b8b52ebd 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -81,4 +81,3 @@ for FILE in .github/workflows/*.yaml; do sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" done -sed_runner "s/RAPIDS_VERSION_NUMBER=\".*/RAPIDS_VERSION_NUMBER=\"${NEXT_SHORT_TAG}\"/g" ci/build_docs.sh From bf0b61d95e5f2c3161bb0451dd5ad371561d0a8a Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 9 Oct 2024 09:51:11 -0500 Subject: [PATCH 3/6] typos --- ci/test_cpp.sh | 4 ++-- ci/test_python.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index d08930030..0f7fbae0f 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -31,8 +31,8 @@ rapids-print-env rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ - "libwholegraph=${RAPIDS_MAJOR_MINOR_VERSION}" \ - "libwholegraph-tests=${RAPIDS_MAJOR_MINOR_VERSION}" + "libwholegraph=${RAPIDS_VERSION_MAJOR_MINOR}" \ + "libwholegraph-tests=${RAPIDS_VERSION_MAJOR_MINOR}" rapids-logger "Check GPU usage" nvidia-smi diff --git a/ci/test_python.sh b/ci/test_python.sh index f5818005c..b6e3c95fc 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -50,7 +50,7 @@ rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ 'mkl<2024.1.0' \ - "pylibwholegraph=${RAPIDS_MAJOR_MINOR_VERSION}" + "pylibwholegraph=${RAPIDS_VERSION_MAJOR_MINOR}" rapids-logger "Check GPU usage" nvidia-smi From 353bf21c4d75276eae4f9117d6178e1307a61db6 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 9 Oct 2024 17:09:54 -0500 Subject: [PATCH 4/6] rapids-version instead of rapids-version-major-minor --- ci/build_docs.sh | 7 +++---- ci/test_cpp.sh | 6 +++--- ci/test_python.sh | 4 ++-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index d87b9ee87..dca22fb36 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -6,8 +6,7 @@ set -euo pipefail rapids-logger "Create test conda environment" . /opt/conda/etc/profile.d/conda.sh -RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" -export RAPIDS_VERSION_NUMBER="${RAPIDS_VERSION_MAJOR_MINOR}" +RAPIDS_VERSION="$(rapids-version)" rapids-dependency-file-generator \ --output conda \ @@ -29,7 +28,7 @@ export RAPIDS_DOCS_DIR="$(mktemp -d)" rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ - "libwholegraph=${RAPIDS_VERSION_MAJOR_MINOR}" + "libwholegraph=${RAPIDS_VERSION}" rapids-logger "Build Doxygen docs" pushd cpp @@ -40,4 +39,4 @@ popd rapids-logger "Output temp dir: ${RAPIDS_DOCS_DIR}" -rapids-upload-docs +RAPIDS_VERSION_NUMBER="$(rapids-verion-major-minor)" rapids-upload-docs diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index 0f7fbae0f..09ecf7a83 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -8,7 +8,7 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ . /opt/conda/etc/profile.d/conda.sh -RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" +RAPIDS_VERSION="$(rapids-version)" rapids-logger "Generate C++ testing dependencies" rapids-dependency-file-generator \ @@ -31,8 +31,8 @@ rapids-print-env rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ - "libwholegraph=${RAPIDS_VERSION_MAJOR_MINOR}" \ - "libwholegraph-tests=${RAPIDS_VERSION_MAJOR_MINOR}" + "libwholegraph=${RAPIDS_VERSION}" \ + "libwholegraph-tests=${RAPIDS_VERSION}" rapids-logger "Check GPU usage" nvidia-smi diff --git a/ci/test_python.sh b/ci/test_python.sh index b6e3c95fc..758a25dc7 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -8,7 +8,7 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ . /opt/conda/etc/profile.d/conda.sh -RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" +RAPIDS_VERSION="$(rapids-version)" ARCH=$(arch) EXITCODE=0 @@ -50,7 +50,7 @@ rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ 'mkl<2024.1.0' \ - "pylibwholegraph=${RAPIDS_VERSION_MAJOR_MINOR}" + "pylibwholegraph=${RAPIDS_VERSION}" rapids-logger "Check GPU usage" nvidia-smi From 2067242bd53785e8cf171e184b37aafbbb9dfc04 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 9 Oct 2024 18:12:56 -0500 Subject: [PATCH 5/6] Update ci/build_docs.sh --- ci/build_docs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index dca22fb36..d15182bcb 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -39,4 +39,4 @@ popd rapids-logger "Output temp dir: ${RAPIDS_DOCS_DIR}" -RAPIDS_VERSION_NUMBER="$(rapids-verion-major-minor)" rapids-upload-docs +RAPIDS_VERSION_NUMBER="$(rapids-version-major-minor)" rapids-upload-docs From 326d5121a9748f83432a5cad09f19f6bd0da4af3 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 10 Oct 2024 10:16:46 -0500 Subject: [PATCH 6/6] revert wheel testing changes --- ci/test_wheel.sh | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh index 00cbdbb11..9fdeec250 100755 --- a/ci/test_wheel.sh +++ b/ci/test_wheel.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023, NVIDIA CORPORATION. set -e # abort the script on error set -o pipefail # piped commands propagate their error @@ -7,9 +7,11 @@ set -E # ERR traps are inherited by subcommands mkdir -p ./dist RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="pylibwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist +RAPIDS_PY_WHEEL_NAME="pylibwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist + +# echo to expand wildcard before adding `[extra]` requires for pip +python -m pip install $(echo ./dist/pylibwholegraph*.whl) -# determine pytorch source PKG_CUDA_VER="$(echo ${CUDA_VERSION} | cut -d '.' -f1,2 | tr -d '.')" PKG_CUDA_VER_MAJOR=${PKG_CUDA_VER:0:2} if [[ "${PKG_CUDA_VER_MAJOR}" == "12" ]]; then @@ -17,22 +19,13 @@ if [[ "${PKG_CUDA_VER_MAJOR}" == "12" ]]; then else INDEX_URL="https://download.pytorch.org/whl/cu${PKG_CUDA_VER}" fi - -# echo to expand wildcard before adding `[extra]` requires for pip -python -m pip install \ - -v \ - --extra-index-url "${INDEX_URL}" \ - "$(echo ./dist/pylibwholegraph*.whl)" \ - numpy \ - pytest \ - pytest-forked \ - 'torch>=2.0.0a0' - RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"} mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}" rapids-logger "Installing PyTorch" +rapids-retry python -m pip install --pre torch --index-url ${INDEX_URL} +rapids-retry python -m pip install pytest pytest-forked numpy rapids-logger "pytest pylibwholegraph" cd python/pylibwholegraph/pylibwholegraph/tests python -m pytest \