From 28afe764fa8cdab9e622d1d7b0c61166887e985d Mon Sep 17 00:00:00 2001 From: Michael Sarahan Date: Wed, 17 Jan 2024 11:06:55 -0800 Subject: [PATCH 01/79] add build wheel script and accompanying version info --- .github/workflows/build.yaml | 19 +++++++++++++ .github/workflows/pr.yaml | 16 +++++++++++ .github/workflows/test.yaml | 9 +++++++ VERSION | 1 + ci/build_wheel.sh | 52 ++++++++++++++++++++++++++++++++++++ ci/test_wheel.sh | 28 +++++++++++++++++++ dependencies.yaml | 4 +++ python/ucxx/VERSION | 1 + python/ucxx/_version.py | 20 ++++++++++++++ 9 files changed, 150 insertions(+) create mode 100644 VERSION create mode 100755 ci/build_wheel.sh create mode 100644 ci/test_wheel.sh create mode 120000 python/ucxx/VERSION create mode 100644 python/ucxx/_version.py diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 2b9c9c89..f549d57b 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -43,3 +43,22 @@ jobs: branch: ${{ inputs.branch }} date: ${{ inputs.date }} sha: ${{ inputs.sha }} + wheel-build: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + script: ci/build_wheel.sh + wheel-publish: + needs: wheel-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: ucxx \ No newline at end of file diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 494415db..6c082063 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -16,6 +16,8 @@ jobs: - conda-cpp-build - conda-cpp-tests - conda-python-tests + - wheel-build + - wheel-tests secrets: inherit uses: 
rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.02 checks: @@ -43,3 +45,17 @@ jobs: with: build_type: pull-request container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" + wheel-build: + needs: checks + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + with: + build_type: pull-request + script: ci/build_wheel.sh + wheel-tests: + needs: wheel-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + with: + build_type: pull-request + script: ci/test_wheel.sh \ No newline at end of file diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 23d17854..4ed555b6 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -32,3 +32,12 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" + wheel-tests: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + script: ci/test_wheel.sh \ No newline at end of file diff --git a/VERSION b/VERSION new file mode 100644 index 00000000..9c0e7f4c --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +0.36.00a32 diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh new file mode 100755 index 00000000..0823cfce --- /dev/null +++ b/ci/build_wheel.sh @@ -0,0 +1,52 @@ +#!/bin/bash +# Copyright (c) 2023-2024, NVIDIA CORPORATION. + +set -euo pipefail + +package_name="ucxx" +package_dir="python" + +source rapids-configure-sccache +source rapids-date-string + +version=$(rapids-generate-version) +commit=$(git rev-parse HEAD) + +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" + +# This is the version of the suffix with a preceding hyphen. 
It's used +# everywhere except in the final wheel name. +PACKAGE_CUDA_SUFFIX="-${RAPIDS_PY_CUDA_SUFFIX}" + +# Patch project metadata files to include the CUDA version suffix and version override. +pyproject_file="${package_dir}/pyproject.toml" + +sed -i -E "s/^name = \"${package_name}(.*)?\"$/name = \"${package_name}${PACKAGE_CUDA_SUFFIX}\"/g" ${pyproject_file} +echo "${version}" > VERSION +sed -i "/^__git_commit__/ s/= .*/= \"${commit}\"/g" "${package_dir}/${package_name//-/_}/_version.py" + +# For nightlies we want to ensure that we're pulling in alphas as well. The +# easiest way to do so is to augment the spec with a constraint containing a +# min alpha version that doesn't affect the version bounds but does allow usage +# of alpha versions for that dependency without --pre +alpha_spec='' +if ! rapids-is-release-build; then + alpha_spec=',>=0.0.0a0' +fi + +# add or replace -cuXX to package name. Determine if this is a package mention based +# on whether it has any sort of pinning. There's a usage of "rmm" in known_rapids that +# should not get this -cuXX tacked on. +sed -r -i -E "s/rmm(-cu[0-9]+|)([\=\<\>\!].*)/rmm${PACKAGE_CUDA_SUFFIX}\2/g" ${pyproject_file} +# Capture the pin and add the alpha spec to it as needed +sed -r -i -E "/${alpha_spec}\"(,|)$/! s/rmm-cu[0-9]+([\=\<\>\!].*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} + +cd "${package_dir}" + +SKBUILD_CMAKE_ARGS="-DUCXX_ENABLE_PYTHON=ON -DUCXX_ENABLE_RMM=ON" \ + python -m pip wheel . 
-w dist -vvv --no-deps --disable-pip-version-check + +python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/* + +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +RAPIDS_PY_WHEEL_NAME="ucxx_${AUDITWHEEL_POLICY}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh new file mode 100644 index 00000000..243e9b68 --- /dev/null +++ b/ci/test_wheel.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# Copyright (c) 2023, NVIDIA CORPORATION. + +set -euo pipefail + +export PROJECT_NAME="ucxx" + +mkdir -p ./dist +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +RAPIDS_PY_WHEEL_NAME="${PROJECT_NAME}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist + +# On arm also need to install CMake because treelite needs to be compiled (no wheels available for arm). +if [[ "$(arch)" == "aarch64" ]]; then + python -m pip install cmake +fi + +# echo to expand wildcard before adding `[extra]` requires for pip +python -m pip install $(echo ./dist/${PROJECT_NAME}*.whl)[test] + +# Run smoke tests for aarch64 pull requests +# if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then +# python ci/wheel_smoke_test.py +# else +# python -m pytest ./python/${PROJECT_NAME}/tests -k 'not test_sparse_pca_inputs' -n 4 --ignore=python/cuml/tests/dask && python -m pytest ./python/${PROJECT_NAME}/tests -k 'test_sparse_pca_inputs' && python -m pytest ./python/cuml/tests/dask +# fi + +echo "Please add meaningful tests here. This file was copied from CuML and needs to be adapted to UCXX's needs." 
+exit 1 \ No newline at end of file diff --git a/dependencies.yaml b/dependencies.yaml index 181f0b6d..755bf0ae 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -87,6 +87,10 @@ dependencies: - scikit-build-core>=0.7.0 - output_types: [requirements, pyproject] packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for rmm. + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple - scikit-build-core[pyproject]>=0.7.0 checks: common: diff --git a/python/ucxx/VERSION b/python/ucxx/VERSION new file mode 120000 index 00000000..974cf28e --- /dev/null +++ b/python/ucxx/VERSION @@ -0,0 +1 @@ +VERSION \ No newline at end of file diff --git a/python/ucxx/_version.py b/python/ucxx/_version.py new file mode 100644 index 00000000..1be8f627 --- /dev/null +++ b/python/ucxx/_version.py @@ -0,0 +1,20 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import importlib.resources + +__version__ = ( + importlib.resources.files("ucxx").joinpath("VERSION").read_text().strip() +) +__git_commit__ = "f16c8de52bbee0b83807348aaddba5e9c3e44c78" \ No newline at end of file From a92cf980667369d5ee177338de96dd89c3a9d2c8 Mon Sep 17 00:00:00 2001 From: Michael Sarahan Date: Wed, 17 Jan 2024 11:55:02 -0800 Subject: [PATCH 02/79] move pip args to requirements output --- VERSION | 2 +- dependencies.yaml | 4 +++- python/ucxx/VERSION | 2 +- python/ucxx/_version.py | 6 ++---- python/ucxx/examples/CMakeLists.txt | 2 ++ 5 files changed, 9 insertions(+), 7 deletions(-) diff --git a/VERSION b/VERSION index 9c0e7f4c..351b8d7e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.36.00a32 +0.36.00a33 diff --git a/dependencies.yaml b/dependencies.yaml index 755bf0ae..ba838e50 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -86,12 +86,14 @@ dependencies: packages: - scikit-build-core>=0.7.0 - output_types: [requirements, pyproject] + packages: + - scikit-build-core[pyproject]>=0.7.0 + - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file # This index is needed for rmm. 
- --extra-index-url=https://pypi.nvidia.com - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple - - scikit-build-core[pyproject]>=0.7.0 checks: common: - output_types: [conda, requirements] diff --git a/python/ucxx/VERSION b/python/ucxx/VERSION index 974cf28e..558194c5 120000 --- a/python/ucxx/VERSION +++ b/python/ucxx/VERSION @@ -1 +1 @@ -VERSION \ No newline at end of file +../../VERSION \ No newline at end of file diff --git a/python/ucxx/_version.py b/python/ucxx/_version.py index 1be8f627..96556aca 100644 --- a/python/ucxx/_version.py +++ b/python/ucxx/_version.py @@ -14,7 +14,5 @@ import importlib.resources -__version__ = ( - importlib.resources.files("ucxx").joinpath("VERSION").read_text().strip() -) -__git_commit__ = "f16c8de52bbee0b83807348aaddba5e9c3e44c78" \ No newline at end of file +__version__ = importlib.resources.files("ucxx").joinpath("VERSION").read_text().strip() +__git_commit__ = "28afe764fa8cdab9e622d1d7b0c61166887e985d" diff --git a/python/ucxx/examples/CMakeLists.txt b/python/ucxx/examples/CMakeLists.txt index b7c3868c..b6f9402e 100644 --- a/python/ucxx/examples/CMakeLists.txt +++ b/python/ucxx/examples/CMakeLists.txt @@ -3,6 +3,8 @@ # SPDX-License-Identifier: BSD 3-Clause License # ================================================================================= +find_package(Python3 REQUIRED) + set(cython_sources python_future_task_app.pyx) set(linked_libraries ucxx::ucxx ucxx::python Python3::Python) From 14c8799292d5d73e60ab7bd1c74cd2952d03fed8 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 17 Jan 2024 21:13:13 +0000 Subject: [PATCH 03/79] Remove hardcoded versions --- VERSION | 2 +- python/ucxx/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/VERSION b/VERSION index 351b8d7e..69f66ea1 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.36.00a33 +0.36.00 diff --git a/python/ucxx/_version.py b/python/ucxx/_version.py index 96556aca..b755ee6f 100644 --- 
a/python/ucxx/_version.py +++ b/python/ucxx/_version.py @@ -15,4 +15,4 @@ import importlib.resources __version__ = importlib.resources.files("ucxx").joinpath("VERSION").read_text().strip() -__git_commit__ = "28afe764fa8cdab9e622d1d7b0c61166887e985d" +__git_commit__ = "" From d48d5a4f4c11700341851ce2dbf0ac3d0f66f10a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 17 Jan 2024 21:16:17 +0000 Subject: [PATCH 04/79] Fix sed expressions so that they run (overzealous, but OK) --- ci/build_wheel.sh | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 0823cfce..cffb1cdd 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -34,12 +34,8 @@ if ! rapids-is-release-build; then alpha_spec=',>=0.0.0a0' fi -# add or replace -cuXX to package name. Determine if this is a package mention based -# on whether it has any sort of pinning. There's a usage of "rmm" in known_rapids that -# should not get this -cuXX tacked on. -sed -r -i -E "s/rmm(-cu[0-9]+|)([\=\<\>\!].*)/rmm${PACKAGE_CUDA_SUFFIX}\2/g" ${pyproject_file} -# Capture the pin and add the alpha spec to it as needed -sed -r -i -E "/${alpha_spec}\"(,|)$/! 
s/rmm-cu[0-9]+([\=\<\>\!].*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} +# Add -cuXX to package name +sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} cd "${package_dir}" From 99b940d5583adb878095892128d1a00a7fe21e52 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 17 Jan 2024 21:40:52 +0000 Subject: [PATCH 05/79] Find Development.Embed component to make Python3::Python target available --- python/CMakeLists.txt | 3 +++ python/ucxx/examples/CMakeLists.txt | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 400b5f2d..f04b99ab 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -44,6 +44,9 @@ endif() rapids_cython_init() +rapids_find_package( + Python3 REQUIRED COMPONENTS Development.Embed +) add_subdirectory(ucxx/examples) add_subdirectory(ucxx/_lib) diff --git a/python/ucxx/examples/CMakeLists.txt b/python/ucxx/examples/CMakeLists.txt index b6f9402e..b7c3868c 100644 --- a/python/ucxx/examples/CMakeLists.txt +++ b/python/ucxx/examples/CMakeLists.txt @@ -3,8 +3,6 @@ # SPDX-License-Identifier: BSD 3-Clause License # ================================================================================= -find_package(Python3 REQUIRED) - set(cython_sources python_future_task_app.pyx) set(linked_libraries ucxx::ucxx ucxx::python Python3::Python) From 7e2426c3a28832a6614739f6451f11d2e98e2468 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 17 Jan 2024 21:43:22 +0000 Subject: [PATCH 06/79] SKBUILD_CMAKE_ARGS is semicolon-separated --- ci/build_wheel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index cffb1cdd..72674a70 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -39,7 +39,7 @@ sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject cd "${package_dir}" -SKBUILD_CMAKE_ARGS="-DUCXX_ENABLE_PYTHON=ON 
-DUCXX_ENABLE_RMM=ON" \ +SKBUILD_CMAKE_ARGS="-DUCXX_ENABLE_PYTHON=ON;-DUCXX_ENABLE_RMM=ON" \ python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/* From 955b360e689331f80dcf6b83b4596c3a0d4c2d7e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 17 Jan 2024 21:49:42 +0000 Subject: [PATCH 07/79] Don't change to dir and just use absolute paths everywhere --- ci/build_wheel.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 72674a70..ecba3527 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -37,10 +37,8 @@ fi # Add -cuXX to package name sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} -cd "${package_dir}" - SKBUILD_CMAKE_ARGS="-DUCXX_ENABLE_PYTHON=ON;-DUCXX_ENABLE_RMM=ON" \ - python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check + python -m pip wheel "${package_dir}"/ -w "${package_dir}"/dist -vvv --no-deps --disable-pip-version-check python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/* From 6c894dbf9d5918d50258fd7f3a4535b9e30b9a4b Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 17 Jan 2024 21:55:23 +0000 Subject: [PATCH 08/79] Make sure ucxx_python is also installed --- python/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index f04b99ab..3eecb526 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -40,6 +40,7 @@ if(NOT ucxx_FOUND) # and modify the rpaths appropriately. 
set(cython_lib_dir ucxx) install(TARGETS ucxx DESTINATION ${cython_lib_dir}) + install(TARGETS ucxx_python DESTINATION ${cython_lib_dir}) endif() rapids_cython_init() From a175ee74e8884c61f91f6c361196638557bd3479 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 17 Jan 2024 22:05:37 +0000 Subject: [PATCH 09/79] Make sure all associated targets are set --- python/ucxx/_lib/CMakeLists.txt | 3 ++- python/ucxx/examples/CMakeLists.txt | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/python/ucxx/_lib/CMakeLists.txt b/python/ucxx/_lib/CMakeLists.txt index 21007f82..6f0c45a1 100644 --- a/python/ucxx/_lib/CMakeLists.txt +++ b/python/ucxx/_lib/CMakeLists.txt @@ -9,7 +9,8 @@ set(linked_libraries ucxx::ucxx ucxx::python Python3::Python) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS ucxx + LINKED_LIBRARIES "${linked_libraries}" + ASSOCIATED_TARGETS ucxx ucxx_python ) find_package(Python REQUIRED COMPONENTS Development NumPy) diff --git a/python/ucxx/examples/CMakeLists.txt b/python/ucxx/examples/CMakeLists.txt index b7c3868c..d654d239 100644 --- a/python/ucxx/examples/CMakeLists.txt +++ b/python/ucxx/examples/CMakeLists.txt @@ -10,6 +10,7 @@ rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" + ASSOCIATED_TARGETS ucxx ucxx_python ) target_include_directories(python_future_task_app PRIVATE ".") From ef88198e6b5c37ea7c1f7d5a4a4e5cc60e0f5f64 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 17 Jan 2024 22:07:45 +0000 Subject: [PATCH 10/79] Enabling Python is now automatic --- ci/build_wheel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index ecba3527..992e068c 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -37,7 +37,7 @@ fi # Add -cuXX to package name sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} 
-SKBUILD_CMAKE_ARGS="-DUCXX_ENABLE_PYTHON=ON;-DUCXX_ENABLE_RMM=ON" \ +SKBUILD_CMAKE_ARGS="-DUCXX_ENABLE_RMM=ON" \ python -m pip wheel "${package_dir}"/ -w "${package_dir}"/dist -vvv --no-deps --disable-pip-version-check python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/* From a0c72b051e25976d75d8cd89e58573320103a957 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 17 Jan 2024 22:09:32 +0000 Subject: [PATCH 11/79] Don't load gtest if not necessary --- cpp/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 68cd2037..8ef61916 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -92,7 +92,9 @@ rapids_cpm_init() # find rmm include(cmake/thirdparty/get_rmm.cmake) # find or install GoogleTest -include(cmake/thirdparty/get_gtest.cmake) +if(BUILD_TESTS) + include(cmake/thirdparty/get_gtest.cmake) +endif() # ################################################################################################## # * library targets ------------------------------------------------------------------------------- From 0d6c53aa9ad56a79fc17424b4f92e0854de5e009 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 17 Jan 2024 22:13:58 +0000 Subject: [PATCH 12/79] Consolidate rmm logic and ensure it is only called when requested --- cpp/CMakeLists.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8ef61916..7570b447 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -90,7 +90,9 @@ rapids_find_package( # add third party dependencies using CPM rapids_cpm_init() # find rmm -include(cmake/thirdparty/get_rmm.cmake) +if(UCXX_ENABLE_RMM) + include(cmake/thirdparty/get_rmm.cmake) +endif() # find or install GoogleTest if(BUILD_TESTS) include(cmake/thirdparty/get_gtest.cmake) @@ -159,17 +161,15 @@ target_compile_definitions( # Enable RMM if necessary if(UCXX_ENABLE_RMM) - 
target_compile_definitions(ucxx PUBLIC UCXX_ENABLE_RMM) + target_link_libraries(ucxx PUBLIC rmm::rmm) + target_compile_definitions(ucxx PUBLIC UCXX_ENABLE_RMM "SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}") endif() # Define spdlog level -target_compile_definitions(ucxx PUBLIC "SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}") # Specify the target module library dependencies -target_link_libraries( - ucxx - PUBLIC rmm::rmm ucx::ucp -) +target_link_libraries(ucxx PUBLIC ucx::ucp) + # Add Conda library, and include paths if specified if(TARGET conda_env) From b7a9d6e13b324fea108f4fa76f5c3776641391dc Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 17 Jan 2024 22:14:24 +0000 Subject: [PATCH 13/79] Properly enable Python by default --- python/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 3eecb526..97310331 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -31,6 +31,7 @@ include(rapids-cython-core) if(NOT ucxx_FOUND) set(BUILD_TESTS OFF) set(BUILD_BENCHMARKS OFF) + set(UCXX_ENABLE_PYTHON ON) set(_exclude_from_all "") From df1150d407a23eb96ad8b828ecb33f12b7dbf50c Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 17 Jan 2024 22:28:50 +0000 Subject: [PATCH 14/79] Switch back to raw find_package --- python/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 97310331..d0dec651 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -46,7 +46,7 @@ endif() rapids_cython_init() -rapids_find_package( +find_package( Python3 REQUIRED COMPONENTS Development.Embed ) add_subdirectory(ucxx/examples) From d0e20461847f4f3c590f5f2786a35b1f0d9860ab Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 17 Jan 2024 14:48:58 -0800 Subject: [PATCH 15/79] Make `ci/test_wheel.sh` executable --- ci/test_wheel.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode 
change 100644 => 100755 ci/test_wheel.sh diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh old mode 100644 new mode 100755 From 697c7992c408dbcf8f7e19b56e1a4cafd54662de Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 17 Jan 2024 18:11:28 -0500 Subject: [PATCH 16/79] Remove policy from upload artifact --- ci/build_wheel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 992e068c..dae2f096 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -43,4 +43,4 @@ SKBUILD_CMAKE_ARGS="-DUCXX_ENABLE_RMM=ON" \ python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/* RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="ucxx_${AUDITWHEEL_POLICY}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist +RAPIDS_PY_WHEEL_NAME="ucxx_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist From e24e23cf8cad0a390469dc38f471b0554aabd64f Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 17 Jan 2024 21:04:11 -0500 Subject: [PATCH 17/79] Also update cudf --- ci/build_wheel.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index dae2f096..83308567 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -36,6 +36,7 @@ fi # Add -cuXX to package name sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} +sed -r -i "s/cudf(.*)\"/cudf${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} SKBUILD_CMAKE_ARGS="-DUCXX_ENABLE_RMM=ON" \ python -m pip wheel "${package_dir}"/ -w "${package_dir}"/dist -vvv --no-deps --disable-pip-version-check From 3580d0b7d3c9052bb7526c4400b662a6162dcd9d Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 18 Jan 2024 04:54:45 +0000 Subject: [PATCH 18/79] Fix cupy dependencies and patching --- ci/build_wheel.sh | 5 +++++ .../all_cuda-118_arch-x86_64.yaml | 2 +- 
.../all_cuda-120_arch-x86_64.yaml | 2 +- dependencies.yaml | 21 ++++++++++++++++++- python/pyproject.toml | 2 +- 5 files changed, 28 insertions(+), 4 deletions(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 83308567..938ad77c 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -38,6 +38,11 @@ fi sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} sed -r -i "s/cudf(.*)\"/cudf${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} +# Update cupy package name (different suffix from RAPIDS) +if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then + sed -i "s/cupy-cuda11x/cupy-cuda12x/g" ${pyproject_file} +fi + SKBUILD_CMAKE_ARGS="-DUCXX_ENABLE_RMM=ON" \ python -m pip wheel "${package_dir}"/ -w "${package_dir}"/dist -vvv --no-deps --disable-pip-version-check diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 749f555a..1e17cd15 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -15,7 +15,7 @@ dependencies: - cuda-version=11.8 - cudatoolkit - cudf==24.2.* -- cupy +- cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 - dask diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 6fc17fee..ad753502 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -15,7 +15,7 @@ dependencies: - cuda-cudart-dev - cuda-version=12.0 - cudf==24.2.* -- cupy +- cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 - dask diff --git a/dependencies.yaml b/dependencies.yaml index ba838e50..765d06a6 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -187,10 +187,29 @@ dependencies: packages: - cloudpickle - cudf==24.2.* - - cupy - dask - distributed - numba>=0.57.1 - pytest - pytest-asyncio - pytest-rerunfailures + - output_types: conda + packages: + - cupy>=12.0.0 + specific: + - output_types: 
[requirements, pyproject] + matrices: + # All CUDA 12 versions + - matrix: {cuda: "12.2"} + packages: &cupy_packages_cu12 + - cupy-cuda12x>=12.0.0 + - {matrix: {cuda: "12.1"}, packages: *cupy_packages_cu12} + - {matrix: {cuda: "12.0"}, packages: *cupy_packages_cu12} + # All CUDA 11 versions + - matrix: {cuda: "11.8"} + packages: &cupy_packages_cu11 + - cupy-cuda11x>=12.0.0 + - {matrix: {cuda: "11.5"}, packages: *cupy_packages_cu11} + - {matrix: {cuda: "11.4"}, packages: *cupy_packages_cu11} + - {matrix: {cuda: "11.2"}, packages: *cupy_packages_cu11} + - {matrix: null, packages: *cupy_packages_cu11} diff --git a/python/pyproject.toml b/python/pyproject.toml index f36ad1c4..82249345 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -41,7 +41,7 @@ classifiers = [ test = [ "cloudpickle", "cudf==24.2.*", - "cupy", + "cupy-cuda11x>=12.0.0", "dask", "distributed", "numba>=0.57.1", From cd01361d58204f5867c08595aa384e78ea3cf23e Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 02:18:14 -0800 Subject: [PATCH 19/79] Move CI C++/Python test implementations to common script --- ci/test_common.sh | 148 ++++++++++++++++++++++++++++++++++++++++++++++ ci/test_cpp.sh | 96 +++++------------------------- ci/test_python.sh | 73 ++--------------------- 3 files changed, 167 insertions(+), 150 deletions(-) create mode 100755 ci/test_common.sh diff --git a/ci/test_common.sh b/ci/test_common.sh new file mode 100755 index 00000000..ca46ac52 --- /dev/null +++ b/ci/test_common.sh @@ -0,0 +1,148 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-License-Identifier: BSD-3-Clause + +set -euo pipefail + +source "$(dirname "$0")/test_utils.sh" + +BINARY_PATH=${CONDA_PREFIX}/bin + + +##################################### C++ ###################################### +_SERVER_PORT=12345 + +run_cpp_tests() { + CMD_LINE="timeout 10m ${BINARY_PATH}/gtests/libucxx/UCXX_TEST" + + log_command "${CMD_LINE}" + UCX_TCP_CM_REUSEADDR=y ${CMD_LINE} +} + +run_cpp_benchmark() { + SERVER_PORT=$1 + PROGRESS_MODE=$2 + + CMD_LINE_SERVER="timeout 1m ${BINARY_PATH}/benchmarks/libucxx/ucxx_perftest -s 8388608 -r -n 20 -m ${PROGRESS_MODE} -p ${SERVER_PORT}" + CMD_LINE_CLIENT="timeout 1m ${BINARY_PATH}/benchmarks/libucxx/ucxx_perftest -s 8388608 -r -n 20 -m ${PROGRESS_MODE} -p ${SERVER_PORT} 127.0.0.1" + + log_command "${CMD_LINE_SERVER}" + UCX_TCP_CM_REUSEADDR=y ${CMD_LINE_SERVER} & + sleep 1 + + log_command "${CMD_LINE_CLIENT}" + ${CMD_LINE_CLIENT} +} + +run_cpp_example() { + SERVER_PORT=$1 + PROGRESS_MODE=$2 + + CMD_LINE="timeout 1m ${BINARY_PATH}/examples/libucxx/ucxx_example_basic -m ${PROGRESS_MODE} -p ${SERVER_PORT}" + + log_command "${CMD_LINE}" + UCX_TCP_CM_REUSEADDR=y ${CMD_LINE} +} + +run_cpp_port_retry() { + MAX_ATTEMPTS=${1} + RUN_TYPE=${2} + PROGRESS_MODE=${3} + + set +e + for attempt in $(seq 1 ${MAX_ATTEMPTS}); do + echo "Attempt ${attempt}/${MAX_ATTEMPTS} to run ${RUN_TYPE}" + + _SERVER_PORT=$((_SERVER_PORT + 1)) # Use different ports every time to prevent `Device is busy` + + if [[ "${RUN_TYPE}" == "benchmark" ]]; then + run_cpp_benchmark ${_SERVER_PORT} ${PROGRESS_MODE} + elif [[ "${RUN_TYPE}" == "example" ]]; then + run_cpp_example ${_SERVER_PORT} ${PROGRESS_MODE} + else + set -e + echo "Unknown test type "${RUN_TYPE}"" + exit 1 + fi + + LAST_STATUS=$? 
+ if [ ${LAST_STATUS} -eq 0 ]; then + break; + fi + sleep 1 + done + set -e + + if [ ${LAST_STATUS} -ne 0 ]; then + echo "Failure running benchmark client after ${MAX_ATTEMPTS} attempts" + exit $LAST_STATUS + fi +} + + +#################################### Python #################################### +run_py_tests() { + CMD_LINE="timeout 2m pytest -vs python/ucxx/_lib/tests/" + log_command "${CMD_LINE}" + timeout 2m pytest -vs python/ucxx/_lib/tests/ +} + +run_py_tests_async() { + PROGRESS_MODE=$1 + ENABLE_DELAYED_SUBMISSION=$2 + ENABLE_PYTHON_FUTURE=$3 + SKIP=$4 + + CMD_LINE="UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 20m pytest -vs python/ucxx/_lib_async/tests/ --durations=50" + + if [ $SKIP -ne 0 ]; then + echo -e "\e[1;33mSkipping unstable test: ${CMD_LINE}\e[0m" + else + log_command "${CMD_LINE}" + UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 20m pytest -vs python/ucxx/_lib_async/tests/ --durations=50 + fi +} + +run_py_benchmark() { + BACKEND=$1 + PROGRESS_MODE=$2 + ASYNCIO_WAIT=$3 + ENABLE_DELAYED_SUBMISSION=$4 + ENABLE_PYTHON_FUTURE=$5 + N_BUFFERS=$6 + SLOW=$7 + + if [ $ASYNCIO_WAIT -ne 0 ]; then + ASYNCIO_WAIT="--asyncio-wait" + else + ASYNCIO_WAIT="" + fi + + CMD_LINE="UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 2m python -m ucxx.benchmarks.send_recv --backend ${BACKEND} -o cupy --reuse-alloc -n 8MiB --n-buffers $N_BUFFERS --progress-mode ${PROGRESS_MODE} ${ASYNCIO_WAIT}" + + # Workaround for https://github.com/rapidsai/ucxx/issues/15 + CMD_LINE="UCX_KEEPALIVE_INTERVAL=1ms ${CMD_LINE}" + + log_command "${CMD_LINE}" + if [ $SLOW -ne 0 ]; then + echo -e "\e[1;33mSLOW BENCHMARK: it may seem like a deadlock but will eventually complete.\e[0m" + fi + + 
UCX_KEEPALIVE_INTERVAL=1ms UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 2m python -m ucxx.benchmarks.send_recv --backend ${BACKEND} -o cupy --reuse-alloc -n 8MiB --n-buffers $N_BUFFERS --progress-mode ${PROGRESS_MODE} ${ASYNCIO_WAIT} +} + +################################## Distributed ################################# +run_distributed_ucxx_tests() { + PROGRESS_MODE=$1 + ENABLE_DELAYED_SUBMISSION=$2 + ENABLE_PYTHON_FUTURE=$3 + + CMD_LINE="UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 10m pytest -vs python/distributed-ucxx/distributed_ucxx/tests/" + + # Workaround for https://github.com/rapidsai/ucxx/issues/15 + # CMD_LINE="UCX_KEEPALIVE_INTERVAL=1ms ${CMD_LINE}" + + log_command "${CMD_LINE}" + UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 10m pytest -vs python/distributed-ucxx/distributed_ucxx/tests/ +} diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index 5d72d463..a8d5d056 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -6,6 +6,7 @@ set -euo pipefail source "$(dirname "$0")/test_utils.sh" +source "$(dirname "$0")/test_common.sh" rapids-logger "Create test conda environment" . 
/opt/conda/etc/profile.d/conda.sh @@ -24,75 +25,6 @@ print_system_stats BINARY_PATH=${CONDA_PREFIX}/bin -_SERVER_PORT=12345 - -run_tests() { - CMD_LINE="timeout 10m ${BINARY_PATH}/gtests/libucxx/UCXX_TEST" - - log_command "${CMD_LINE}" - UCX_TCP_CM_REUSEADDR=y ${CMD_LINE} -} - -run_benchmark() { - SERVER_PORT=$1 - PROGRESS_MODE=$2 - - CMD_LINE_SERVER="timeout 1m ${BINARY_PATH}/benchmarks/libucxx/ucxx_perftest -s 8388608 -r -n 20 -m ${PROGRESS_MODE} -p ${SERVER_PORT}" - CMD_LINE_CLIENT="timeout 1m ${BINARY_PATH}/benchmarks/libucxx/ucxx_perftest -s 8388608 -r -n 20 -m ${PROGRESS_MODE} -p ${SERVER_PORT} 127.0.0.1" - - log_command "${CMD_LINE_SERVER}" - UCX_TCP_CM_REUSEADDR=y ${CMD_LINE_SERVER} & - sleep 1 - - log_command "${CMD_LINE_CLIENT}" - ${CMD_LINE_CLIENT} -} - -run_example() { - SERVER_PORT=$1 - PROGRESS_MODE=$2 - - CMD_LINE="timeout 1m ${BINARY_PATH}/examples/libucxx/ucxx_example_basic -m ${PROGRESS_MODE} -p ${SERVER_PORT}" - - log_command "${CMD_LINE}" - UCX_TCP_CM_REUSEADDR=y ${CMD_LINE} -} - -run_port_retry() { - MAX_ATTEMPTS=${1} - RUN_TYPE=${2} - PROGRESS_MODE=${3} - - set +e - for attempt in $(seq 1 ${MAX_ATTEMPTS}); do - echo "Attempt ${attempt}/${MAX_ATTEMPTS} to run ${RUN_TYPE}" - - _SERVER_PORT=$((_SERVER_PORT + 1)) # Use different ports every time to prevent `Device is busy` - - if [[ "${RUN_TYPE}" == "benchmark" ]]; then - run_benchmark ${_SERVER_PORT} ${PROGRESS_MODE} - elif [[ "${RUN_TYPE}" == "example" ]]; then - run_example ${_SERVER_PORT} ${PROGRESS_MODE} - else - set -e - echo "Unknown test type "${RUN_TYPE}"" - exit 1 - fi - - LAST_STATUS=$? 
- if [ ${LAST_STATUS} -eq 0 ]; then - break; - fi - sleep 1 - done - set -e - - if [ ${LAST_STATUS} -ne 0 ]; then - echo "Failure running benchmark client after ${MAX_ATTEMPTS} attempts" - exit $LAST_STATUS - fi -} - rapids-logger "Downloading artifacts from previous jobs" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) @@ -104,20 +36,20 @@ print_ucx_config rapids-logger "Run tests with conda package" rapids-logger "C++ Tests" -run_tests +run_cpp_tests rapids-logger "C++ Benchmarks" -# run_port_retry MAX_ATTEMPTS RUN_TYPE PROGRESS_MODE -run_port_retry 10 "benchmark" "polling" -run_port_retry 10 "benchmark" "blocking" -run_port_retry 10 "benchmark" "thread-polling" -run_port_retry 10 "benchmark" "thread-blocking" -run_port_retry 10 "benchmark" "wait" +# run_cpp_port_retry MAX_ATTEMPTS RUN_TYPE PROGRESS_MODE +run_cpp_port_retry 10 "benchmark" "polling" +run_cpp_port_retry 10 "benchmark" "blocking" +run_cpp_port_retry 10 "benchmark" "thread-polling" +run_cpp_port_retry 10 "benchmark" "thread-blocking" +run_cpp_port_retry 10 "benchmark" "wait" rapids-logger "C++ Examples" -# run_port_retry MAX_ATTEMPTS RUN_TYPE PROGRESS_MODE -run_port_retry 10 "example" "polling" -run_port_retry 10 "example" "blocking" -run_port_retry 10 "example" "thread-polling" -run_port_retry 10 "example" "thread-blocking" -run_port_retry 10 "example" "wait" +# run_cpp_port_retry MAX_ATTEMPTS RUN_TYPE PROGRESS_MODE +run_cpp_port_retry 10 "example" "polling" +run_cpp_port_retry 10 "example" "blocking" +run_cpp_port_retry 10 "example" "thread-polling" +run_cpp_port_retry 10 "example" "thread-blocking" +run_cpp_port_retry 10 "example" "wait" diff --git a/ci/test_python.sh b/ci/test_python.sh index 79b932db..777bc273 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -6,6 +6,7 @@ set -euo pipefail source "$(dirname "$0")/test_utils.sh" +source "$(dirname "$0")/test_common.sh" rapids-logger "Create test conda environment" . 
/opt/conda/etc/profile.d/conda.sh @@ -22,70 +23,6 @@ rapids-print-env print_system_stats -run_tests() { - CMD_LINE="timeout 2m pytest -vs python/ucxx/_lib/tests/" - log_command "${CMD_LINE}" - timeout 2m pytest -vs python/ucxx/_lib/tests/ -} - -run_tests_async() { - PROGRESS_MODE=$1 - ENABLE_DELAYED_SUBMISSION=$2 - ENABLE_PYTHON_FUTURE=$3 - SKIP=$4 - - CMD_LINE="UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 20m pytest -vs python/ucxx/_lib_async/tests/ --durations=50" - - if [ $SKIP -ne 0 ]; then - echo -e "\e[1;33mSkipping unstable test: ${CMD_LINE}\e[0m" - else - log_command "${CMD_LINE}" - UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 20m pytest -vs python/ucxx/_lib_async/tests/ --durations=50 - fi -} - -run_py_benchmark() { - BACKEND=$1 - PROGRESS_MODE=$2 - ASYNCIO_WAIT=$3 - ENABLE_DELAYED_SUBMISSION=$4 - ENABLE_PYTHON_FUTURE=$5 - N_BUFFERS=$6 - SLOW=$7 - - if [ $ASYNCIO_WAIT -ne 0 ]; then - ASYNCIO_WAIT="--asyncio-wait" - else - ASYNCIO_WAIT="" - fi - - CMD_LINE="UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 2m python -m ucxx.benchmarks.send_recv --backend ${BACKEND} -o cupy --reuse-alloc -n 8MiB --n-buffers $N_BUFFERS --progress-mode ${PROGRESS_MODE} ${ASYNCIO_WAIT}" - - # Workaround for https://github.com/rapidsai/ucxx/issues/15 - CMD_LINE="UCX_KEEPALIVE_INTERVAL=1ms ${CMD_LINE}" - - log_command "${CMD_LINE}" - if [ $SLOW -ne 0 ]; then - echo -e "\e[1;33mSLOW BENCHMARK: it may seem like a deadlock but will eventually complete.\e[0m" - fi - - UCX_KEEPALIVE_INTERVAL=1ms UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 2m python -m ucxx.benchmarks.send_recv --backend ${BACKEND} -o cupy --reuse-alloc -n 
8MiB --n-buffers $N_BUFFERS --progress-mode ${PROGRESS_MODE} ${ASYNCIO_WAIT} -} - -run_distributed_ucxx_tests() { - PROGRESS_MODE=$1 - ENABLE_DELAYED_SUBMISSION=$2 - ENABLE_PYTHON_FUTURE=$3 - - CMD_LINE="UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 10m pytest -vs python/distributed-ucxx/distributed_ucxx/tests/" - - # Workaround for https://github.com/rapidsai/ucxx/issues/15 - # CMD_LINE="UCX_KEEPALIVE_INTERVAL=1ms ${CMD_LINE}" - - log_command "${CMD_LINE}" - UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 10m pytest -vs python/distributed-ucxx/distributed_ucxx/tests/ -} - rapids-logger "Downloading artifacts from previous jobs" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) @@ -103,12 +40,12 @@ print_ucx_config rapids-logger "Run tests with conda package" rapids-logger "Python Core Tests" -run_tests +run_py_tests rapids-logger "Python Async Tests" -# run_tests_async PROGRESS_MODE ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE SKIP -run_tests_async thread 0 0 0 -run_tests_async thread 1 1 0 +# run_py_tests_async PROGRESS_MODE ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE SKIP +run_py_tests_async thread 0 0 0 +run_py_tests_async thread 1 1 0 rapids-logger "Python Benchmarks" # run_py_benchmark BACKEND PROGRESS_MODE ASYNCIO_WAIT ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE NBUFFERS SLOW From 47ed4259171fb7c2662f8e0b78f3d4c822f2f1bb Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 02:21:12 -0800 Subject: [PATCH 20/79] Add basic wheel test set --- ci/test_wheel.sh | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh index 243e9b68..821186e1 100755 --- a/ci/test_wheel.sh +++ b/ci/test_wheel.sh @@ -5,6 +5,9 @@ set -euo pipefail export 
PROJECT_NAME="ucxx" +source "$(dirname "$0")/test_utils.sh" +source "$(dirname "$0")/test_common.sh" + mkdir -p ./dist RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" RAPIDS_PY_WHEEL_NAME="${PROJECT_NAME}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist @@ -24,5 +27,18 @@ python -m pip install $(echo ./dist/${PROJECT_NAME}*.whl)[test] # python -m pytest ./python/${PROJECT_NAME}/tests -k 'not test_sparse_pca_inputs' -n 4 --ignore=python/cuml/tests/dask && python -m pytest ./python/${PROJECT_NAME}/tests -k 'test_sparse_pca_inputs' && python -m pytest ./python/cuml/tests/dask # fi -echo "Please add meaningful tests here. This file was copied from CuML and needs to be adapted to UCXX's needs." -exit 1 \ No newline at end of file +print_ucx_config + +rapids-logger "C++ Tests" +run_cpp_tests + +rapids-logger "Python Core Tests" +run_py_tests + +rapids-logger "Python Async Tests" +# run_py_tests_async PROGRESS_MODE ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE SKIP +run_py_tests_async thread 1 1 0 + +rapids-logger "Distributed Tests" +# run_distributed_ucxx_tests PROGRESS_MODE ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE +run_distributed_ucxx_tests thread 1 1 From fd4aab732d8760f2c0736a173a7ea89f0bd0e558 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 02:57:58 -0800 Subject: [PATCH 21/79] Fix `BINARY_PATH` --- ci/test_common.sh | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/ci/test_common.sh b/ci/test_common.sh index ca46ac52..0b99f10e 100755 --- a/ci/test_common.sh +++ b/ci/test_common.sh @@ -7,13 +7,14 @@ set -euo pipefail source "$(dirname "$0")/test_utils.sh" -BINARY_PATH=${CONDA_PREFIX}/bin - ##################################### C++ ###################################### _SERVER_PORT=12345 run_cpp_tests() { + RUNTIME_PATH=${CONDA_PREFIX:-./} + BINARY_PATH=${RUNTIME_PATH}/bin + CMD_LINE="timeout 10m ${BINARY_PATH}/gtests/libucxx/UCXX_TEST" log_command 
"${CMD_LINE}" @@ -24,6 +25,9 @@ run_cpp_benchmark() { SERVER_PORT=$1 PROGRESS_MODE=$2 + RUNTIME_PATH=${CONDA_PREFIX:-./} + BINARY_PATH=${RUNTIME_PATH}/bin + CMD_LINE_SERVER="timeout 1m ${BINARY_PATH}/benchmarks/libucxx/ucxx_perftest -s 8388608 -r -n 20 -m ${PROGRESS_MODE} -p ${SERVER_PORT}" CMD_LINE_CLIENT="timeout 1m ${BINARY_PATH}/benchmarks/libucxx/ucxx_perftest -s 8388608 -r -n 20 -m ${PROGRESS_MODE} -p ${SERVER_PORT} 127.0.0.1" @@ -39,6 +43,9 @@ run_cpp_example() { SERVER_PORT=$1 PROGRESS_MODE=$2 + RUNTIME_PATH=${CONDA_PREFIX:-./} + BINARY_PATH=${RUNTIME_PATH}/bin + CMD_LINE="timeout 1m ${BINARY_PATH}/examples/libucxx/ucxx_example_basic -m ${PROGRESS_MODE} -p ${SERVER_PORT}" log_command "${CMD_LINE}" From 5f8b0c9b42ac080c3296c164268c9c2fc6aa17c8 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 04:42:53 -0800 Subject: [PATCH 22/79] Check for `ucx_info` presence before running --- ci/test_utils.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ci/test_utils.sh b/ci/test_utils.sh index 237b1f1d..42deaafc 100755 --- a/ci/test_utils.sh +++ b/ci/test_utils.sh @@ -20,5 +20,9 @@ print_system_stats() { print_ucx_config() { rapids-logger "UCX Version and Build Configuration" - ucx_info -v + if [ $(which ucx_info) == "" ]; then + echo "ucx_info not found" + else + ucx_info -v + fi } From 3742bb509a4d9dcc396849cf5f0e1e3b5a98bf10 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 05:10:13 -0800 Subject: [PATCH 23/79] Fix `ucx_info` chek --- ci/test_utils.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ci/test_utils.sh b/ci/test_utils.sh index 42deaafc..3f9299ff 100755 --- a/ci/test_utils.sh +++ b/ci/test_utils.sh @@ -20,9 +20,11 @@ print_system_stats() { print_ucx_config() { rapids-logger "UCX Version and Build Configuration" - if [ $(which ucx_info) == "" ]; then - echo "ucx_info not found" - else + + which ucx_info > /dev/null + if [ $? 
-eq 0 ]; then ucx_info -v + else + echo "ucx_info not found" fi } From 9305a32bfafdda21752ba73bea8b5ee7f0ce74fe Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 06:03:51 -0800 Subject: [PATCH 24/79] Prevent `which` call from causing a failure --- ci/test_utils.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/test_utils.sh b/ci/test_utils.sh index 3f9299ff..df443a2d 100755 --- a/ci/test_utils.sh +++ b/ci/test_utils.sh @@ -21,7 +21,9 @@ print_system_stats() { print_ucx_config() { rapids-logger "UCX Version and Build Configuration" + set +e which ucx_info > /dev/null + set -e if [ $? -eq 0 ]; then ucx_info -v else From 1da02e6014066c958cdd9822648d15d8dfe4b8d8 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 06:21:16 -0800 Subject: [PATCH 25/79] Simplify `ucx_info` error handling --- ci/test_utils.sh | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/ci/test_utils.sh b/ci/test_utils.sh index df443a2d..88bd8ba5 100755 --- a/ci/test_utils.sh +++ b/ci/test_utils.sh @@ -22,11 +22,6 @@ print_ucx_config() { rapids-logger "UCX Version and Build Configuration" set +e - which ucx_info > /dev/null + ucx_info -v set -e - if [ $? 
-eq 0 ]; then - ucx_info -v - else - echo "ucx_info not found" - fi } From fff8e48b0702f512ca57db3132ac9a3d0f78f5fd Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 07:13:39 -0800 Subject: [PATCH 26/79] Disable C++ testing in wheels tests --- ci/test_wheel.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh index 821186e1..decb355a 100755 --- a/ci/test_wheel.sh +++ b/ci/test_wheel.sh @@ -29,8 +29,8 @@ python -m pip install $(echo ./dist/${PROJECT_NAME}*.whl)[test] print_ucx_config -rapids-logger "C++ Tests" -run_cpp_tests +# rapids-logger "C++ Tests" +# run_cpp_tests rapids-logger "Python Core Tests" run_py_tests From f888489086c77c43c8e1d10e53b463e0ce792bde Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 17:36:42 +0100 Subject: [PATCH 27/79] Run tests with `python -m` Co-authored-by: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> --- ci/test_common.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ci/test_common.sh b/ci/test_common.sh index 0b99f10e..d689cc69 100755 --- a/ci/test_common.sh +++ b/ci/test_common.sh @@ -90,9 +90,9 @@ run_cpp_port_retry() { #################################### Python #################################### run_py_tests() { - CMD_LINE="timeout 2m pytest -vs python/ucxx/_lib/tests/" + CMD_LINE="timeout 2m python -m pytest -vs python/ucxx/_lib/tests/" log_command "${CMD_LINE}" - timeout 2m pytest -vs python/ucxx/_lib/tests/ + timeout 2m python -m pytest -vs python/ucxx/_lib/tests/ } run_py_tests_async() { @@ -101,13 +101,13 @@ run_py_tests_async() { ENABLE_PYTHON_FUTURE=$3 SKIP=$4 - CMD_LINE="UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 20m pytest -vs python/ucxx/_lib_async/tests/ --durations=50" + CMD_LINE="UCXPY_PROGRESS_MODE=${PROGRESS_MODE} 
UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 20m python -m pytest -vs python/ucxx/_lib_async/tests/ --durations=50" if [ $SKIP -ne 0 ]; then echo -e "\e[1;33mSkipping unstable test: ${CMD_LINE}\e[0m" else log_command "${CMD_LINE}" - UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 20m pytest -vs python/ucxx/_lib_async/tests/ --durations=50 + UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 20m python -m pytest -vs python/ucxx/_lib_async/tests/ --durations=50 fi } @@ -145,11 +145,11 @@ run_distributed_ucxx_tests() { ENABLE_DELAYED_SUBMISSION=$2 ENABLE_PYTHON_FUTURE=$3 - CMD_LINE="UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 10m pytest -vs python/distributed-ucxx/distributed_ucxx/tests/" + CMD_LINE="UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 10m python -m pytest -vs python/distributed-ucxx/distributed_ucxx/tests/" # Workaround for https://github.com/rapidsai/ucxx/issues/15 # CMD_LINE="UCX_KEEPALIVE_INTERVAL=1ms ${CMD_LINE}" log_command "${CMD_LINE}" - UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 10m pytest -vs python/distributed-ucxx/distributed_ucxx/tests/ + UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 10m python -m pytest -vs python/distributed-ucxx/distributed_ucxx/tests/ } From 73788215b63d937e9a26c36214aaf33688749bbf Mon Sep 17 
00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 10:18:50 -0800 Subject: [PATCH 28/79] Patch UCX libraries --- ci/build_wheel.sh | 84 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 938ad77c..29cec174 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -48,5 +48,87 @@ SKBUILD_CMAKE_ARGS="-DUCXX_ENABLE_RMM=ON" \ python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/* -RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +# Auditwheel rewrites dynamic libraries that are referenced at link time in the +# package. However, UCX loads a number of sub-libraries at runtime via dlopen; +# these are not picked up by auditwheel. Since we have a priori knowledge of +# what these libraries are, we mimic the behaviour of auditwheel by using the +# same hash-based uniqueness scheme and rewriting the link paths. + +WHL=$(realpath ${package_dir}/final_dist/ucxx*manylinux*.whl) + +# first grab the auditwheel hashes for libuc{tms} +LIBUCM=$(unzip -l $WHL | awk 'match($4, /libucm-[^\.]+\./) { print substr($4, RSTART) }') +LIBUCT=$(unzip -l $WHL | awk 'match($4, /libuct-[^\.]+\./) { print substr($4, RSTART) }') +LIBUCS=$(unzip -l $WHL | awk 'match($4, /libucs-[^\.]+\./) { print substr($4, RSTART) }') +LIBNUMA=$(unzip -l $WHL | awk 'match($4, /libnuma-[^\.]+\./) { print substr($4, RSTART) }') + +# Extract the libraries that have already been patched in by auditwheel +mkdir -p repair_dist/ucxx_${RAPIDS_PY_CUDA_SUFFIX}.libs/ucx +unzip $WHL "ucxx_${RAPIDS_PY_CUDA_SUFFIX}.libs/*.so*" -d repair_dist/ + +# Patch the RPATH to include ORIGIN for each library +pushd repair_dist/ucxx_${RAPIDS_PY_CUDA_SUFFIX}.libs +for f in libu*.so* +do + if [[ -f $f ]]; then + patchelf --add-rpath '$ORIGIN' $f + fi +done + +popd + +# Now copy in all the extra libraries that are only ever loaded at runtime +pushd 
repair_dist/ucxx_${RAPIDS_PY_CUDA_SUFFIX}.libs/ucx +if [[ -d /usr/lib64/ucx ]]; then + cp -P /usr/lib64/ucx/* . +elif [[ -d /usr/lib/ucx ]]; then + cp -P /usr/lib/ucx/* . +else + echo "Could not find ucx libraries" + exit 1 +fi + +# we link against /lib/site-packages/ucxx_${RAPIDS_PY_CUDA_SUFFIX}.lib/libuc{ptsm} +# we also amend the rpath to search one directory above to *find* libuc{tsm} +for f in libu*.so* +do + # Avoid patching symlinks, which is redundant + if [[ ! -L $f ]]; then + patchelf --replace-needed libuct.so.0 $LIBUCT $f + patchelf --replace-needed libucs.so.0 $LIBUCS $f + patchelf --replace-needed libucm.so.0 $LIBUCM $f + patchelf --replace-needed libnuma.so.1 $LIBNUMA $f + patchelf --add-rpath '$ORIGIN/..' $f + fi +done + +# Bring in cudart as well. To avoid symbol collision with other libraries e.g. +# cupy we mimic auditwheel by renaming the libraries to include the hashes of +# their names. Since there will typically be a chain of symlinks +# libcudart.so->libcudart.so.X->libcudart.so.X.Y.Z we need to follow the chain +# and rename all of them. + +find /usr/local/cuda/ -name "libcudart*.so*" | xargs cp -P -t . 
+src=libcudart.so +hash=$(sha256sum ${src} | awk '{print substr($1, 0, 8)}') +target=$(basename $(readlink -f ${src})) + +mv ${target} ${target/libcudart/libcudart-${hash}} +while readlink ${src} > /dev/null; do + target=$(readlink ${src}) + ln -s ${target/libcudart/libcudart-${hash}} ${src/libcudart/libcudart-${hash}} + rm -f ${src} + src=${target} +done + +to_rewrite=$(ldd libuct_cuda.so | awk '/libcudart/ { print $1 }') +patchelf --replace-needed ${to_rewrite} libcudart-${hash}.so libuct_cuda.so +patchelf --add-rpath '$ORIGIN' libuct_cuda.so + +popd + +pushd repair_dist +zip -r $WHL ucxx_${RAPIDS_PY_CUDA_SUFFIX}.libs/ +popd + RAPIDS_PY_WHEEL_NAME="ucxx_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist From 40d5e8a26d17c88daf87b410ae4fb2fb8cad1ba8 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 11:24:23 -0800 Subject: [PATCH 29/79] Switch to `rapids-dask-dependency` for `dask-cudf` --- ci/release/update-version.sh | 2 ++ conda/environments/all_cuda-118_arch-x86_64.yaml | 3 +-- conda/environments/all_cuda-120_arch-x86_64.yaml | 3 +-- dependencies.yaml | 3 +-- python/distributed-ucxx/pyproject.toml | 3 +-- python/pyproject.toml | 3 +-- 6 files changed, 7 insertions(+), 10 deletions(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 51b97910..593bd7b8 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -50,6 +50,7 @@ DEPENDENCIES=( dask-cuda dask-cudf librmm + rapids-dask-dependency rmm ) for DEP in "${DEPENDENCIES[@]}"; do @@ -57,6 +58,7 @@ for DEP in "${DEPENDENCIES[@]}"; do sed_runner "/-.* ${DEP}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}\.*/g" ${FILE}; done sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}\.*\"/g" python/pyproject.toml; + sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}\.*\"/g" python/distributed-ucxx/pyproject.toml; done # rapids-cmake version diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml 
b/conda/environments/all_cuda-118_arch-x86_64.yaml index 1e17cd15..bf87025c 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -18,10 +18,8 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask - dask-cuda==24.2.* - dask-cudf==24.2.* -- distributed - fmt>=10.1.1,<11 - gmock>=1.13.0 - gtest>=1.13.0 @@ -38,6 +36,7 @@ dependencies: - pytest-asyncio - pytest-rerunfailures - python>=3.9,<3.11 +- rapids-dask-dependency==24.2.* - rmm==24.2.* - scikit-build-core>=0.7.0 - spdlog>=1.12.0,<1.13 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index ad753502..ca323f2e 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -18,10 +18,8 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask - dask-cuda==24.2.* - dask-cudf==24.2.* -- distributed - fmt>=10.1.1,<11 - gmock>=1.13.0 - gtest>=1.13.0 @@ -38,6 +36,7 @@ dependencies: - pytest-asyncio - pytest-rerunfailures - python>=3.9,<3.11 +- rapids-dask-dependency==24.2.* - rmm==24.2.* - scikit-build-core>=0.7.0 - spdlog>=1.12.0,<1.13 diff --git a/dependencies.yaml b/dependencies.yaml index 765d06a6..f5246efb 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -187,12 +187,11 @@ dependencies: packages: - cloudpickle - cudf==24.2.* - - dask - - distributed - numba>=0.57.1 - pytest - pytest-asyncio - pytest-rerunfailures + - rapids-dask-dependency==24.2.* - output_types: conda packages: - cupy>=12.0.0 diff --git a/python/distributed-ucxx/pyproject.toml b/python/distributed-ucxx/pyproject.toml index 93b071ba..1c6832b1 100644 --- a/python/distributed-ucxx/pyproject.toml +++ b/python/distributed-ucxx/pyproject.toml @@ -16,8 +16,7 @@ authors = [ license = { text = "Apache-2.0" } requires-python = ">=3.8" dependencies = [ - "dask >=2023.9.2", - "distributed >=2023.9.2", + "rapids-dask-dependency==24.2.*" 
"numba >=0.54", ] classifiers = [ diff --git a/python/pyproject.toml b/python/pyproject.toml index 82249345..dfa775c1 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -42,12 +42,11 @@ test = [ "cloudpickle", "cudf==24.2.*", "cupy-cuda11x>=12.0.0", - "dask", - "distributed", "numba>=0.57.1", "pytest", "pytest-asyncio", "pytest-rerunfailures", + "rapids-dask-dependency==24.2.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] From fb1c5234a448fe284624ed764116ada86e62acb9 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 11:35:28 -0800 Subject: [PATCH 30/79] Build wheel for distributed-ucxx package --- ci/build_wheel.sh | 196 +++++++++++++++-------------- ci/build_wheel_distributed_ucxx.sh | 10 ++ ci/build_wheel_ucxx.sh | 10 ++ 3 files changed, 121 insertions(+), 95 deletions(-) create mode 100644 ci/build_wheel_distributed_ucxx.sh create mode 100644 ci/build_wheel_ucxx.sh diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 29cec174..376f39fd 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -3,8 +3,10 @@ set -euo pipefail -package_name="ucxx" -package_dir="python" +# package_name="ucxx" +# package_dir="python" +package_name=$1 +package_dir=$2 source rapids-configure-sccache source rapids-date-string @@ -34,101 +36,105 @@ if ! 
rapids-is-release-build; then alpha_spec=',>=0.0.0a0' fi -# Add -cuXX to package name -sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} -sed -r -i "s/cudf(.*)\"/cudf${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} +if [[ ${package_name} == "distributed-ucxx" ]]; then + sed -r -i "s/rapids-dask-dependency==(.*)\"/rapids-dask-dependency==\1${alpha_spec}\"/g" ${pyproject_file} +elif [[ ${package_name} == "ucxx" ]]; then + # Add -cuXX to package name + sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} + sed -r -i "s/cudf(.*)\"/cudf${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} -# Update cupy package name (different suffix from RAPIDS) -if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then - sed -i "s/cupy-cuda11x/cupy-cuda12x/g" ${pyproject_file} -fi - -SKBUILD_CMAKE_ARGS="-DUCXX_ENABLE_RMM=ON" \ - python -m pip wheel "${package_dir}"/ -w "${package_dir}"/dist -vvv --no-deps --disable-pip-version-check - -python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/* - -# Auditwheel rewrites dynamic libraries that are referenced at link time in the -# package. However, UCX loads a number of sub-libraries at runtime via dlopen; -# these are not picked up by auditwheel. Since we have a priori knowledge of -# what these libraries are, we mimic the behaviour of auditwheel by using the -# same hash-based uniqueness scheme and rewriting the link paths. 
- -WHL=$(realpath ${package_dir}/final_dist/ucxx*manylinux*.whl) - -# first grab the auditwheel hashes for libuc{tms} -LIBUCM=$(unzip -l $WHL | awk 'match($4, /libucm-[^\.]+\./) { print substr($4, RSTART) }') -LIBUCT=$(unzip -l $WHL | awk 'match($4, /libuct-[^\.]+\./) { print substr($4, RSTART) }') -LIBUCS=$(unzip -l $WHL | awk 'match($4, /libucs-[^\.]+\./) { print substr($4, RSTART) }') -LIBNUMA=$(unzip -l $WHL | awk 'match($4, /libnuma-[^\.]+\./) { print substr($4, RSTART) }') - -# Extract the libraries that have already been patched in by auditwheel -mkdir -p repair_dist/ucxx_${RAPIDS_PY_CUDA_SUFFIX}.libs/ucx -unzip $WHL "ucxx_${RAPIDS_PY_CUDA_SUFFIX}.libs/*.so*" -d repair_dist/ - -# Patch the RPATH to include ORIGIN for each library -pushd repair_dist/ucxx_${RAPIDS_PY_CUDA_SUFFIX}.libs -for f in libu*.so* -do - if [[ -f $f ]]; then - patchelf --add-rpath '$ORIGIN' $f + # Update cupy package name (different suffix from RAPIDS) + if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then + sed -i "s/cupy-cuda11x/cupy-cuda12x/g" ${pyproject_file} fi -done -popd + SKBUILD_CMAKE_ARGS="-DUCXX_ENABLE_RMM=ON" \ + python -m pip wheel "${package_dir}"/ -w "${package_dir}"/dist -vvv --no-deps --disable-pip-version-check + + python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/* + + # Auditwheel rewrites dynamic libraries that are referenced at link time in the + # package. However, UCX loads a number of sub-libraries at runtime via dlopen; + # these are not picked up by auditwheel. Since we have a priori knowledge of + # what these libraries are, we mimic the behaviour of auditwheel by using the + # same hash-based uniqueness scheme and rewriting the link paths. 
+ + WHL=$(realpath ${package_dir}/final_dist/ucxx*manylinux*.whl) + + # first grab the auditwheel hashes for libuc{tms} + LIBUCM=$(unzip -l $WHL | awk 'match($4, /libucm-[^\.]+\./) { print substr($4, RSTART) }') + LIBUCT=$(unzip -l $WHL | awk 'match($4, /libuct-[^\.]+\./) { print substr($4, RSTART) }') + LIBUCS=$(unzip -l $WHL | awk 'match($4, /libucs-[^\.]+\./) { print substr($4, RSTART) }') + LIBNUMA=$(unzip -l $WHL | awk 'match($4, /libnuma-[^\.]+\./) { print substr($4, RSTART) }') + + # Extract the libraries that have already been patched in by auditwheel + mkdir -p repair_dist/ucxx_${RAPIDS_PY_CUDA_SUFFIX}.libs/ucx + unzip $WHL "ucxx_${RAPIDS_PY_CUDA_SUFFIX}.libs/*.so*" -d repair_dist/ + + # Patch the RPATH to include ORIGIN for each library + pushd repair_dist/ucxx_${RAPIDS_PY_CUDA_SUFFIX}.libs + for f in libu*.so* + do + if [[ -f $f ]]; then + patchelf --add-rpath '$ORIGIN' $f + fi + done + + popd + + # Now copy in all the extra libraries that are only ever loaded at runtime + pushd repair_dist/ucxx_${RAPIDS_PY_CUDA_SUFFIX}.libs/ucx + if [[ -d /usr/lib64/ucx ]]; then + cp -P /usr/lib64/ucx/* . + elif [[ -d /usr/lib/ucx ]]; then + cp -P /usr/lib/ucx/* . + else + echo "Could not find ucx libraries" + exit 1 + fi -# Now copy in all the extra libraries that are only ever loaded at runtime -pushd repair_dist/ucxx_${RAPIDS_PY_CUDA_SUFFIX}.libs/ucx -if [[ -d /usr/lib64/ucx ]]; then - cp -P /usr/lib64/ucx/* . -elif [[ -d /usr/lib/ucx ]]; then - cp -P /usr/lib/ucx/* . + # we link against /lib/site-packages/ucxx_${RAPIDS_PY_CUDA_SUFFIX}.lib/libuc{ptsm} + # we also amend the rpath to search one directory above to *find* libuc{tsm} + for f in libu*.so* + do + # Avoid patching symlinks, which is redundant + if [[ ! -L $f ]]; then + patchelf --replace-needed libuct.so.0 $LIBUCT $f + patchelf --replace-needed libucs.so.0 $LIBUCS $f + patchelf --replace-needed libucm.so.0 $LIBUCM $f + patchelf --replace-needed libnuma.so.1 $LIBNUMA $f + patchelf --add-rpath '$ORIGIN/..' 
$f + fi + done + + # Bring in cudart as well. To avoid symbol collision with other libraries e.g. + # cupy we mimic auditwheel by renaming the libraries to include the hashes of + # their names. Since there will typically be a chain of symlinks + # libcudart.so->libcudart.so.X->libcudart.so.X.Y.Z we need to follow the chain + # and rename all of them. + + find /usr/local/cuda/ -name "libcudart*.so*" | xargs cp -P -t . + src=libcudart.so + hash=$(sha256sum ${src} | awk '{print substr($1, 0, 8)}') + target=$(basename $(readlink -f ${src})) + + mv ${target} ${target/libcudart/libcudart-${hash}} + while readlink ${src} > /dev/null; do + target=$(readlink ${src}) + ln -s ${target/libcudart/libcudart-${hash}} ${src/libcudart/libcudart-${hash}} + rm -f ${src} + src=${target} + done + + to_rewrite=$(ldd libuct_cuda.so | awk '/libcudart/ { print $1 }') + patchelf --replace-needed ${to_rewrite} libcudart-${hash}.so libuct_cuda.so + patchelf --add-rpath '$ORIGIN' libuct_cuda.so + + popd + + pushd repair_dist + zip -r $WHL ucxx_${RAPIDS_PY_CUDA_SUFFIX}.libs/ + popd else - echo "Could not find ucx libraries" - exit 1 -fi - -# we link against /lib/site-packages/ucxx_${RAPIDS_PY_CUDA_SUFFIX}.lib/libuc{ptsm} -# we also amend the rpath to search one directory above to *find* libuc{tsm} -for f in libu*.so* -do - # Avoid patching symlinks, which is redundant - if [[ ! -L $f ]]; then - patchelf --replace-needed libuct.so.0 $LIBUCT $f - patchelf --replace-needed libucs.so.0 $LIBUCS $f - patchelf --replace-needed libucm.so.0 $LIBUCM $f - patchelf --replace-needed libnuma.so.1 $LIBNUMA $f - patchelf --add-rpath '$ORIGIN/..' $f - fi -done - -# Bring in cudart as well. To avoid symbol collision with other libraries e.g. -# cupy we mimic auditwheel by renaming the libraries to include the hashes of -# their names. Since there will typically be a chain of symlinks -# libcudart.so->libcudart.so.X->libcudart.so.X.Y.Z we need to follow the chain -# and rename all of them. 
- -find /usr/local/cuda/ -name "libcudart*.so*" | xargs cp -P -t . -src=libcudart.so -hash=$(sha256sum ${src} | awk '{print substr($1, 0, 8)}') -target=$(basename $(readlink -f ${src})) - -mv ${target} ${target/libcudart/libcudart-${hash}} -while readlink ${src} > /dev/null; do - target=$(readlink ${src}) - ln -s ${target/libcudart/libcudart-${hash}} ${src/libcudart/libcudart-${hash}} - rm -f ${src} - src=${target} -done - -to_rewrite=$(ldd libuct_cuda.so | awk '/libcudart/ { print $1 }') -patchelf --replace-needed ${to_rewrite} libcudart-${hash}.so libuct_cuda.so -patchelf --add-rpath '$ORIGIN' libuct_cuda.so - -popd - -pushd repair_dist -zip -r $WHL ucxx_${RAPIDS_PY_CUDA_SUFFIX}.libs/ -popd - -RAPIDS_PY_WHEEL_NAME="ucxx_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist + echo "Unknown package '${package_name}'" + exit 1 diff --git a/ci/build_wheel_distributed_ucxx.sh b/ci/build_wheel_distributed_ucxx.sh new file mode 100644 index 00000000..557e1c52 --- /dev/null +++ b/ci/build_wheel_distributed_ucxx.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -euo pipefail + +package_dir="python/distributed_ucxx" + +./ci/build_wheel.sh dask-cudf ${package_dir} + +RAPIDS_PY_WHEEL_NAME="distributed_ucxx_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/dist diff --git a/ci/build_wheel_ucxx.sh b/ci/build_wheel_ucxx.sh new file mode 100644 index 00000000..7f8b8648 --- /dev/null +++ b/ci/build_wheel_ucxx.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +set -euo pipefail + +package_dir="python" + +./ci/build_wheel.sh ucxx ${package_dir} + +RAPIDS_PY_WHEEL_NAME="ucxx_${AUDITWHEEL_POLICY}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist From f470e02da3b398597060725f6bc4126ee1d1b148 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 11:46:23 -0800 Subject: [PATCH 31/79] Fix pyproject --- python/distributed-ucxx/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/distributed-ucxx/pyproject.toml b/python/distributed-ucxx/pyproject.toml index 1c6832b1..417988d6 100644 --- a/python/distributed-ucxx/pyproject.toml +++ b/python/distributed-ucxx/pyproject.toml @@ -16,8 +16,8 @@ authors = [ license = { text = "Apache-2.0" } requires-python = ">=3.8" dependencies = [ - "rapids-dask-dependency==24.2.*" "numba >=0.54", + "rapids-dask-dependency==24.2.*" ] classifiers = [ "Intended Audience :: Developers", From e2e9951420e7ad6bdc4a3981d4df14793915773b Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 12:21:03 -0800 Subject: [PATCH 32/79] Add distributed-ucxx build/tests to GH workflows --- .github/workflows/build.yaml | 29 ++++++++++++++++---- .github/workflows/pr.yaml | 30 ++++++++++++++++----- .github/workflows/test.yaml | 13 +++++++-- ci/build_wheel.sh | 2 -- ci/build_wheel_distributed_ucxx.sh | 2 +- ci/test_wheel_distributed_ucxx.sh | 34 ++++++++++++++++++++++++ ci/{test_wheel.sh => test_wheel_ucxx.sh} | 7 ----- 7 files changed, 93 insertions(+), 24 deletions(-) create mode 100755 ci/test_wheel_distributed_ucxx.sh rename ci/{test_wheel.sh => test_wheel_ucxx.sh} (84%) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index f549d57b..28e69ef2 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -43,7 +43,7 @@ jobs: branch: ${{ inputs.branch }} date: ${{ inputs.date }} sha: ${{ inputs.sha }} - wheel-build: + wheel-build-ucxx: secrets: inherit uses: 
rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 with: @@ -51,9 +51,9 @@ jobs: branch: ${{ inputs.branch }} sha: ${{ inputs.sha }} date: ${{ inputs.date }} - script: ci/build_wheel.sh - wheel-publish: - needs: wheel-build + script: ci/build_wheel_ucxx.sh + wheel-publish-ucxx: + needs: wheel-build-ucxx secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 with: @@ -61,4 +61,23 @@ jobs: branch: ${{ inputs.branch }} sha: ${{ inputs.sha }} date: ${{ inputs.date }} - package-name: ucxx \ No newline at end of file + package-name: ucxx + wheel-build-distributed-ucxx: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + script: ci/build_wheel_distributed_ucxx.sh + wheel-publish-distributed-ucxx: + needs: wheel-build-distributed-ucxx + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: distributed_ucxx diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 6c082063..d64c87f1 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -16,8 +16,10 @@ jobs: - conda-cpp-build - conda-cpp-tests - conda-python-tests - - wheel-build - - wheel-tests + - wheel-build-ucxx + - wheel-tests-ucxx + - wheel-build-distributed-ucxx + - wheel-tests-distributed-ucxx secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.02 checks: @@ -45,17 +47,31 @@ jobs: with: build_type: pull-request container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" - wheel-build: + wheel-build-ucxx: needs: checks secrets: inherit uses: 
rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 with: build_type: pull-request - script: ci/build_wheel.sh - wheel-tests: - needs: wheel-build + script: ci/build_wheel_ucxx.sh + wheel-tests-ucxx: + needs: wheel-build-ucxx secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 with: build_type: pull-request - script: ci/test_wheel.sh \ No newline at end of file + script: ci/test_wheel_ucxx.sh + wheel-build-distributed-ucxx: + needs: checks + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + with: + build_type: pull-request + script: ci/build_wheel_distributed_ucxx.sh + wheel-tests-distributed-ucxx: + needs: wheel-build-distributed-ucxx + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + with: + build_type: pull-request + script: ci/test_wheel_distributed_ucxx.sh diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 4ed555b6..d87e1d27 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -32,7 +32,7 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" - wheel-tests: + wheel-tests-ucxx: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 with: @@ -40,4 +40,13 @@ jobs: branch: ${{ inputs.branch }} date: ${{ inputs.date }} sha: ${{ inputs.sha }} - script: ci/test_wheel.sh \ No newline at end of file + script: ci/test_wheel_ucxx.sh + wheel-tests-distributed-ucxx: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + script: ci/test_wheel_distributed_ucxx.sh diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 376f39fd..90b8ad8c 100755 --- 
a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -3,8 +3,6 @@ set -euo pipefail -# package_name="ucxx" -# package_dir="python" package_name=$1 package_dir=$2 diff --git a/ci/build_wheel_distributed_ucxx.sh b/ci/build_wheel_distributed_ucxx.sh index 557e1c52..0ea0a689 100644 --- a/ci/build_wheel_distributed_ucxx.sh +++ b/ci/build_wheel_distributed_ucxx.sh @@ -5,6 +5,6 @@ set -euo pipefail package_dir="python/distributed_ucxx" -./ci/build_wheel.sh dask-cudf ${package_dir} +./ci/build_wheel.sh distributed-ucxx ${package_dir} RAPIDS_PY_WHEEL_NAME="distributed_ucxx_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/dist diff --git a/ci/test_wheel_distributed_ucxx.sh b/ci/test_wheel_distributed_ucxx.sh new file mode 100755 index 00000000..74ce3d6b --- /dev/null +++ b/ci/test_wheel_distributed_ucxx.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright (c) 2023, NVIDIA CORPORATION. + +set -euo pipefail + +export PROJECT_NAME="distributed-ucxx" + +source "$(dirname "$0")/test_utils.sh" +source "$(dirname "$0")/test_common.sh" + +mkdir -p ./dist +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +RAPIDS_PY_WHEEL_NAME="${PROJECT_NAME}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist + +# On arm also need to install CMake because treelite needs to be compiled (no wheels available for arm). 
+if [[ "$(arch)" == "aarch64" ]]; then + python -m pip install cmake +fi + +# echo to expand wildcard before adding `[extra]` requires for pip +python -m pip install $(echo ./dist/${PROJECT_NAME}*.whl)[test] + +# Run smoke tests for aarch64 pull requests +# if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then +# python ci/wheel_smoke_test.py +# else +# python -m pytest ./python/${PROJECT_NAME}/tests -k 'not test_sparse_pca_inputs' -n 4 --ignore=python/cuml/tests/dask && python -m pytest ./python/${PROJECT_NAME}/tests -k 'test_sparse_pca_inputs' && python -m pytest ./python/cuml/tests/dask +# fi + +print_ucx_config + +rapids-logger "Distributed Tests" +# run_distributed_ucxx_tests PROGRESS_MODE ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE +run_distributed_ucxx_tests thread 1 1 diff --git a/ci/test_wheel.sh b/ci/test_wheel_ucxx.sh similarity index 84% rename from ci/test_wheel.sh rename to ci/test_wheel_ucxx.sh index decb355a..6c3c2276 100755 --- a/ci/test_wheel.sh +++ b/ci/test_wheel_ucxx.sh @@ -29,16 +29,9 @@ python -m pip install $(echo ./dist/${PROJECT_NAME}*.whl)[test] print_ucx_config -# rapids-logger "C++ Tests" -# run_cpp_tests - rapids-logger "Python Core Tests" run_py_tests rapids-logger "Python Async Tests" # run_py_tests_async PROGRESS_MODE ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE SKIP run_py_tests_async thread 1 1 0 - -rapids-logger "Distributed Tests" -# run_distributed_ucxx_tests PROGRESS_MODE ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE -run_distributed_ucxx_tests thread 1 1 From 5858b0288ecd8f23c1cc40c7b1786036e5916946 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 12:34:26 -0800 Subject: [PATCH 33/79] Add execution permission for new wheel build scripts --- ci/build_wheel_distributed_ucxx.sh | 0 ci/build_wheel_ucxx.sh | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 ci/build_wheel_distributed_ucxx.sh mode change 100644 => 100755 
ci/build_wheel_ucxx.sh diff --git a/ci/build_wheel_distributed_ucxx.sh b/ci/build_wheel_distributed_ucxx.sh old mode 100644 new mode 100755 diff --git a/ci/build_wheel_ucxx.sh b/ci/build_wheel_ucxx.sh old mode 100644 new mode 100755 From 257ad3dc42f925ecda1cc4e7175533bba34b538e Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 12:40:42 -0800 Subject: [PATCH 34/79] Add missing `fi` statement --- ci/build_wheel.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 90b8ad8c..fd1ca00b 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -136,3 +136,4 @@ elif [[ ${package_name} == "ucxx" ]]; then else echo "Unknown package '${package_name}'" exit 1 +fi From 07f9b42a9aed2818cde43cdcc211560eede65733 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 12:51:12 -0800 Subject: [PATCH 35/79] Fix distributed-ucxx package directory --- ci/build_wheel_distributed_ucxx.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/build_wheel_distributed_ucxx.sh b/ci/build_wheel_distributed_ucxx.sh index 0ea0a689..80ae694d 100755 --- a/ci/build_wheel_distributed_ucxx.sh +++ b/ci/build_wheel_distributed_ucxx.sh @@ -3,7 +3,7 @@ set -euo pipefail -package_dir="python/distributed_ucxx" +package_dir="python/distributed-ucxx" ./ci/build_wheel.sh distributed-ucxx ${package_dir} From 7818f337bcbf08f51e03e3a399da2db4a07abb3b Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 12:53:00 -0800 Subject: [PATCH 36/79] Set `RAPIDS_PY_CUDA_SUFFIX` in caller scripts --- ci/build_wheel.sh | 2 -- ci/build_wheel_distributed_ucxx.sh | 2 ++ ci/build_wheel_ucxx.sh | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index fd1ca00b..3a11cb23 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -12,8 +12,6 @@ source rapids-date-string version=$(rapids-generate-version) commit=$(git rev-parse HEAD) 
-RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" - # This is the version of the suffix with a preceding hyphen. It's used # everywhere except in the final wheel name. PACKAGE_CUDA_SUFFIX="-${RAPIDS_PY_CUDA_SUFFIX}" diff --git a/ci/build_wheel_distributed_ucxx.sh b/ci/build_wheel_distributed_ucxx.sh index 80ae694d..f93c388a 100755 --- a/ci/build_wheel_distributed_ucxx.sh +++ b/ci/build_wheel_distributed_ucxx.sh @@ -5,6 +5,8 @@ set -euo pipefail package_dir="python/distributed-ucxx" +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" + ./ci/build_wheel.sh distributed-ucxx ${package_dir} RAPIDS_PY_WHEEL_NAME="distributed_ucxx_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/dist diff --git a/ci/build_wheel_ucxx.sh b/ci/build_wheel_ucxx.sh index 7f8b8648..b62f6147 100755 --- a/ci/build_wheel_ucxx.sh +++ b/ci/build_wheel_ucxx.sh @@ -5,6 +5,8 @@ set -euo pipefail package_dir="python" +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" + ./ci/build_wheel.sh ucxx ${package_dir} RAPIDS_PY_WHEEL_NAME="ucxx_${AUDITWHEEL_POLICY}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist From 0d7330beb2b6ac5849168acaaceff5544bdb41aa Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 13:03:19 -0800 Subject: [PATCH 37/79] Export `RAPIDS_PY_CUDA_SUFFIX` --- ci/build_wheel_distributed_ucxx.sh | 2 +- ci/build_wheel_ucxx.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/build_wheel_distributed_ucxx.sh b/ci/build_wheel_distributed_ucxx.sh index f93c388a..bd328f16 100755 --- a/ci/build_wheel_distributed_ucxx.sh +++ b/ci/build_wheel_distributed_ucxx.sh @@ -5,7 +5,7 @@ set -euo pipefail package_dir="python/distributed-ucxx" -RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +export RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" ./ci/build_wheel.sh distributed-ucxx 
${package_dir} diff --git a/ci/build_wheel_ucxx.sh b/ci/build_wheel_ucxx.sh index b62f6147..f0f8f005 100755 --- a/ci/build_wheel_ucxx.sh +++ b/ci/build_wheel_ucxx.sh @@ -5,7 +5,7 @@ set -euo pipefail package_dir="python" -RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +export RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" ./ci/build_wheel.sh ucxx ${package_dir} From a43a9bf3062b395e4f4baacc55b4bc0b17f35891 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 13:19:00 -0800 Subject: [PATCH 38/79] Update distributed-ucxx versioning --- .../distributed_ucxx/_version.py | 23 +++++++++++++++++++ python/distributed-ucxx/pyproject.toml | 8 ------- 2 files changed, 23 insertions(+), 8 deletions(-) create mode 100644 python/distributed-ucxx/distributed_ucxx/_version.py diff --git a/python/distributed-ucxx/distributed_ucxx/_version.py b/python/distributed-ucxx/distributed_ucxx/_version.py new file mode 100644 index 00000000..536769cc --- /dev/null +++ b/python/distributed-ucxx/distributed_ucxx/_version.py @@ -0,0 +1,23 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import importlib.resources + +__version__ = ( + importlib.resources.files("distributed_ucxx") + .joinpath("VERSION") + .read_text() + .strip() +) +__git_commit__ = "" diff --git a/python/distributed-ucxx/pyproject.toml b/python/distributed-ucxx/pyproject.toml index 417988d6..93894ed8 100644 --- a/python/distributed-ucxx/pyproject.toml +++ b/python/distributed-ucxx/pyproject.toml @@ -109,11 +109,3 @@ exclude = [ "docs.*", "tests.*", ] - -[tool.versioneer] -VCS = "git" -style = "pep440" -versionfile_source = "distributed_ucxx/_version.py" -versionfile_build = "distributed_ucxx/_version.py" -tag_prefix = "v" -parentdir_prefix = "distributed_ucxx-" From 3e1fb71028dde7a620308bd1d492f33a1c9ae080 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 13:40:16 -0800 Subject: [PATCH 39/79] Add missing distributed-ucxx wheel build step --- ci/build_wheel.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 3a11cb23..4ded4978 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -34,6 +34,10 @@ fi if [[ ${package_name} == "distributed-ucxx" ]]; then sed -r -i "s/rapids-dask-dependency==(.*)\"/rapids-dask-dependency==\1${alpha_spec}\"/g" ${pyproject_file} + + cd "${package_dir}" + + python -m pip wheel . 
-w dist -vvv --no-deps --disable-pip-version-check elif [[ ${package_name} == "ucxx" ]]; then # Add -cuXX to package name sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} From 17adea4ecaaf580bce1f4ac1a699b4b035aaf666 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 14:12:44 -0800 Subject: [PATCH 40/79] Package name fixes --- ci/build_wheel_ucxx.sh | 2 +- ci/test_wheel_distributed_ucxx.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/build_wheel_ucxx.sh b/ci/build_wheel_ucxx.sh index f0f8f005..74598ad0 100755 --- a/ci/build_wheel_ucxx.sh +++ b/ci/build_wheel_ucxx.sh @@ -9,4 +9,4 @@ export RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION} ./ci/build_wheel.sh ucxx ${package_dir} -RAPIDS_PY_WHEEL_NAME="ucxx_${AUDITWHEEL_POLICY}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist +RAPIDS_PY_WHEEL_NAME="ucxx_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist diff --git a/ci/test_wheel_distributed_ucxx.sh b/ci/test_wheel_distributed_ucxx.sh index 74ce3d6b..15723ffd 100755 --- a/ci/test_wheel_distributed_ucxx.sh +++ b/ci/test_wheel_distributed_ucxx.sh @@ -3,7 +3,7 @@ set -euo pipefail -export PROJECT_NAME="distributed-ucxx" +export PROJECT_NAME="distributed_ucxx" source "$(dirname "$0")/test_utils.sh" source "$(dirname "$0")/test_common.sh" From 0ad395563c5f0a956b089c11ce508f739d21a127 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 14:26:18 -0800 Subject: [PATCH 41/79] Do not export `PROJECT_NAME` --- ci/test_wheel_distributed_ucxx.sh | 2 +- ci/test_wheel_ucxx.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/test_wheel_distributed_ucxx.sh b/ci/test_wheel_distributed_ucxx.sh index 15723ffd..0b34688f 100755 --- a/ci/test_wheel_distributed_ucxx.sh +++ b/ci/test_wheel_distributed_ucxx.sh @@ -3,7 +3,7 @@ set -euo pipefail -export 
PROJECT_NAME="distributed_ucxx" +PROJECT_NAME="distributed_ucxx" source "$(dirname "$0")/test_utils.sh" source "$(dirname "$0")/test_common.sh" diff --git a/ci/test_wheel_ucxx.sh b/ci/test_wheel_ucxx.sh index 6c3c2276..35e9ac62 100755 --- a/ci/test_wheel_ucxx.sh +++ b/ci/test_wheel_ucxx.sh @@ -3,7 +3,7 @@ set -euo pipefail -export PROJECT_NAME="ucxx" +PROJECT_NAME="ucxx" source "$(dirname "$0")/test_utils.sh" source "$(dirname "$0")/test_common.sh" From bcbed26e7597b361585cb657d3d4ff5310689f09 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 14:42:59 -0800 Subject: [PATCH 42/79] Use `rapids-dask-dependency` for `distributed-ucxx` conda package --- conda/recipes/ucxx/meta.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/conda/recipes/ucxx/meta.yaml b/conda/recipes/ucxx/meta.yaml index bf411a79..35ea7766 100644 --- a/conda/recipes/ucxx/meta.yaml +++ b/conda/recipes/ucxx/meta.yaml @@ -271,8 +271,7 @@ outputs: - tomli run: - python * *_cpython - - dask >=2023.9.2 - - distributed >=2023.9.2 + - rapids-dask-dependency ={{ minor_version }} - {{ pin_subpackage('ucxx', max_pin='x.x') }} test: imports: From f09d4d74390c9dc709eaf5ecf67eeca8c42dd4d5 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 15:05:29 -0800 Subject: [PATCH 43/79] Comment out distributed-ucxx wheels GH workflows --- .github/workflows/build.yaml | 38 ++++++++++++++++++------------------ .github/workflows/pr.yaml | 28 +++++++++++++------------- .github/workflows/test.yaml | 18 ++++++++--------- 3 files changed, 42 insertions(+), 42 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 28e69ef2..6adb4700 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -62,22 +62,22 @@ jobs: sha: ${{ inputs.sha }} date: ${{ inputs.date }} package-name: ucxx - wheel-build-distributed-ucxx: - secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 - with: - build_type: ${{ inputs.build_type || 'branch' }} - branch: ${{ inputs.branch }} - sha: ${{ inputs.sha }} - date: ${{ inputs.date }} - script: ci/build_wheel_distributed_ucxx.sh - wheel-publish-distributed-ucxx: - needs: wheel-build-distributed-ucxx - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 - with: - build_type: ${{ inputs.build_type || 'branch' }} - branch: ${{ inputs.branch }} - sha: ${{ inputs.sha }} - date: ${{ inputs.date }} - package-name: distributed_ucxx + # wheel-build-distributed-ucxx: + # secrets: inherit + # uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + # with: + # build_type: ${{ inputs.build_type || 'branch' }} + # branch: ${{ inputs.branch }} + # sha: ${{ inputs.sha }} + # date: ${{ inputs.date }} + # script: ci/build_wheel_distributed_ucxx.sh + # wheel-publish-distributed-ucxx: + # needs: wheel-build-distributed-ucxx + # secrets: inherit + # uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 + # with: + # build_type: ${{ inputs.build_type || 'branch' }} + # branch: ${{ inputs.branch }} + # sha: ${{ inputs.sha }} + # date: ${{ inputs.date }} + # package-name: distributed_ucxx diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index d64c87f1..28a41bed 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -61,17 +61,17 @@ jobs: with: build_type: pull-request script: ci/test_wheel_ucxx.sh - wheel-build-distributed-ucxx: - needs: checks - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 - with: - build_type: pull-request - script: ci/build_wheel_distributed_ucxx.sh - wheel-tests-distributed-ucxx: - needs: wheel-build-distributed-ucxx - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 - with: - build_type: 
pull-request - script: ci/test_wheel_distributed_ucxx.sh + # wheel-build-distributed-ucxx: + # needs: checks + # secrets: inherit + # uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + # with: + # build_type: pull-request + # script: ci/build_wheel_distributed_ucxx.sh + # wheel-tests-distributed-ucxx: + # needs: wheel-build-distributed-ucxx + # secrets: inherit + # uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + # with: + # build_type: pull-request + # script: ci/test_wheel_distributed_ucxx.sh diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index d87e1d27..a1e466e5 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -41,12 +41,12 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} script: ci/test_wheel_ucxx.sh - wheel-tests-distributed-ucxx: - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 - with: - build_type: nightly - branch: ${{ inputs.branch }} - date: ${{ inputs.date }} - sha: ${{ inputs.sha }} - script: ci/test_wheel_distributed_ucxx.sh + # wheel-tests-distributed-ucxx: + # secrets: inherit + # uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + # with: + # build_type: nightly + # branch: ${{ inputs.branch }} + # date: ${{ inputs.date }} + # sha: ${{ inputs.sha }} + # script: ci/test_wheel_distributed_ucxx.sh From d7dc39952c2e0aac3f75fca18d2ef6a8fb2a5574 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 18 Jan 2024 17:09:51 -0600 Subject: [PATCH 44/79] Only use rapids-dask-dependency in conda until we enable distributed-ucxx wheel tests with their own dependency list. 
--- dependencies.yaml | 6 +++++- python/pyproject.toml | 1 - 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index f5246efb..334f5dab 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -183,6 +183,11 @@ dependencies: - *gmock test_python: common: + - output_types: [conda] + packages: + # TODO: Split dependency lists for ucxx and distributed-ucxx. This is + # a workaround as we currently only test distributed-ucxx with conda. + - rapids-dask-dependency==24.2.* - output_types: [conda, requirements, pyproject] packages: - cloudpickle @@ -191,7 +196,6 @@ dependencies: - pytest - pytest-asyncio - pytest-rerunfailures - - rapids-dask-dependency==24.2.* - output_types: conda packages: - cupy>=12.0.0 diff --git a/python/pyproject.toml b/python/pyproject.toml index 860189e4..83133ff0 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -46,7 +46,6 @@ test = [ "pytest", "pytest-asyncio", "pytest-rerunfailures", - "rapids-dask-dependency==24.2.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] From 3de787f42c87d0a999de134a5a86badc107594fd Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 18 Jan 2024 17:17:53 -0600 Subject: [PATCH 45/79] Skip distributed-ucxx jobs. 
--- .github/workflows/pr.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 28a41bed..62b06755 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -18,8 +18,8 @@ jobs: - conda-python-tests - wheel-build-ucxx - wheel-tests-ucxx - - wheel-build-distributed-ucxx - - wheel-tests-distributed-ucxx +# - wheel-build-distributed-ucxx +# - wheel-tests-distributed-ucxx secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.02 checks: From 535a830370c209bcdec1eabe61b4241e0338d613 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 18 Jan 2024 17:28:25 -0600 Subject: [PATCH 46/79] Use RAPIDS version for rapids-dask-dependency. --- ci/release/update-version.sh | 1 + conda/recipes/ucxx/meta.yaml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 593bd7b8..32acf1a4 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -44,6 +44,7 @@ sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/distributed # bump RAPIDS libs sed_runner "/- librmm =/ s/=.*/=${NEXT_RAPIDS_VERSION}/g" conda/recipes/ucxx/meta.yaml sed_runner "/- rmm =/ s/=.*/=${NEXT_RAPIDS_VERSION}/g" conda/recipes/ucxx/meta.yaml +sed_runner "/- rapids-dask-dependency =/ s/=.*/=${NEXT_RAPIDS_VERSION}/g" conda/recipes/ucxx/meta.yaml DEPENDENCIES=( cudf diff --git a/conda/recipes/ucxx/meta.yaml b/conda/recipes/ucxx/meta.yaml index 35ea7766..2826f276 100644 --- a/conda/recipes/ucxx/meta.yaml +++ b/conda/recipes/ucxx/meta.yaml @@ -271,7 +271,7 @@ outputs: - tomli run: - python * *_cpython - - rapids-dask-dependency ={{ minor_version }} + - rapids-dask-dependency =24.02 - {{ pin_subpackage('ucxx', max_pin='x.x') }} test: imports: From 1b6c1df80dc59894840dd6cac018e5d37f5cb8d5 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 18 Jan 2024 23:27:27 -0800 
Subject: [PATCH 47/79] Increase nofile ulimit in CI workflows for wheels --- .github/workflows/pr.yaml | 2 ++ .github/workflows/test.yaml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 62b06755..cee1283d 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -60,6 +60,7 @@ jobs: uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 with: build_type: pull-request + container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" script: ci/test_wheel_ucxx.sh # wheel-build-distributed-ucxx: # needs: checks @@ -74,4 +75,5 @@ jobs: # uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 # with: # build_type: pull-request + # container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" # script: ci/test_wheel_distributed_ucxx.sh diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index a1e466e5..041fb799 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -40,6 +40,7 @@ jobs: branch: ${{ inputs.branch }} date: ${{ inputs.date }} sha: ${{ inputs.sha }} + container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" script: ci/test_wheel_ucxx.sh # wheel-tests-distributed-ucxx: # secrets: inherit @@ -49,4 +50,5 @@ jobs: # branch: ${{ inputs.branch }} # date: ${{ inputs.date }} # sha: ${{ inputs.sha }} + # container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" # script: ci/test_wheel_distributed_ucxx.sh From 27f68dcc89897d3cb91da2ba25bf0953e347aaa3 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 19 Jan 2024 08:23:16 -0600 Subject: [PATCH 48/79] Update to 24.04. 
--- .github/workflows/build.yaml | 8 ++++---- .github/workflows/pr.yaml | 8 ++++---- .github/workflows/test.yaml | 4 ++-- conda/recipes/ucxx/meta.yaml | 2 +- python/distributed-ucxx/pyproject.toml | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index a058af2c..30094173 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -45,7 +45,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-ucxx: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -55,7 +55,7 @@ jobs: wheel-publish-ucxx: needs: wheel-build-ucxx secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -64,7 +64,7 @@ jobs: package-name: ucxx # wheel-build-distributed-ucxx: # secrets: inherit - # uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + # uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 # with: # build_type: ${{ inputs.build_type || 'branch' }} # branch: ${{ inputs.branch }} @@ -74,7 +74,7 @@ jobs: # wheel-publish-distributed-ucxx: # needs: wheel-build-distributed-ucxx # secrets: inherit - # uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 + # uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 # with: # build_type: ${{ inputs.build_type || 'branch' }} # branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 26a7c16c..0adf388c 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ 
-50,14 +50,14 @@ jobs: wheel-build-ucxx: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 with: build_type: pull-request script: ci/build_wheel_ucxx.sh wheel-tests-ucxx: needs: wheel-build-ucxx secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 with: build_type: pull-request container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" @@ -65,14 +65,14 @@ jobs: # wheel-build-distributed-ucxx: # needs: checks # secrets: inherit - # uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + # uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 # with: # build_type: pull-request # script: ci/build_wheel_distributed_ucxx.sh # wheel-tests-distributed-ucxx: # needs: wheel-build-distributed-ucxx # secrets: inherit - # uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + # uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 # with: # build_type: pull-request # container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 6accd904..ff9cda2b 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -34,7 +34,7 @@ jobs: container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" wheel-tests-ucxx: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -44,7 +44,7 @@ jobs: script: ci/test_wheel_ucxx.sh # 
wheel-tests-distributed-ucxx: # secrets: inherit - # uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + # uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 # with: # build_type: nightly # branch: ${{ inputs.branch }} diff --git a/conda/recipes/ucxx/meta.yaml b/conda/recipes/ucxx/meta.yaml index c16af743..2484fdde 100644 --- a/conda/recipes/ucxx/meta.yaml +++ b/conda/recipes/ucxx/meta.yaml @@ -271,7 +271,7 @@ outputs: - tomli run: - python * *_cpython - - rapids-dask-dependency =24.02 + - rapids-dask-dependency =24.04 - {{ pin_subpackage('ucxx', max_pin='x.x') }} test: imports: diff --git a/python/distributed-ucxx/pyproject.toml b/python/distributed-ucxx/pyproject.toml index f9a5e261..658ccf30 100644 --- a/python/distributed-ucxx/pyproject.toml +++ b/python/distributed-ucxx/pyproject.toml @@ -17,7 +17,7 @@ license = { text = "Apache-2.0" } requires-python = ">=3.8" dependencies = [ "numba >=0.54", - "rapids-dask-dependency==24.2.*" + "rapids-dask-dependency==24.4.*" ] classifiers = [ "Intended Audience :: Developers", From 0fa2763c994fde0717549edbe892b176f741dbe8 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 19 Jan 2024 08:46:07 -0600 Subject: [PATCH 49/79] Uncomment distributed-ucxx CI jobs. 
--- .github/workflows/build.yaml | 38 ++++++++++++++++++------------------ .github/workflows/pr.yaml | 34 ++++++++++++++++---------------- .github/workflows/test.yaml | 20 +++++++++---------- 3 files changed, 46 insertions(+), 46 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 30094173..041c2483 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -62,22 +62,22 @@ jobs: sha: ${{ inputs.sha }} date: ${{ inputs.date }} package-name: ucxx - # wheel-build-distributed-ucxx: - # secrets: inherit - # uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 - # with: - # build_type: ${{ inputs.build_type || 'branch' }} - # branch: ${{ inputs.branch }} - # sha: ${{ inputs.sha }} - # date: ${{ inputs.date }} - # script: ci/build_wheel_distributed_ucxx.sh - # wheel-publish-distributed-ucxx: - # needs: wheel-build-distributed-ucxx - # secrets: inherit - # uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 - # with: - # build_type: ${{ inputs.build_type || 'branch' }} - # branch: ${{ inputs.branch }} - # sha: ${{ inputs.sha }} - # date: ${{ inputs.date }} - # package-name: distributed_ucxx + wheel-build-distributed-ucxx: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + script: ci/build_wheel_distributed_ucxx.sh + wheel-publish-distributed-ucxx: + needs: wheel-build-distributed-ucxx + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: distributed_ucxx diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 0adf388c..6820a575 100644 --- 
a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -18,8 +18,8 @@ jobs: - conda-python-tests - wheel-build-ucxx - wheel-tests-ucxx -# - wheel-build-distributed-ucxx -# - wheel-tests-distributed-ucxx + - wheel-build-distributed-ucxx + - wheel-tests-distributed-ucxx secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.04 checks: @@ -62,18 +62,18 @@ jobs: build_type: pull-request container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" script: ci/test_wheel_ucxx.sh - # wheel-build-distributed-ucxx: - # needs: checks - # secrets: inherit - # uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 - # with: - # build_type: pull-request - # script: ci/build_wheel_distributed_ucxx.sh - # wheel-tests-distributed-ucxx: - # needs: wheel-build-distributed-ucxx - # secrets: inherit - # uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 - # with: - # build_type: pull-request - # container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" - # script: ci/test_wheel_distributed_ucxx.sh + wheel-build-distributed-ucxx: + needs: checks + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + with: + build_type: pull-request + script: ci/build_wheel_distributed_ucxx.sh + wheel-tests-distributed-ucxx: + needs: wheel-build-distributed-ucxx + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + with: + build_type: pull-request + container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" + script: ci/test_wheel_distributed_ucxx.sh diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index ff9cda2b..73b47503 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -42,13 +42,13 @@ jobs: sha: ${{ inputs.sha }} container-options: "--cap-add CAP_SYS_PTRACE 
--shm-size=8g --ulimit=nofile=1000000:1000000" script: ci/test_wheel_ucxx.sh - # wheel-tests-distributed-ucxx: - # secrets: inherit - # uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 - # with: - # build_type: nightly - # branch: ${{ inputs.branch }} - # date: ${{ inputs.date }} - # sha: ${{ inputs.sha }} - # container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" - # script: ci/test_wheel_distributed_ucxx.sh + wheel-tests-distributed-ucxx: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" + script: ci/test_wheel_distributed_ucxx.sh From 05fb96bbb1a9e455b92449b37aeccd1d07a375c3 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 19 Jan 2024 08:47:48 -0600 Subject: [PATCH 50/79] Rename ucxx dependency lists. 
--- dependencies.yaml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index 40bab688..e223334a 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -7,15 +7,15 @@ files: arch: [x86_64] includes: - build_cpp - - build_python + - build_python_ucxx - checks - cuda - cuda_version - dev - py_version - - run_python + - run_python_ucxx - test_cpp - - test_python + - test_python_ucxx - depends_on_cupy - depends_on_rmm - depends_on_cudf @@ -37,29 +37,29 @@ files: includes: - checks - py_version - py_build: + py_build_ucxx: output: pyproject pyproject_dir: python extras: table: build-system includes: - - build_python + - build_python_ucxx - depends_on_rmm - py_run: + py_run_ucxx: output: pyproject pyproject_dir: python extras: table: project includes: - - run_python - py_test: + - run_python_ucxx + py_test_ucxx: output: pyproject pyproject_dir: python extras: table: project.optional-dependencies key: test includes: - - test_python + - test_python_ucxx - depends_on_cupy - depends_on_cudf channels: @@ -82,7 +82,7 @@ dependencies: - librmm==24.4.* - ninja - spdlog>=1.12.0,<1.13 - build_python: + build_python_ucxx: common: - output_types: [conda, requirements, pyproject] packages: @@ -166,7 +166,7 @@ dependencies: - matrix: packages: - python>=3.9,<3.11 - run_python: + run_python_ucxx: common: - output_types: [conda, requirements, pyproject] packages: @@ -182,7 +182,7 @@ dependencies: - *cmake_ver - *gtest - *gmock - test_python: + test_python_ucxx: common: - output_types: [conda] packages: From 0ee77f6296d207cf9f0360d07c4376f737102494 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 19 Jan 2024 08:52:54 -0600 Subject: [PATCH 51/79] Use rapids-dependency-file-generator for distributed-ucxx. 
--- .../all_cuda-118_arch-x86_64.yaml | 4 ++ .../all_cuda-120_arch-x86_64.yaml | 4 ++ dependencies.yaml | 48 +++++++++++++++++-- python/distributed-ucxx/pyproject.toml | 15 +++--- 4 files changed, 58 insertions(+), 13 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index d7f5f6ae..ad3fb21e 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -26,7 +26,9 @@ dependencies: - librmm==24.4.* - libtool - ninja +- numba >=0.54 - numba>=0.57.1 +- numpy - numpy>=1.21 - pip - pkg-config @@ -39,6 +41,8 @@ dependencies: - rapids-dask-dependency==24.4.* - rmm==24.4.* - scikit-build-core>=0.7.0 +- setuptools>=64.0.0 - spdlog>=1.12.0,<1.13 +- tomli - ucx name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index dfe2313d..f958ae31 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -26,7 +26,9 @@ dependencies: - librmm==24.4.* - libtool - ninja +- numba >=0.54 - numba>=0.57.1 +- numpy - numpy>=1.21 - pip - pkg-config @@ -39,6 +41,8 @@ dependencies: - rapids-dask-dependency==24.4.* - rmm==24.4.* - scikit-build-core>=0.7.0 +- setuptools>=64.0.0 - spdlog>=1.12.0,<1.13 +- tomli - ucx name: all_cuda-120_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index e223334a..3fac8532 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -8,14 +8,17 @@ files: includes: - build_cpp - build_python_ucxx + - build_python_distributed_ucxx - checks - cuda - cuda_version - dev - py_version - run_python_ucxx + - run_python_distributed_ucxx - test_cpp - test_python_ucxx + - test_python_distributed_ucxx - depends_on_cupy - depends_on_rmm - depends_on_cudf @@ -62,6 +65,28 @@ files: - test_python_ucxx - depends_on_cupy - depends_on_cudf + py_build_distributed_ucxx: + output: pyproject + 
pyproject_dir: python/distributed-ucxx + extras: + table: build-system + includes: + - build_python_distributed_ucxx + py_run_distributed_ucxx: + output: pyproject + pyproject_dir: python/distributed-ucxx + extras: + table: project + includes: + - run_python_distributed_ucxx + py_test_distributed_ucxx: + output: pyproject + pyproject_dir: python/distributed-ucxx + extras: + table: project.optional-dependencies + key: test + includes: + - test_python_distributed_ucxx channels: - rapidsai - rapidsai-nightly @@ -95,6 +120,12 @@ dependencies: - output_types: [requirements, pyproject] packages: - scikit-build-core[pyproject]>=0.7.0 + build_python_distributed_ucxx: + common: + - output_types: [conda, requirements, pyproject] + packages: + - setuptools>=64.0.0 + - tomli checks: common: - output_types: [conda, requirements] @@ -175,6 +206,12 @@ dependencies: - output_types: [conda] packages: - ucx + run_python_distributed_ucxx: + common: + - output_types: [conda, requirements, pyproject] + packages: + - numba >=0.54 + - rapids-dask-dependency==24.4.* test_cpp: common: - output_types: conda @@ -184,11 +221,6 @@ dependencies: - *gmock test_python_ucxx: common: - - output_types: [conda] - packages: - # TODO: Split dependency lists for ucxx and distributed-ucxx. This is - # a workaround as we currently only test distributed-ucxx with conda. 
- - rapids-dask-dependency==24.4.* - output_types: [conda, requirements, pyproject] packages: - cloudpickle @@ -196,6 +228,12 @@ dependencies: - pytest - pytest-asyncio - pytest-rerunfailures + test_python_distributed_ucxx: + common: + - output_types: [conda, requirements, pyproject] + packages: + - numpy + - pytest depends_on_cupy: common: - output_types: conda diff --git a/python/distributed-ucxx/pyproject.toml b/python/distributed-ucxx/pyproject.toml index 658ccf30..bf7d2ce1 100644 --- a/python/distributed-ucxx/pyproject.toml +++ b/python/distributed-ucxx/pyproject.toml @@ -2,8 +2,8 @@ build-backend = "setuptools.build_meta" requires = [ "setuptools>=64.0.0", - "tomli ; python_version < '3.11'", -] + "tomli", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project] name = "distributed-ucxx" @@ -14,16 +14,15 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache-2.0" } -requires-python = ">=3.8" +requires-python = ">=3.9" dependencies = [ "numba >=0.54", - "rapids-dask-dependency==24.4.*" -] + "rapids-dask-dependency==24.4.*", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", ] @@ -38,9 +37,9 @@ docs = [ "sphinx-rtd-theme>=0.5.1", ] test = [ - "pytest", "numpy", -] + "pytest", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
[project.urls] Homepage = "https://github.com/rapidsai/ucxx" From 360f5590af9a8bfdf70349a5954caa237b15d385 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 19 Jan 2024 09:10:40 -0600 Subject: [PATCH 52/79] Fix conda dependency list. --- dependencies.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dependencies.yaml b/dependencies.yaml index 3fac8532..b0119837 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -32,7 +32,8 @@ files: includes: - cuda_version - py_version - - test_python + - test_python_ucxx + - test_python_distributed_ucxx - depends_on_cupy - depends_on_cudf checks: From a12e42067ff119ad8ea19fd7d13b043215db97a2 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 19 Jan 2024 09:14:30 -0600 Subject: [PATCH 53/79] Add ucxx dependency to distributed-ucxx. --- .github/workflows/pr.yaml | 4 +++- dependencies.yaml | 21 +++++++++++++++++++++ python/distributed-ucxx/pyproject.toml | 1 + 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 6820a575..64f0071e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -70,7 +70,9 @@ jobs: build_type: pull-request script: ci/build_wheel_distributed_ucxx.sh wheel-tests-distributed-ucxx: - needs: wheel-build-distributed-ucxx + needs: + - wheel-build-ucxx + - wheel-build-distributed-ucxx secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 with: diff --git a/dependencies.yaml b/dependencies.yaml index b0119837..ba813e74 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -80,6 +80,7 @@ files: table: project includes: - run_python_distributed_ucxx + - depends_on_ucxx py_test_distributed_ucxx: output: pyproject pyproject_dir: python/distributed-ucxx @@ -290,3 +291,23 @@ dependencies: packages: - cudf-cu11==24.4.* - {matrix: null, packages: [*cudf_conda]} + depends_on_ucxx: + common: + - output_types: conda + packages: + - &ucxx_conda ucxx==0.37.* + 
- output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: {cuda: "12.*"} + packages: + - ucxx-cu12==0.37.* + - matrix: {cuda: "11.*"} + packages: + - ucxx-cu11==0.37.* + - {matrix: null, packages: [*ucxx_conda]} diff --git a/python/distributed-ucxx/pyproject.toml b/python/distributed-ucxx/pyproject.toml index bf7d2ce1..f4778ada 100644 --- a/python/distributed-ucxx/pyproject.toml +++ b/python/distributed-ucxx/pyproject.toml @@ -18,6 +18,7 @@ requires-python = ">=3.9" dependencies = [ "numba >=0.54", "rapids-dask-dependency==24.4.*", + "ucxx==0.37.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", From 0ab6f18a944fb81024b83e4189922005abaaff8e Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 19 Jan 2024 09:27:32 -0600 Subject: [PATCH 54/79] Update update-version.sh. 
--- ci/release/update-version.sh | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 32acf1a4..751400a5 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -20,8 +20,9 @@ NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR} NEXT_RAPIDS_VERSION="$(curl -sL https://version.gpuci.io/ucx-py/${NEXT_SHORT_TAG})" # Need to distutils-normalize the versions for some use cases -NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_RAPIDS_VERSION}'))") -echo "Next tag is ${NEXT_SHORT_TAG_PEP440}" +NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))") +NEXT_RAPIDS_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_RAPIDS_VERSION}'))") +echo "Next tag is ${NEXT_RAPIDS_SHORT_TAG_PEP440}" echo "Preparing release: $NEXT_FULL_TAG" @@ -56,6 +57,18 @@ DEPENDENCIES=( ) for DEP in "${DEPENDENCIES[@]}"; do for FILE in dependencies.yaml conda/environments/*.yaml; do + sed_runner "/-.* ${DEP}==/ s/==.*/==${NEXT_RAPIDS_SHORT_TAG_PEP440}\.*/g" ${FILE}; + done + sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_RAPIDS_SHORT_TAG_PEP440}\.*\"/g" python/pyproject.toml; + sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_RAPIDS_SHORT_TAG_PEP440}\.*\"/g" python/distributed-ucxx/pyproject.toml; +done + +UCXX_DEPENDENCIES=( + ucxx + distributed-ucxx +) +for DEP in "${UCXX_DEPENDENCIES[@]}"; do + for FILE in dependencies.yaml; do sed_runner "/-.* ${DEP}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}\.*/g" ${FILE}; done sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}\.*\"/g" python/pyproject.toml; From dbb33505b96c93985f017afd15f739aa5f495897 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 19 Jan 2024 09:29:28 -0600 Subject: [PATCH 55/79] Rewrite alpha_spec for distributed-ucxx's dependency on 
ucxx. --- ci/build_wheel.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 4ded4978..8ef0fa04 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -34,6 +34,7 @@ fi if [[ ${package_name} == "distributed-ucxx" ]]; then sed -r -i "s/rapids-dask-dependency==(.*)\"/rapids-dask-dependency==\1${alpha_spec}\"/g" ${pyproject_file} + sed -r -i "s/ucxx(.*)\"/ucxx${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} cd "${package_dir}" From 3d97f46992d91f01a1e697af03c29339065880ad Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 19 Jan 2024 09:37:01 -0600 Subject: [PATCH 56/79] Fix sed command. --- ci/build_wheel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 8ef0fa04..dacd5047 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -34,7 +34,7 @@ fi if [[ ${package_name} == "distributed-ucxx" ]]; then sed -r -i "s/rapids-dask-dependency==(.*)\"/rapids-dask-dependency==\1${alpha_spec}\"/g" ${pyproject_file} - sed -r -i "s/ucxx(.*)\"/ucxx${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} + sed -r -i "s/\"ucxx(.*)\"/ucxx${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} cd "${package_dir}" From 6199aea46b89b152fda46b85e1644beb463e1d55 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 19 Jan 2024 09:43:03 -0600 Subject: [PATCH 57/79] Fix sed command again. 
--- ci/build_wheel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index dacd5047..baf5f564 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -34,7 +34,7 @@ fi if [[ ${package_name} == "distributed-ucxx" ]]; then sed -r -i "s/rapids-dask-dependency==(.*)\"/rapids-dask-dependency==\1${alpha_spec}\"/g" ${pyproject_file} - sed -r -i "s/\"ucxx(.*)\"/ucxx${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} + sed -r -i "s/\"ucxx(.*)\"/\"ucxx${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} cd "${package_dir}" From 199cb2511100bc5ada0e3c55bb329e877bfbf52a Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 19 Jan 2024 12:44:33 -0600 Subject: [PATCH 58/79] Install ucxx from this build when testing distributed-ucxx. --- ci/test_wheel_distributed_ucxx.sh | 7 +++---- ci/test_wheel_ucxx.sh | 5 ----- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/ci/test_wheel_distributed_ucxx.sh b/ci/test_wheel_distributed_ucxx.sh index 0b34688f..4fa54dd4 100755 --- a/ci/test_wheel_distributed_ucxx.sh +++ b/ci/test_wheel_distributed_ucxx.sh @@ -12,10 +12,9 @@ mkdir -p ./dist RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" RAPIDS_PY_WHEEL_NAME="${PROJECT_NAME}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist -# On arm also need to install CMake because treelite needs to be compiled (no wheels available for arm). 
-if [[ "$(arch)" == "aarch64" ]]; then - python -m pip install cmake -fi +# Install previously built ucxx wheel +RAPIDS_PY_WHEEL_NAME="ucxx_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-ucxx-dep +python -m pip install --no-deps ./local-ucxx-dep/ucxx*.whl # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install $(echo ./dist/${PROJECT_NAME}*.whl)[test] diff --git a/ci/test_wheel_ucxx.sh b/ci/test_wheel_ucxx.sh index 35e9ac62..25b02dca 100755 --- a/ci/test_wheel_ucxx.sh +++ b/ci/test_wheel_ucxx.sh @@ -12,11 +12,6 @@ mkdir -p ./dist RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" RAPIDS_PY_WHEEL_NAME="${PROJECT_NAME}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist -# On arm also need to install CMake because treelite needs to be compiled (no wheels available for arm). -if [[ "$(arch)" == "aarch64" ]]; then - python -m pip install cmake -fi - # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install $(echo ./dist/${PROJECT_NAME}*.whl)[test] From 128237d65e4efea3051630fc360e1bc4c6a42ab3 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 19 Jan 2024 18:23:41 -0600 Subject: [PATCH 59/79] Install ucxx deps. 
--- ci/test_wheel_distributed_ucxx.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/test_wheel_distributed_ucxx.sh b/ci/test_wheel_distributed_ucxx.sh index 4fa54dd4..01549eb7 100755 --- a/ci/test_wheel_distributed_ucxx.sh +++ b/ci/test_wheel_distributed_ucxx.sh @@ -14,7 +14,7 @@ RAPIDS_PY_WHEEL_NAME="${PROJECT_NAME}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download- # Install previously built ucxx wheel RAPIDS_PY_WHEEL_NAME="ucxx_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-ucxx-dep -python -m pip install --no-deps ./local-ucxx-dep/ucxx*.whl +python -m pip install ./local-ucxx-dep/ucxx*.whl # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install $(echo ./dist/${PROJECT_NAME}*.whl)[test] From f5182cc5aca07ec69ce4d1a7657fe7457eee0adf Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 22 Jan 2024 10:34:58 -0600 Subject: [PATCH 60/79] Add missing RMM dependency to ucxx run requirements. --- dependencies.yaml | 1 + python/pyproject.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/dependencies.yaml b/dependencies.yaml index ba813e74..0d0a0623 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -56,6 +56,7 @@ files: table: project includes: - run_python_ucxx + - depends_on_rmm py_test_ucxx: output: pyproject pyproject_dir: python diff --git a/python/pyproject.toml b/python/pyproject.toml index 6c849da5..bccbafa4 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -24,6 +24,7 @@ requires-python = ">=3.9" dependencies = [ "numpy>=1.21", "pynvml>=11.4.1", + "rmm==24.4.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../dependencies.yaml and run `rapids-dependency-file-generator`. 
classifiers = [ "Intended Audience :: Developers", From 27573d9084ffe1660fd79e1717a061d0b2bbbd86 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 24 Jan 2024 14:08:24 -0600 Subject: [PATCH 61/79] Update VERSION to 0.37.00 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 69f66ea1..d142a90c 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.36.00 +0.37.00 From 8b5007ff9d6bb844fa2bee418c17659b55aec9d2 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 25 Jan 2024 13:10:13 -0600 Subject: [PATCH 62/79] Clean up merge diff. --- ci/release/update-version.sh | 1 - dependencies.yaml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 01ef5999..a9009056 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -55,7 +55,6 @@ DEPENDENCIES=( dask-cuda dask-cudf librmm - rapids-dask-dependency rmm rapids-dask-dependency ) diff --git a/dependencies.yaml b/dependencies.yaml index eb6c9d72..a35cd12d 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -123,7 +123,7 @@ dependencies: - *cmake_ver - cython>=3.0.0 - ninja - - output_types: [conda] + - output_types: conda packages: - scikit-build-core>=0.7.0 - output_types: [requirements, pyproject] From 774893587a4de449c13d4749b621bdc3931cc9cc Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 25 Jan 2024 13:10:28 -0600 Subject: [PATCH 63/79] Add VERSION symlink to distributed-ucxx. 
--- python/distributed-ucxx/distributed_ucxx/VERSION | 1 + 1 file changed, 1 insertion(+) create mode 120000 python/distributed-ucxx/distributed_ucxx/VERSION diff --git a/python/distributed-ucxx/distributed_ucxx/VERSION b/python/distributed-ucxx/distributed_ucxx/VERSION new file mode 120000 index 00000000..d62dc733 --- /dev/null +++ b/python/distributed-ucxx/distributed_ucxx/VERSION @@ -0,0 +1 @@ +../../../VERSION \ No newline at end of file From 35c211d129465517981c5000993853d86b8d0faf Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 25 Jan 2024 13:13:02 -0600 Subject: [PATCH 64/79] Update copyrights. --- ci/build_wheel.sh | 2 +- ci/test_common.sh | 2 +- ci/test_wheel_distributed_ucxx.sh | 2 +- ci/test_wheel_ucxx.sh | 2 +- python/ucxx/_version.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index baf5f564..df8b39ed 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. set -euo pipefail diff --git a/ci/test_common.sh b/ci/test_common.sh index d689cc69..c6cda357 100755 --- a/ci/test_common.sh +++ b/ci/test_common.sh @@ -1,6 +1,6 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. # SPDX-License-Identifier: BSD-3-Clause set -euo pipefail diff --git a/ci/test_wheel_distributed_ucxx.sh b/ci/test_wheel_distributed_ucxx.sh index 01549eb7..ddcad8b9 100755 --- a/ci/test_wheel_distributed_ucxx.sh +++ b/ci/test_wheel_distributed_ucxx.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. 
set -euo pipefail diff --git a/ci/test_wheel_ucxx.sh b/ci/test_wheel_ucxx.sh index 25b02dca..5327e133 100755 --- a/ci/test_wheel_ucxx.sh +++ b/ci/test_wheel_ucxx.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. set -euo pipefail diff --git a/python/ucxx/_version.py b/python/ucxx/_version.py index b755ee6f..a1f944f3 100644 --- a/python/ucxx/_version.py +++ b/python/ucxx/_version.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 1348dcb943372b2c674c8d202edd7ba3c71fb9e2 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 25 Jan 2024 16:13:54 -0600 Subject: [PATCH 65/79] Add alpha_spec to rapids-dask-dependency. --- ci/build_wheel.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index df8b39ed..f5014666 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -41,6 +41,7 @@ if [[ ${package_name} == "distributed-ucxx" ]]; then python -m pip wheel . 
-w dist -vvv --no-deps --disable-pip-version-check elif [[ ${package_name} == "ucxx" ]]; then # Add -cuXX to package name + sed -r -i "s/rapids-dask-dependency==(.*)\"/rapids-dask-dependency==\1${alpha_spec}\"/g" ${pyproject_file} sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} sed -r -i "s/cudf(.*)\"/cudf${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} From 02dc566f650a43d8d5f1c9e6f22d4a1a61936e24 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Mon, 29 Jan 2024 11:18:54 -0800 Subject: [PATCH 66/79] Install Distributed development version in wheel test --- ci/test_wheel_distributed_ucxx.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ci/test_wheel_distributed_ucxx.sh b/ci/test_wheel_distributed_ucxx.sh index ddcad8b9..ba2bc6ec 100755 --- a/ci/test_wheel_distributed_ucxx.sh +++ b/ci/test_wheel_distributed_ucxx.sh @@ -19,6 +19,12 @@ python -m pip install ./local-ucxx-dep/ucxx*.whl # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install $(echo ./dist/${PROJECT_NAME}*.whl)[test] +# TODO: We need distributed installed in developer mode to provide test utils, +# we still need to match to the `rapids-dask-dependency` version. 
+rapids-logger "Install Distributed in developer mode" +git clone https://github.com/dask/distributed /tmp/distributed +python -m pip install -e /tmp/distributed + # Run smoke tests for aarch64 pull requests # if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then # python ci/wheel_smoke_test.py From 3ef814b2ca7340d9fa4b6fda3f4624ed2d7cd323 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Mon, 29 Jan 2024 12:54:22 -0800 Subject: [PATCH 67/79] Add smoke tests for aarch wheel --- ci/test_wheel_distributed_ucxx.sh | 20 ++-- ci/test_wheel_ucxx.sh | 25 +++-- ci/wheel_smoke_test_distributed_ucxx.py | 65 +++++++++++++ ci/wheel_smoke_test_ucxx.py | 116 ++++++++++++++++++++++++ 4 files changed, 203 insertions(+), 23 deletions(-) create mode 100644 ci/wheel_smoke_test_distributed_ucxx.py create mode 100644 ci/wheel_smoke_test_ucxx.py diff --git a/ci/test_wheel_distributed_ucxx.sh b/ci/test_wheel_distributed_ucxx.sh index ba2bc6ec..5b6ee4c1 100755 --- a/ci/test_wheel_distributed_ucxx.sh +++ b/ci/test_wheel_distributed_ucxx.sh @@ -25,15 +25,15 @@ rapids-logger "Install Distributed in developer mode" git clone https://github.com/dask/distributed /tmp/distributed python -m pip install -e /tmp/distributed -# Run smoke tests for aarch64 pull requests -# if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then -# python ci/wheel_smoke_test.py -# else -# python -m pytest ./python/${PROJECT_NAME}/tests -k 'not test_sparse_pca_inputs' -n 4 --ignore=python/cuml/tests/dask && python -m pytest ./python/${PROJECT_NAME}/tests -k 'test_sparse_pca_inputs' && python -m pytest ./python/cuml/tests/dask -# fi - print_ucx_config -rapids-logger "Distributed Tests" -# run_distributed_ucxx_tests PROGRESS_MODE ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE -run_distributed_ucxx_tests thread 1 1 +# Run smoke tests for aarch64 pull requests +if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then + rapids-logger 
"Distributed Smoke Tests" + pytest -vs ci/wheel_smoke_test.py +else + rapids-logger "Distributed Tests" + + # run_distributed_ucxx_tests PROGRESS_MODE ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE + run_distributed_ucxx_tests thread 1 1 +fi diff --git a/ci/test_wheel_ucxx.sh b/ci/test_wheel_ucxx.sh index 5327e133..8e380fd6 100755 --- a/ci/test_wheel_ucxx.sh +++ b/ci/test_wheel_ucxx.sh @@ -15,18 +15,17 @@ RAPIDS_PY_WHEEL_NAME="${PROJECT_NAME}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download- # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install $(echo ./dist/${PROJECT_NAME}*.whl)[test] -# Run smoke tests for aarch64 pull requests -# if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then -# python ci/wheel_smoke_test.py -# else -# python -m pytest ./python/${PROJECT_NAME}/tests -k 'not test_sparse_pca_inputs' -n 4 --ignore=python/cuml/tests/dask && python -m pytest ./python/${PROJECT_NAME}/tests -k 'test_sparse_pca_inputs' && python -m pytest ./python/cuml/tests/dask -# fi - print_ucx_config -rapids-logger "Python Core Tests" -run_py_tests - -rapids-logger "Python Async Tests" -# run_py_tests_async PROGRESS_MODE ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE SKIP -run_py_tests_async thread 1 1 0 +# Run smoke tests for aarch64 pull requests +if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then + rapids-logger "Python Async Smoke Tests" + pytest -vs ci/wheel_smoke_test.py +else + rapids-logger "Python Core Tests" + run_py_tests + + rapids-logger "Python Async Tests" + # run_py_tests_async PROGRESS_MODE ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE SKIP + run_py_tests_async thread 1 1 0 +fi diff --git a/ci/wheel_smoke_test_distributed_ucxx.py b/ci/wheel_smoke_test_distributed_ucxx.py new file mode 100644 index 00000000..71d83351 --- /dev/null +++ b/ci/wheel_smoke_test_distributed_ucxx.py @@ -0,0 +1,65 @@ +import asyncio + +import pytest + +from distributed.comm import connect, listen 
+from distributed.protocol import to_serialize + +import ucxx + +import distributed_ucxx # noqa: E402 +from distributed_ucxx.utils_test import gen_test, ucxx_loop + + +try: + HOST = ucxx.get_address() +except Exception: + HOST = "127.0.0.1" + + +async def get_comm_pair( + listen_addr=f"ucxx://{HOST}", listen_args=None, connect_args=None, **kwargs +): + listen_args = listen_args or {} + connect_args = connect_args or {} + q = asyncio.queues.Queue() + + async def handle_comm(comm): + await q.put(comm) + + listener = listen(listen_addr, handle_comm, **listen_args, **kwargs) + async with listener: + comm = await connect(listener.contact_address, **connect_args, **kwargs) + serv_comm = await q.get() + return (comm, serv_comm) + + +@pytest.mark.parametrize( + "g", + [ + lambda cudf: cudf.Series([1, 2, 3]), + lambda cudf: cudf.DataFrame({"a": [1, 2, None], "b": [1.0, 2.0, None]}), + ], +) +@gen_test() +async def test_ping_pong_cudf(ucxx_loop, g): + # if this test appears after cupy an import error arises + # *** ImportError: /usr/lib/x86_64-linux-gnu/libstdc++.so.6: version `CXXABI_1.3.11' + # not found (required by python3.7/site-packages/pyarrow/../../../libarrow.so.12) + cudf = pytest.importorskip("cudf") + from cudf.testing._utils import assert_eq + + cudf_obj = g(cudf) + + com, serv_com = await get_comm_pair() + msg = {"op": "ping", "data": to_serialize(cudf_obj)} + + await com.write(msg) + result = await serv_com.read() + + cudf_obj_2 = result.pop("data") + assert result["op"] == "ping" + assert_eq(cudf_obj, cudf_obj_2) + + await com.close() + await serv_com.close() diff --git a/ci/wheel_smoke_test_ucxx.py b/ci/wheel_smoke_test_ucxx.py new file mode 100644 index 00000000..c3b9dec9 --- /dev/null +++ b/ci/wheel_smoke_test_ucxx.py @@ -0,0 +1,116 @@ +# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-License-Identifier: BSD-3-Clause + +import asyncio +import pickle + +import numpy as np +import pytest + +import ucxx + +cudf = pytest.importorskip("cudf") +distributed = pytest.importorskip("distributed") +cuda = pytest.importorskip("numba.cuda") + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "g", + [ + lambda cudf: cudf.Series([1, 2, 3]), + lambda cudf: cudf.DataFrame({"a": np.random.random(1200000)}), + ], +) +async def test_send_recv_cudf(event_loop, g): + from distributed.utils import nbytes + + class UCX: + def __init__(self, ep): + self.ep = ep + + async def write(self, cdf): + header, _frames = cdf.serialize() + frames = [pickle.dumps(header)] + _frames + + # Send meta data + await self.ep.send(np.array([len(frames)], dtype=np.uint64)) + await self.ep.send( + np.array( + [hasattr(f, "__cuda_array_interface__") for f in frames], + dtype=bool, + ) + ) + await self.ep.send(np.array([nbytes(f) for f in frames], dtype=np.uint64)) + # Send frames + for frame in frames: + if nbytes(frame) > 0: + await self.ep.send(frame) + + async def read(self): + try: + # Recv meta data + nframes = np.empty(1, dtype=np.uint64) + await self.ep.recv(nframes) + is_cudas = np.empty(nframes[0], dtype=bool) + await self.ep.recv(is_cudas) + sizes = np.empty(nframes[0], dtype=np.uint64) + await self.ep.recv(sizes) + except ( + ucxx.exceptions.UCXCanceledError, + ucxx.exceptions.UCXCloseError, + ) as e: + msg = "SOMETHING TERRIBLE HAS HAPPENED IN THE TEST" + raise type(e)(msg) from e + else: + # Recv frames + frames = [] + for is_cuda, size in zip(is_cudas.tolist(), sizes.tolist()): + if size > 0: + if is_cuda: + frame = cuda.device_array((size,), dtype=np.uint8) + else: + frame = np.empty(size, dtype=np.uint8) + await self.ep.recv(frame) + frames.append(frame) + else: + if is_cuda: + frames.append(cuda.device_array((0,), dtype=np.uint8)) + else: + frames.append(b"") + return frames + + class UCXListener: + def __init__(self): + self.comm = None + + def start(self): + async def
serve_forever(ep): + ucx = UCX(ep) + self.comm = ucx + + self.ucxx_server = ucxx.create_listener(serve_forever) + + uu = UCXListener() + uu.start() + uu.address = ucxx.get_address() + uu.client = await ucxx.create_endpoint(uu.address, uu.ucxx_server.port) + ucx = UCX(uu.client) + await asyncio.sleep(0.2) + msg = g(cudf) + frames, _ = await asyncio.gather(uu.comm.read(), ucx.write(msg)) + ucx_header = pickle.loads(frames[0]) + cudf_buffer = frames[1:] + typ = type(msg) + res = typ.deserialize(ucx_header, cudf_buffer) + + from cudf.testing._utils import assert_eq + + assert_eq(res, msg) + await uu.comm.ep.close() + await uu.client.close() + + assert uu.client.closed + assert uu.comm.ep.closed + del uu.ucxx_server + ucxx.reset() From dd4e43943aa6113b2d6f7b71ef4bc3c229270b6a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 29 Jan 2024 16:44:54 -0800 Subject: [PATCH 68/79] Don't print ucx info --- ci/test_wheel_distributed_ucxx.sh | 2 -- ci/test_wheel_ucxx.sh | 2 -- 2 files changed, 4 deletions(-) diff --git a/ci/test_wheel_distributed_ucxx.sh b/ci/test_wheel_distributed_ucxx.sh index 5b6ee4c1..65b62ae9 100755 --- a/ci/test_wheel_distributed_ucxx.sh +++ b/ci/test_wheel_distributed_ucxx.sh @@ -25,8 +25,6 @@ rapids-logger "Install Distributed in developer mode" git clone https://github.com/dask/distributed /tmp/distributed python -m pip install -e /tmp/distributed -print_ucx_config - # Run smoke tests for aarch64 pull requests if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then rapids-logger "Distributed Smoke Tests" diff --git a/ci/test_wheel_ucxx.sh b/ci/test_wheel_ucxx.sh index 8e380fd6..1f4cb305 100755 --- a/ci/test_wheel_ucxx.sh +++ b/ci/test_wheel_ucxx.sh @@ -15,8 +15,6 @@ RAPIDS_PY_WHEEL_NAME="${PROJECT_NAME}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download- # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install $(echo ./dist/${PROJECT_NAME}*.whl)[test] -print_ucx_config - # Run smoke tests for 
aarch64 pull requests if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then rapids-logger "Python Async Smoke Tests" From e782fc7c4f5ea6bed6241a3b25b3c8d5c336599e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 30 Jan 2024 07:55:15 +0000 Subject: [PATCH 69/79] Run pytest via python --- ci/test_wheel_distributed_ucxx.sh | 2 +- ci/test_wheel_ucxx.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/test_wheel_distributed_ucxx.sh b/ci/test_wheel_distributed_ucxx.sh index 65b62ae9..080bc05c 100755 --- a/ci/test_wheel_distributed_ucxx.sh +++ b/ci/test_wheel_distributed_ucxx.sh @@ -28,7 +28,7 @@ python -m pip install -e /tmp/distributed # Run smoke tests for aarch64 pull requests if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then rapids-logger "Distributed Smoke Tests" - pytest -vs ci/wheel_smoke_test.py + python -m pytest -vs ci/wheel_smoke_test.py else rapids-logger "Distributed Tests" diff --git a/ci/test_wheel_ucxx.sh b/ci/test_wheel_ucxx.sh index 1f4cb305..f0140fb2 100755 --- a/ci/test_wheel_ucxx.sh +++ b/ci/test_wheel_ucxx.sh @@ -18,7 +18,7 @@ python -m pip install $(echo ./dist/${PROJECT_NAME}*.whl)[test] # Run smoke tests for aarch64 pull requests if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then rapids-logger "Python Async Smoke Tests" - pytest -vs ci/wheel_smoke_test.py + python -m pytest -vs ci/wheel_smoke_test.py else rapids-logger "Python Core Tests" run_py_tests From cc017be326d5d9d75c64abc8429a6c086a865ba1 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 30 Jan 2024 05:19:27 -0800 Subject: [PATCH 70/79] Fix path to wheel smoke tests --- ci/test_wheel_distributed_ucxx.sh | 2 +- ci/test_wheel_ucxx.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/test_wheel_distributed_ucxx.sh b/ci/test_wheel_distributed_ucxx.sh index 5b6ee4c1..45e03996 100755 --- a/ci/test_wheel_distributed_ucxx.sh +++ 
b/ci/test_wheel_distributed_ucxx.sh @@ -30,7 +30,7 @@ print_ucx_config # Run smoke tests for aarch64 pull requests if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then rapids-logger "Distributed Smoke Tests" - pytest -vs ci/wheel_smoke_test.py + pytest -vs ci/wheel_smoke_test_distributed_ucxx.py else rapids-logger "Distributed Tests" diff --git a/ci/test_wheel_ucxx.sh b/ci/test_wheel_ucxx.sh index 8e380fd6..b6d37bbe 100755 --- a/ci/test_wheel_ucxx.sh +++ b/ci/test_wheel_ucxx.sh @@ -20,7 +20,7 @@ print_ucx_config # Run smoke tests for aarch64 pull requests if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then rapids-logger "Python Async Smoke Tests" - pytest -vs ci/wheel_smoke_test.py + pytest -vs ci/wheel_smoke_test_ucxx.py else rapids-logger "Python Core Tests" run_py_tests From 3d43a5caef83f5c345bb62910759268c500b38a3 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 30 Jan 2024 14:33:56 -0800 Subject: [PATCH 71/79] Fix smoke test copyright headers --- ci/wheel_smoke_test_distributed_ucxx.py | 3 +++ ci/wheel_smoke_test_ucxx.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ci/wheel_smoke_test_distributed_ucxx.py b/ci/wheel_smoke_test_distributed_ucxx.py index 71d83351..6c47749d 100644 --- a/ci/wheel_smoke_test_distributed_ucxx.py +++ b/ci/wheel_smoke_test_distributed_ucxx.py @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. +# SPDX-License-Identifier: BSD-3-Clause + import asyncio import pytest diff --git a/ci/wheel_smoke_test_ucxx.py b/ci/wheel_smoke_test_ucxx.py index c3b9dec9..44b0ebfe 100644 --- a/ci/wheel_smoke_test_ucxx.py +++ b/ci/wheel_smoke_test_ucxx.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. 
# SPDX-License-Identifier: BSD-3-Clause import asyncio From 2107f160a6c7530e39dfdcc7d03b5bb124791f5c Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 30 Jan 2024 14:38:03 -0800 Subject: [PATCH 72/79] Remove irrelevant comment from wheel smoke test --- ci/wheel_smoke_test_distributed_ucxx.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/ci/wheel_smoke_test_distributed_ucxx.py b/ci/wheel_smoke_test_distributed_ucxx.py index 6c47749d..81f79585 100644 --- a/ci/wheel_smoke_test_distributed_ucxx.py +++ b/ci/wheel_smoke_test_distributed_ucxx.py @@ -46,9 +46,6 @@ async def handle_comm(comm): ) @gen_test() async def test_ping_pong_cudf(ucxx_loop, g): - # if this test appears after cupy an import error arises - # *** ImportError: /usr/lib/x86_64-linux-gnu/libstdc++.so.6: version `CXXABI_1.3.11' - # not found (required by python3.7/site-packages/pyarrow/../../../libarrow.so.12) cudf = pytest.importorskip("cudf") from cudf.testing._utils import assert_eq From 1b3977f3f7b73a75271c3c540113d142548bc985 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 30 Jan 2024 14:39:05 -0800 Subject: [PATCH 73/79] Fix CMake comment on spdlog --- cpp/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d6282107..e9634c21 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -176,11 +176,11 @@ target_compile_definitions( # Enable RMM if necessary if(UCXX_ENABLE_RMM) target_link_libraries(ucxx PUBLIC rmm::rmm) + + # Define spdlog level target_compile_definitions(ucxx PUBLIC UCXX_ENABLE_RMM "SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}") endif() -# Define spdlog level - # Specify the target module library dependencies target_link_libraries(ucxx PUBLIC ucx::ucp) From 9e6ce880d5101076be031161f1fbeed3a4337e33 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 30 Jan 2024 14:41:48 -0800 Subject: [PATCH 74/79] Move `ci/test_utils.sh` functions to 
`ci/test_common.sh` --- ci/test_common.sh | 24 +++++++++++++++++++++++- ci/test_cpp.sh | 1 - ci/test_python.sh | 1 - ci/test_utils.sh | 27 --------------------------- ci/test_wheel_distributed_ucxx.sh | 1 - ci/test_wheel_ucxx.sh | 1 - 6 files changed, 23 insertions(+), 32 deletions(-) delete mode 100755 ci/test_utils.sh diff --git a/ci/test_common.sh b/ci/test_common.sh index c6cda357..5eed7ae8 100755 --- a/ci/test_common.sh +++ b/ci/test_common.sh @@ -5,7 +5,29 @@ set -euo pipefail -source "$(dirname "$0")/test_utils.sh" + +################################### Common ##################################### +log_command() { + CMD_LINE=$1 + echo -e "\e[1mRunning: \n ${CMD_LINE}\e[0m" +} + +print_system_stats() { + rapids-logger "Check GPU usage" + nvidia-smi + + rapids-logger "Check NICs" + awk 'END{print $1}' /etc/hosts + cat /etc/hosts +} + +print_ucx_config() { + rapids-logger "UCX Version and Build Configuration" + + set +e + ucx_info -v + set -e +} ##################################### C++ ###################################### diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index a8d5d056..08f202e9 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -5,7 +5,6 @@ set -euo pipefail -source "$(dirname "$0")/test_utils.sh" source "$(dirname "$0")/test_common.sh" rapids-logger "Create test conda environment" diff --git a/ci/test_python.sh b/ci/test_python.sh index 777bc273..c45c2cbb 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -5,7 +5,6 @@ set -euo pipefail -source "$(dirname "$0")/test_utils.sh" source "$(dirname "$0")/test_common.sh" rapids-logger "Create test conda environment" diff --git a/ci/test_utils.sh b/ci/test_utils.sh deleted file mode 100755 index 88bd8ba5..00000000 --- a/ci/test_utils.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash - -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. 
-# SPDX-License-Identifier: BSD-3-Clause - - -log_command() { - CMD_LINE=$1 - echo -e "\e[1mRunning: \n ${CMD_LINE}\e[0m" -} - -print_system_stats() { - rapids-logger "Check GPU usage" - nvidia-smi - - rapids-logger "Check NICs" - awk 'END{print $1}' /etc/hosts - cat /etc/hosts -} - -print_ucx_config() { - rapids-logger "UCX Version and Build Configuration" - - set +e - ucx_info -v - set -e -} diff --git a/ci/test_wheel_distributed_ucxx.sh b/ci/test_wheel_distributed_ucxx.sh index 1f82c85b..c8c0bbca 100755 --- a/ci/test_wheel_distributed_ucxx.sh +++ b/ci/test_wheel_distributed_ucxx.sh @@ -5,7 +5,6 @@ set -euo pipefail PROJECT_NAME="distributed_ucxx" -source "$(dirname "$0")/test_utils.sh" source "$(dirname "$0")/test_common.sh" mkdir -p ./dist diff --git a/ci/test_wheel_ucxx.sh b/ci/test_wheel_ucxx.sh index c73c8aaf..c844da67 100755 --- a/ci/test_wheel_ucxx.sh +++ b/ci/test_wheel_ucxx.sh @@ -5,7 +5,6 @@ set -euo pipefail PROJECT_NAME="ucxx" -source "$(dirname "$0")/test_utils.sh" source "$(dirname "$0")/test_common.sh" mkdir -p ./dist From d6af7c692fa20db8706fbe26c8ad7b647bd3ddbc Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 30 Jan 2024 14:44:59 -0800 Subject: [PATCH 75/79] Remove unneeded wheel test import --- ci/wheel_smoke_test_distributed_ucxx.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/wheel_smoke_test_distributed_ucxx.py b/ci/wheel_smoke_test_distributed_ucxx.py index 81f79585..e7f9c0ef 100644 --- a/ci/wheel_smoke_test_distributed_ucxx.py +++ b/ci/wheel_smoke_test_distributed_ucxx.py @@ -10,7 +10,6 @@ import ucxx -import distributed_ucxx # noqa: E402 from distributed_ucxx.utils_test import gen_test, ucxx_loop From 6e4482f6fa2c50fb965cc1a24aef36e9b170026e Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 30 Jan 2024 23:48:15 +0100 Subject: [PATCH 76/79] GH workflow fixes Co-authored-by: Vyas Ramasubramani --- .github/workflows/build.yaml | 2 +- .github/workflows/pr.yaml | 4 +--- 2 files changed, 2 insertions(+), 4 
deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 25c15efe..0406e744 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -86,7 +86,7 @@ jobs: date: ${{ inputs.date }} script: ci/build_wheel_distributed_ucxx.sh wheel-publish-distributed-ucxx: - needs: wheel-build-distributed-ucxx + needs: [wheel-build-ucxx, wheel-build-distributed-ucxx] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 with: diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 01f2ee41..fa7f0007 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -81,9 +81,7 @@ jobs: build_type: pull-request script: ci/build_wheel_distributed_ucxx.sh wheel-tests-distributed-ucxx: - needs: - - wheel-build-ucxx - - wheel-build-distributed-ucxx + needs: [wheel-build-ucxx, wheel-build-distributed-ucxx] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 with: From 4fa91fb5d201958d3f63a84debec5a6aa3a5e36b Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 31 Jan 2024 15:49:21 +0100 Subject: [PATCH 77/79] Make wheel build script more consistent Co-authored-by: Vyas Ramasubramani --- ci/build_wheel.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index f5014666..2b4a1bac 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -36,9 +36,7 @@ if [[ ${package_name} == "distributed-ucxx" ]]; then sed -r -i "s/rapids-dask-dependency==(.*)\"/rapids-dask-dependency==\1${alpha_spec}\"/g" ${pyproject_file} sed -r -i "s/\"ucxx(.*)\"/\"ucxx${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} - cd "${package_dir}" - - python -m pip wheel . 
-w dist -vvv --no-deps --disable-pip-version-check + python -m pip wheel "${package_dir}/" -w "${package_dir}/dist" -vvv --no-deps --disable-pip-version-check elif [[ ${package_name} == "ucxx" ]]; then # Add -cuXX to package name sed -r -i "s/rapids-dask-dependency==(.*)\"/rapids-dask-dependency==\1${alpha_spec}\"/g" ${pyproject_file} From 2a8495df31ad1c563a7eba0967f39783006e7cc6 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 31 Jan 2024 06:52:50 -0800 Subject: [PATCH 78/79] Move `RAPIDS_PY_CUDA_SUFFIX` and build to `ci/build_wheel.sh` --- ci/build_wheel.sh | 6 ++++++ ci/build_wheel_distributed_ucxx.sh | 4 ---- ci/build_wheel_ucxx.sh | 4 ---- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 2b4a1bac..46cb8383 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -12,6 +12,8 @@ source rapids-date-string version=$(rapids-generate-version) commit=$(git rev-parse HEAD) +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" + # This is the version of the suffix with a preceding hyphen. It's used # everywhere except in the final wheel name. 
PACKAGE_CUDA_SUFFIX="-${RAPIDS_PY_CUDA_SUFFIX}" @@ -37,6 +39,8 @@ if [[ ${package_name} == "distributed-ucxx" ]]; then sed -r -i "s/\"ucxx(.*)\"/\"ucxx${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} python -m pip wheel "${package_dir}/" -w "${package_dir}/dist" -vvv --no-deps --disable-pip-version-check + + RAPIDS_PY_WHEEL_NAME="distributed_ucxx_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/dist elif [[ ${package_name} == "ucxx" ]]; then # Add -cuXX to package name sed -r -i "s/rapids-dask-dependency==(.*)\"/rapids-dask-dependency==\1${alpha_spec}\"/g" ${pyproject_file} @@ -135,6 +139,8 @@ elif [[ ${package_name} == "ucxx" ]]; then pushd repair_dist zip -r $WHL ucxx_${RAPIDS_PY_CUDA_SUFFIX}.libs/ popd + + RAPIDS_PY_WHEEL_NAME="ucxx_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist else echo "Unknown package '${package_name}'" exit 1 diff --git a/ci/build_wheel_distributed_ucxx.sh b/ci/build_wheel_distributed_ucxx.sh index bd328f16..77c2d988 100755 --- a/ci/build_wheel_distributed_ucxx.sh +++ b/ci/build_wheel_distributed_ucxx.sh @@ -5,8 +5,4 @@ set -euo pipefail package_dir="python/distributed-ucxx" -export RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" - ./ci/build_wheel.sh distributed-ucxx ${package_dir} - -RAPIDS_PY_WHEEL_NAME="distributed_ucxx_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/dist diff --git a/ci/build_wheel_ucxx.sh b/ci/build_wheel_ucxx.sh index 74598ad0..12cdcbde 100755 --- a/ci/build_wheel_ucxx.sh +++ b/ci/build_wheel_ucxx.sh @@ -5,8 +5,4 @@ set -euo pipefail package_dir="python" -export RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" - ./ci/build_wheel.sh ucxx ${package_dir} - -RAPIDS_PY_WHEEL_NAME="ucxx_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist From 035c45352f9dd23c717bc5c66811be8652b2a591 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 31 Jan 2024 
06:54:56 -0800 Subject: [PATCH 79/79] Do not prevent errors in `print_ucx_config` --- ci/test_common.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/ci/test_common.sh b/ci/test_common.sh index 5eed7ae8..2fbc2c17 100755 --- a/ci/test_common.sh +++ b/ci/test_common.sh @@ -24,9 +24,7 @@ print_system_stats() { print_ucx_config() { rapids-logger "UCX Version and Build Configuration" - set +e ucx_info -v - set -e }