From 94a19fde0e718ab3af18e1afc5ba79988dee9fdd Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Mon, 29 Jan 2024 17:26:17 +0100 Subject: [PATCH 1/4] Pin to `pytest==7.*` (#179) Pytest 8 introduced breaking changes that `pytest-asyncio` is still in the process of resolving. See https://github.com/pytest-dev/pytest-asyncio/issues/763. This pin should also be done directly by `pytest-asyncio>=0.23.4`, but that version is still not available in conda-forge. Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - Lawrence Mitchell (https://github.com/wence-) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/ucxx/pull/179 --- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-120_arch-x86_64.yaml | 2 +- dependencies.yaml | 4 ++-- python/distributed-ucxx/pyproject.toml | 2 +- python/pyproject.toml | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index f6c90e38..f1a29638 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -33,9 +33,9 @@ dependencies: - pkg-config - pre-commit - pynvml>=11.4.1 -- pytest - pytest-asyncio - pytest-rerunfailures +- pytest==7.* - python>=3.9,<3.11 - rapids-dask-dependency==24.4.* - rmm==24.4.* diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index be4fb7be..a6801aa5 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -33,9 +33,9 @@ dependencies: - pkg-config - pre-commit - pynvml>=11.4.1 -- pytest - pytest-asyncio - pytest-rerunfailures +- pytest==7.* - python>=3.9,<3.11 - rapids-dask-dependency==24.4.* - rmm==24.4.* diff --git a/dependencies.yaml b/dependencies.yaml index a35cd12d..619c8803 100644 --- a/dependencies.yaml +++ 
b/dependencies.yaml @@ -241,7 +241,7 @@ dependencies: packages: - cloudpickle - *numba - - pytest + - pytest==7.* - pytest-asyncio - pytest-rerunfailures - rapids-dask-dependency==24.4.* @@ -250,7 +250,7 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - *numpy - - pytest + - pytest==7.* depends_on_cupy: common: - output_types: conda diff --git a/python/distributed-ucxx/pyproject.toml b/python/distributed-ucxx/pyproject.toml index a6fce545..c6a40be3 100644 --- a/python/distributed-ucxx/pyproject.toml +++ b/python/distributed-ucxx/pyproject.toml @@ -40,7 +40,7 @@ docs = [ ] test = [ "numpy>=1.21", - "pytest", + "pytest==7.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] diff --git a/python/pyproject.toml b/python/pyproject.toml index 1b30cdc6..4a57e003 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -44,9 +44,9 @@ test = [ "cudf==24.4.*", "cupy-cuda11x>=12.0.0", "numba>=0.57.1", - "pytest", "pytest-asyncio", "pytest-rerunfailures", + "pytest==7.*", "rapids-dask-dependency==24.4.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../dependencies.yaml and run `rapids-dependency-file-generator`. From 4af6b4749362247c4714db33d62ef9e92ecd2fac Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 30 Jan 2024 15:01:42 +0100 Subject: [PATCH 2/4] Pin to `pytest==7.*` (#180) Pytest 8 introduced breaking changes that `pytest-asyncio` is still in the process of resolving. See https://github.com/pytest-dev/pytest-asyncio/issues/763. This pin should also be done directly by `pytest-asyncio>=0.23.4`, but that version is still not available in conda-forge. 
--- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-120_arch-x86_64.yaml | 2 +- dependencies.yaml | 2 +- python/pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index f84ff41d..ca5e8ad4 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -35,9 +35,9 @@ dependencies: - pkg-config - pre-commit - pynvml>=11.4.1 -- pytest - pytest-asyncio - pytest-rerunfailures +- pytest==7.* - python>=3.9,<3.11 - rmm==24.2.* - scikit-build-core>=0.7.0 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index bc073c12..74413f86 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -35,9 +35,9 @@ dependencies: - pkg-config - pre-commit - pynvml>=11.4.1 -- pytest - pytest-asyncio - pytest-rerunfailures +- pytest==7.* - python>=3.9,<3.11 - rmm==24.2.* - scikit-build-core>=0.7.0 diff --git a/dependencies.yaml b/dependencies.yaml index e83f8185..d996406f 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -214,7 +214,7 @@ dependencies: - dask - distributed - numba>=0.57.1 - - pytest + - pytest==7.* - pytest-asyncio - pytest-rerunfailures depends_on_cupy: diff --git a/python/pyproject.toml b/python/pyproject.toml index 32ed32e7..295a0b7c 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -45,9 +45,9 @@ test = [ "dask", "distributed", "numba>=0.57.1", - "pytest", "pytest-asyncio", "pytest-rerunfailures", + "pytest==7.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../dependencies.yaml and run `rapids-dependency-file-generator`. 
[project.urls] From 5d50ef9f38f935ce4ea51a9f0eb2ed8afb03af73 Mon Sep 17 00:00:00 2001 From: Mike Sarahan Date: Wed, 31 Jan 2024 14:58:04 -0600 Subject: [PATCH 3/4] add build wheel script and accompanying version info (#167) This PR adds wheel builds for `ucxx`. A follow-up PR will add wheel builds for `distributed-ucxx`. Closes #145 Authors: - Mike Sarahan (https://github.com/msarahan) - Vyas Ramasubramani (https://github.com/vyasr) - Peter Andreas Entschev (https://github.com/pentschev) - Bradley Dice (https://github.com/bdice) Approvers: - Peter Andreas Entschev (https://github.com/pentschev) - Ray Douglass (https://github.com/raydouglass) - Vyas Ramasubramani (https://github.com/vyasr) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/ucxx/pull/167 --- .github/workflows/build.yaml | 38 ++++ .github/workflows/pr.yaml | 34 ++++ .github/workflows/test.yaml | 20 ++ VERSION | 1 + ci/build_wheel.sh | 147 +++++++++++++++ ci/build_wheel_distributed_ucxx.sh | 8 + ci/build_wheel_ucxx.sh | 8 + ci/test_common.sh | 175 ++++++++++++++++++ ci/test_cpp.sh | 97 ++-------- ci/test_python.sh | 74 +------- ci/test_utils.sh | 24 --- ci/test_wheel_distributed_ucxx.sh | 36 ++++ ci/test_wheel_ucxx.sh | 28 +++ ci/wheel_smoke_test_distributed_ucxx.py | 64 +++++++ ci/wheel_smoke_test_ucxx.py | 116 ++++++++++++ cpp/CMakeLists.txt | 22 ++- python/CMakeLists.txt | 5 + .../distributed-ucxx/distributed_ucxx/VERSION | 1 + .../distributed_ucxx/_version.py | 23 +++ python/distributed-ucxx/pyproject.toml | 11 +- python/ucxx/VERSION | 1 + python/ucxx/_lib/CMakeLists.txt | 3 +- python/ucxx/_version.py | 18 ++ python/ucxx/examples/CMakeLists.txt | 1 + 24 files changed, 758 insertions(+), 197 deletions(-) create mode 100644 VERSION create mode 100755 ci/build_wheel.sh create mode 100755 ci/build_wheel_distributed_ucxx.sh create mode 100755 ci/build_wheel_ucxx.sh create mode 100755 ci/test_common.sh delete mode 100755 ci/test_utils.sh create mode 100755 
ci/test_wheel_distributed_ucxx.sh create mode 100755 ci/test_wheel_ucxx.sh create mode 100644 ci/wheel_smoke_test_distributed_ucxx.py create mode 100644 ci/wheel_smoke_test_ucxx.py create mode 120000 python/distributed-ucxx/distributed_ucxx/VERSION create mode 100644 python/distributed-ucxx/distributed_ucxx/_version.py create mode 120000 python/ucxx/VERSION create mode 100644 python/ucxx/_version.py diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 396ff859..0406e744 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -57,3 +57,41 @@ jobs: branch: ${{ inputs.branch }} date: ${{ inputs.date }} sha: ${{ inputs.sha }} + wheel-build-ucxx: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + script: ci/build_wheel_ucxx.sh + wheel-publish-ucxx: + needs: wheel-build-ucxx + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: ucxx + wheel-build-distributed-ucxx: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + script: ci/build_wheel_distributed_ucxx.sh + wheel-publish-distributed-ucxx: + needs: [wheel-build-ucxx, wheel-build-distributed-ucxx] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: distributed_ucxx diff --git 
a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index ca2c646f..fa7f0007 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -17,6 +17,10 @@ jobs: - docs-build - conda-cpp-tests - conda-python-tests + - wheel-build-ucxx + - wheel-tests-ucxx + - wheel-build-distributed-ucxx + - wheel-tests-distributed-ucxx secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.04 checks: @@ -54,3 +58,33 @@ jobs: with: build_type: pull-request container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" + wheel-build-ucxx: + needs: checks + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + with: + build_type: pull-request + script: ci/build_wheel_ucxx.sh + wheel-tests-ucxx: + needs: wheel-build-ucxx + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + with: + build_type: pull-request + container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" + script: ci/test_wheel_ucxx.sh + wheel-build-distributed-ucxx: + needs: checks + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + with: + build_type: pull-request + script: ci/build_wheel_distributed_ucxx.sh + wheel-tests-distributed-ucxx: + needs: [wheel-build-ucxx, wheel-build-distributed-ucxx] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + with: + build_type: pull-request + container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" + script: ci/test_wheel_distributed_ucxx.sh diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 24e975fb..73b47503 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -32,3 +32,23 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g 
--ulimit=nofile=1000000:1000000" + wheel-tests-ucxx: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" + script: ci/test_wheel_ucxx.sh + wheel-tests-distributed-ucxx: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" + script: ci/test_wheel_distributed_ucxx.sh diff --git a/VERSION b/VERSION new file mode 100644 index 00000000..d142a90c --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +0.37.00 diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh new file mode 100755 index 00000000..46cb8383 --- /dev/null +++ b/ci/build_wheel.sh @@ -0,0 +1,147 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -euo pipefail + +package_name=$1 +package_dir=$2 + +source rapids-configure-sccache +source rapids-date-string + +version=$(rapids-generate-version) +commit=$(git rev-parse HEAD) + +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" + +# This is the version of the suffix with a preceding hyphen. It's used +# everywhere except in the final wheel name. +PACKAGE_CUDA_SUFFIX="-${RAPIDS_PY_CUDA_SUFFIX}" + +# Patch project metadata files to include the CUDA version suffix and version override. 
+pyproject_file="${package_dir}/pyproject.toml" + +sed -i -E "s/^name = \"${package_name}(.*)?\"$/name = \"${package_name}${PACKAGE_CUDA_SUFFIX}\"/g" ${pyproject_file} +echo "${version}" > VERSION +sed -i "/^__git_commit__/ s/= .*/= \"${commit}\"/g" "${package_dir}/${package_name//-/_}/_version.py" + +# For nightlies we want to ensure that we're pulling in alphas as well. The +# easiest way to do so is to augment the spec with a constraint containing a +# min alpha version that doesn't affect the version bounds but does allow usage +# of alpha versions for that dependency without --pre +alpha_spec='' +if ! rapids-is-release-build; then + alpha_spec=',>=0.0.0a0' +fi + +if [[ ${package_name} == "distributed-ucxx" ]]; then + sed -r -i "s/rapids-dask-dependency==(.*)\"/rapids-dask-dependency==\1${alpha_spec}\"/g" ${pyproject_file} + sed -r -i "s/\"ucxx(.*)\"/\"ucxx${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} + + python -m pip wheel "${package_dir}/" -w "${package_dir}/dist" -vvv --no-deps --disable-pip-version-check + + RAPIDS_PY_WHEEL_NAME="distributed_ucxx_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/dist +elif [[ ${package_name} == "ucxx" ]]; then + # Add -cuXX to package name + sed -r -i "s/rapids-dask-dependency==(.*)\"/rapids-dask-dependency==\1${alpha_spec}\"/g" ${pyproject_file} + sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} + sed -r -i "s/cudf(.*)\"/cudf${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} + + # Update cupy package name (different suffix from RAPIDS) + if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then + sed -i "s/cupy-cuda11x/cupy-cuda12x/g" ${pyproject_file} + fi + + SKBUILD_CMAKE_ARGS="-DUCXX_ENABLE_RMM=ON" \ + python -m pip wheel "${package_dir}"/ -w "${package_dir}"/dist -vvv --no-deps --disable-pip-version-check + + python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/* + + # Auditwheel rewrites dynamic libraries that are 
referenced at link time in the + # package. However, UCX loads a number of sub-libraries at runtime via dlopen; + # these are not picked up by auditwheel. Since we have a priori knowledge of + # what these libraries are, we mimic the behaviour of auditwheel by using the + # same hash-based uniqueness scheme and rewriting the link paths. + + WHL=$(realpath ${package_dir}/final_dist/ucxx*manylinux*.whl) + + # first grab the auditwheel hashes for libuc{tms} + LIBUCM=$(unzip -l $WHL | awk 'match($4, /libucm-[^\.]+\./) { print substr($4, RSTART) }') + LIBUCT=$(unzip -l $WHL | awk 'match($4, /libuct-[^\.]+\./) { print substr($4, RSTART) }') + LIBUCS=$(unzip -l $WHL | awk 'match($4, /libucs-[^\.]+\./) { print substr($4, RSTART) }') + LIBNUMA=$(unzip -l $WHL | awk 'match($4, /libnuma-[^\.]+\./) { print substr($4, RSTART) }') + + # Extract the libraries that have already been patched in by auditwheel + mkdir -p repair_dist/ucxx_${RAPIDS_PY_CUDA_SUFFIX}.libs/ucx + unzip $WHL "ucxx_${RAPIDS_PY_CUDA_SUFFIX}.libs/*.so*" -d repair_dist/ + + # Patch the RPATH to include ORIGIN for each library + pushd repair_dist/ucxx_${RAPIDS_PY_CUDA_SUFFIX}.libs + for f in libu*.so* + do + if [[ -f $f ]]; then + patchelf --add-rpath '$ORIGIN' $f + fi + done + + popd + + # Now copy in all the extra libraries that are only ever loaded at runtime + pushd repair_dist/ucxx_${RAPIDS_PY_CUDA_SUFFIX}.libs/ucx + if [[ -d /usr/lib64/ucx ]]; then + cp -P /usr/lib64/ucx/* . + elif [[ -d /usr/lib/ucx ]]; then + cp -P /usr/lib/ucx/* . + else + echo "Could not find ucx libraries" + exit 1 + fi + + # we link against /lib/site-packages/ucxx_${RAPIDS_PY_CUDA_SUFFIX}.lib/libuc{ptsm} + # we also amend the rpath to search one directory above to *find* libuc{tsm} + for f in libu*.so* + do + # Avoid patching symlinks, which is redundant + if [[ ! 
-L $f ]]; then + patchelf --replace-needed libuct.so.0 $LIBUCT $f + patchelf --replace-needed libucs.so.0 $LIBUCS $f + patchelf --replace-needed libucm.so.0 $LIBUCM $f + patchelf --replace-needed libnuma.so.1 $LIBNUMA $f + patchelf --add-rpath '$ORIGIN/..' $f + fi + done + + # Bring in cudart as well. To avoid symbol collision with other libraries e.g. + # cupy we mimic auditwheel by renaming the libraries to include the hashes of + # their names. Since there will typically be a chain of symlinks + # libcudart.so->libcudart.so.X->libcudart.so.X.Y.Z we need to follow the chain + # and rename all of them. + + find /usr/local/cuda/ -name "libcudart*.so*" | xargs cp -P -t . + src=libcudart.so + hash=$(sha256sum ${src} | awk '{print substr($1, 0, 8)}') + target=$(basename $(readlink -f ${src})) + + mv ${target} ${target/libcudart/libcudart-${hash}} + while readlink ${src} > /dev/null; do + target=$(readlink ${src}) + ln -s ${target/libcudart/libcudart-${hash}} ${src/libcudart/libcudart-${hash}} + rm -f ${src} + src=${target} + done + + to_rewrite=$(ldd libuct_cuda.so | awk '/libcudart/ { print $1 }') + patchelf --replace-needed ${to_rewrite} libcudart-${hash}.so libuct_cuda.so + patchelf --add-rpath '$ORIGIN' libuct_cuda.so + + popd + + pushd repair_dist + zip -r $WHL ucxx_${RAPIDS_PY_CUDA_SUFFIX}.libs/ + popd + + RAPIDS_PY_WHEEL_NAME="ucxx_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist +else + echo "Unknown package '${package_name}'" + exit 1 +fi diff --git a/ci/build_wheel_distributed_ucxx.sh b/ci/build_wheel_distributed_ucxx.sh new file mode 100755 index 00000000..77c2d988 --- /dev/null +++ b/ci/build_wheel_distributed_ucxx.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +set -euo pipefail + +package_dir="python/distributed-ucxx" + +./ci/build_wheel.sh distributed-ucxx ${package_dir} diff --git a/ci/build_wheel_ucxx.sh b/ci/build_wheel_ucxx.sh new file mode 100755 index 00000000..12cdcbde --- /dev/null +++ b/ci/build_wheel_ucxx.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -euo pipefail + +package_dir="python" + +./ci/build_wheel.sh ucxx ${package_dir} diff --git a/ci/test_common.sh b/ci/test_common.sh new file mode 100755 index 00000000..2fbc2c17 --- /dev/null +++ b/ci/test_common.sh @@ -0,0 +1,175 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. +# SPDX-License-Identifier: BSD-3-Clause + +set -euo pipefail + + +################################### Common ##################################### +log_command() { + CMD_LINE=$1 + echo -e "\e[1mRunning: \n ${CMD_LINE}\e[0m" +} + +print_system_stats() { + rapids-logger "Check GPU usage" + nvidia-smi + + rapids-logger "Check NICs" + awk 'END{print $1}' /etc/hosts + cat /etc/hosts +} + +print_ucx_config() { + rapids-logger "UCX Version and Build Configuration" + + ucx_info -v +} + + +##################################### C++ ###################################### +_SERVER_PORT=12345 + +run_cpp_tests() { + RUNTIME_PATH=${CONDA_PREFIX:-./} + BINARY_PATH=${RUNTIME_PATH}/bin + + CMD_LINE="timeout 10m ${BINARY_PATH}/gtests/libucxx/UCXX_TEST" + + log_command "${CMD_LINE}" + UCX_TCP_CM_REUSEADDR=y ${CMD_LINE} +} + +run_cpp_benchmark() { + SERVER_PORT=$1 + PROGRESS_MODE=$2 + + RUNTIME_PATH=${CONDA_PREFIX:-./} + BINARY_PATH=${RUNTIME_PATH}/bin + + CMD_LINE_SERVER="timeout 1m ${BINARY_PATH}/benchmarks/libucxx/ucxx_perftest -s 8388608 -r -n 20 -m ${PROGRESS_MODE} -p ${SERVER_PORT}" + CMD_LINE_CLIENT="timeout 1m ${BINARY_PATH}/benchmarks/libucxx/ucxx_perftest -s 8388608 -r -n 20 -m ${PROGRESS_MODE} -p ${SERVER_PORT} 127.0.0.1" + + log_command "${CMD_LINE_SERVER}" + UCX_TCP_CM_REUSEADDR=y ${CMD_LINE_SERVER} & + 
sleep 1 + + log_command "${CMD_LINE_CLIENT}" + ${CMD_LINE_CLIENT} +} + +run_cpp_example() { + SERVER_PORT=$1 + PROGRESS_MODE=$2 + + RUNTIME_PATH=${CONDA_PREFIX:-./} + BINARY_PATH=${RUNTIME_PATH}/bin + + CMD_LINE="timeout 1m ${BINARY_PATH}/examples/libucxx/ucxx_example_basic -m ${PROGRESS_MODE} -p ${SERVER_PORT}" + + log_command "${CMD_LINE}" + UCX_TCP_CM_REUSEADDR=y ${CMD_LINE} +} + +run_cpp_port_retry() { + MAX_ATTEMPTS=${1} + RUN_TYPE=${2} + PROGRESS_MODE=${3} + + set +e + for attempt in $(seq 1 ${MAX_ATTEMPTS}); do + echo "Attempt ${attempt}/${MAX_ATTEMPTS} to run ${RUN_TYPE}" + + _SERVER_PORT=$((_SERVER_PORT + 1)) # Use different ports every time to prevent `Device is busy` + + if [[ "${RUN_TYPE}" == "benchmark" ]]; then + run_cpp_benchmark ${_SERVER_PORT} ${PROGRESS_MODE} + elif [[ "${RUN_TYPE}" == "example" ]]; then + run_cpp_example ${_SERVER_PORT} ${PROGRESS_MODE} + else + set -e + echo "Unknown test type "${RUN_TYPE}"" + exit 1 + fi + + LAST_STATUS=$? + if [ ${LAST_STATUS} -eq 0 ]; then + break; + fi + sleep 1 + done + set -e + + if [ ${LAST_STATUS} -ne 0 ]; then + echo "Failure running benchmark client after ${MAX_ATTEMPTS} attempts" + exit $LAST_STATUS + fi +} + + +#################################### Python #################################### +run_py_tests() { + CMD_LINE="timeout 2m python -m pytest -vs python/ucxx/_lib/tests/" + log_command "${CMD_LINE}" + timeout 2m python -m pytest -vs python/ucxx/_lib/tests/ +} + +run_py_tests_async() { + PROGRESS_MODE=$1 + ENABLE_DELAYED_SUBMISSION=$2 + ENABLE_PYTHON_FUTURE=$3 + SKIP=$4 + + CMD_LINE="UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 20m python -m pytest -vs python/ucxx/_lib_async/tests/ --durations=50" + + if [ $SKIP -ne 0 ]; then + echo -e "\e[1;33mSkipping unstable test: ${CMD_LINE}\e[0m" + else + log_command "${CMD_LINE}" + UCXPY_PROGRESS_MODE=${PROGRESS_MODE} 
UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 20m python -m pytest -vs python/ucxx/_lib_async/tests/ --durations=50 + fi +} + +run_py_benchmark() { + BACKEND=$1 + PROGRESS_MODE=$2 + ASYNCIO_WAIT=$3 + ENABLE_DELAYED_SUBMISSION=$4 + ENABLE_PYTHON_FUTURE=$5 + N_BUFFERS=$6 + SLOW=$7 + + if [ $ASYNCIO_WAIT -ne 0 ]; then + ASYNCIO_WAIT="--asyncio-wait" + else + ASYNCIO_WAIT="" + fi + + CMD_LINE="UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 2m python -m ucxx.benchmarks.send_recv --backend ${BACKEND} -o cupy --reuse-alloc -n 8MiB --n-buffers $N_BUFFERS --progress-mode ${PROGRESS_MODE} ${ASYNCIO_WAIT}" + + # Workaround for https://github.com/rapidsai/ucxx/issues/15 + CMD_LINE="UCX_KEEPALIVE_INTERVAL=1ms ${CMD_LINE}" + + log_command "${CMD_LINE}" + if [ $SLOW -ne 0 ]; then + echo -e "\e[1;33mSLOW BENCHMARK: it may seem like a deadlock but will eventually complete.\e[0m" + fi + + UCX_KEEPALIVE_INTERVAL=1ms UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 2m python -m ucxx.benchmarks.send_recv --backend ${BACKEND} -o cupy --reuse-alloc -n 8MiB --n-buffers $N_BUFFERS --progress-mode ${PROGRESS_MODE} ${ASYNCIO_WAIT} +} + +################################## Distributed ################################# +run_distributed_ucxx_tests() { + PROGRESS_MODE=$1 + ENABLE_DELAYED_SUBMISSION=$2 + ENABLE_PYTHON_FUTURE=$3 + + CMD_LINE="UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 10m python -m pytest -vs python/distributed-ucxx/distributed_ucxx/tests/" + + # Workaround for https://github.com/rapidsai/ucxx/issues/15 + # CMD_LINE="UCX_KEEPALIVE_INTERVAL=1ms ${CMD_LINE}" + + log_command "${CMD_LINE}" + UCXPY_PROGRESS_MODE=${PROGRESS_MODE} 
UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 10m python -m pytest -vs python/distributed-ucxx/distributed_ucxx/tests/ +} diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index 5d72d463..08f202e9 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -5,7 +5,7 @@ set -euo pipefail -source "$(dirname "$0")/test_utils.sh" +source "$(dirname "$0")/test_common.sh" rapids-logger "Create test conda environment" . /opt/conda/etc/profile.d/conda.sh @@ -24,75 +24,6 @@ print_system_stats BINARY_PATH=${CONDA_PREFIX}/bin -_SERVER_PORT=12345 - -run_tests() { - CMD_LINE="timeout 10m ${BINARY_PATH}/gtests/libucxx/UCXX_TEST" - - log_command "${CMD_LINE}" - UCX_TCP_CM_REUSEADDR=y ${CMD_LINE} -} - -run_benchmark() { - SERVER_PORT=$1 - PROGRESS_MODE=$2 - - CMD_LINE_SERVER="timeout 1m ${BINARY_PATH}/benchmarks/libucxx/ucxx_perftest -s 8388608 -r -n 20 -m ${PROGRESS_MODE} -p ${SERVER_PORT}" - CMD_LINE_CLIENT="timeout 1m ${BINARY_PATH}/benchmarks/libucxx/ucxx_perftest -s 8388608 -r -n 20 -m ${PROGRESS_MODE} -p ${SERVER_PORT} 127.0.0.1" - - log_command "${CMD_LINE_SERVER}" - UCX_TCP_CM_REUSEADDR=y ${CMD_LINE_SERVER} & - sleep 1 - - log_command "${CMD_LINE_CLIENT}" - ${CMD_LINE_CLIENT} -} - -run_example() { - SERVER_PORT=$1 - PROGRESS_MODE=$2 - - CMD_LINE="timeout 1m ${BINARY_PATH}/examples/libucxx/ucxx_example_basic -m ${PROGRESS_MODE} -p ${SERVER_PORT}" - - log_command "${CMD_LINE}" - UCX_TCP_CM_REUSEADDR=y ${CMD_LINE} -} - -run_port_retry() { - MAX_ATTEMPTS=${1} - RUN_TYPE=${2} - PROGRESS_MODE=${3} - - set +e - for attempt in $(seq 1 ${MAX_ATTEMPTS}); do - echo "Attempt ${attempt}/${MAX_ATTEMPTS} to run ${RUN_TYPE}" - - _SERVER_PORT=$((_SERVER_PORT + 1)) # Use different ports every time to prevent `Device is busy` - - if [[ "${RUN_TYPE}" == "benchmark" ]]; then - run_benchmark ${_SERVER_PORT} ${PROGRESS_MODE} - elif [[ "${RUN_TYPE}" == "example" ]]; then - run_example ${_SERVER_PORT} ${PROGRESS_MODE} - else - set -e - 
echo "Unknown test type "${RUN_TYPE}"" - exit 1 - fi - - LAST_STATUS=$? - if [ ${LAST_STATUS} -eq 0 ]; then - break; - fi - sleep 1 - done - set -e - - if [ ${LAST_STATUS} -ne 0 ]; then - echo "Failure running benchmark client after ${MAX_ATTEMPTS} attempts" - exit $LAST_STATUS - fi -} - rapids-logger "Downloading artifacts from previous jobs" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) @@ -104,20 +35,20 @@ print_ucx_config rapids-logger "Run tests with conda package" rapids-logger "C++ Tests" -run_tests +run_cpp_tests rapids-logger "C++ Benchmarks" -# run_port_retry MAX_ATTEMPTS RUN_TYPE PROGRESS_MODE -run_port_retry 10 "benchmark" "polling" -run_port_retry 10 "benchmark" "blocking" -run_port_retry 10 "benchmark" "thread-polling" -run_port_retry 10 "benchmark" "thread-blocking" -run_port_retry 10 "benchmark" "wait" +# run_cpp_port_retry MAX_ATTEMPTS RUN_TYPE PROGRESS_MODE +run_cpp_port_retry 10 "benchmark" "polling" +run_cpp_port_retry 10 "benchmark" "blocking" +run_cpp_port_retry 10 "benchmark" "thread-polling" +run_cpp_port_retry 10 "benchmark" "thread-blocking" +run_cpp_port_retry 10 "benchmark" "wait" rapids-logger "C++ Examples" -# run_port_retry MAX_ATTEMPTS RUN_TYPE PROGRESS_MODE -run_port_retry 10 "example" "polling" -run_port_retry 10 "example" "blocking" -run_port_retry 10 "example" "thread-polling" -run_port_retry 10 "example" "thread-blocking" -run_port_retry 10 "example" "wait" +# run_cpp_port_retry MAX_ATTEMPTS RUN_TYPE PROGRESS_MODE +run_cpp_port_retry 10 "example" "polling" +run_cpp_port_retry 10 "example" "blocking" +run_cpp_port_retry 10 "example" "thread-polling" +run_cpp_port_retry 10 "example" "thread-blocking" +run_cpp_port_retry 10 "example" "wait" diff --git a/ci/test_python.sh b/ci/test_python.sh index 79b932db..c45c2cbb 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -5,7 +5,7 @@ set -euo pipefail -source "$(dirname "$0")/test_utils.sh" +source "$(dirname "$0")/test_common.sh" rapids-logger "Create test conda environment" . 
/opt/conda/etc/profile.d/conda.sh @@ -22,70 +22,6 @@ rapids-print-env print_system_stats -run_tests() { - CMD_LINE="timeout 2m pytest -vs python/ucxx/_lib/tests/" - log_command "${CMD_LINE}" - timeout 2m pytest -vs python/ucxx/_lib/tests/ -} - -run_tests_async() { - PROGRESS_MODE=$1 - ENABLE_DELAYED_SUBMISSION=$2 - ENABLE_PYTHON_FUTURE=$3 - SKIP=$4 - - CMD_LINE="UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 20m pytest -vs python/ucxx/_lib_async/tests/ --durations=50" - - if [ $SKIP -ne 0 ]; then - echo -e "\e[1;33mSkipping unstable test: ${CMD_LINE}\e[0m" - else - log_command "${CMD_LINE}" - UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 20m pytest -vs python/ucxx/_lib_async/tests/ --durations=50 - fi -} - -run_py_benchmark() { - BACKEND=$1 - PROGRESS_MODE=$2 - ASYNCIO_WAIT=$3 - ENABLE_DELAYED_SUBMISSION=$4 - ENABLE_PYTHON_FUTURE=$5 - N_BUFFERS=$6 - SLOW=$7 - - if [ $ASYNCIO_WAIT -ne 0 ]; then - ASYNCIO_WAIT="--asyncio-wait" - else - ASYNCIO_WAIT="" - fi - - CMD_LINE="UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 2m python -m ucxx.benchmarks.send_recv --backend ${BACKEND} -o cupy --reuse-alloc -n 8MiB --n-buffers $N_BUFFERS --progress-mode ${PROGRESS_MODE} ${ASYNCIO_WAIT}" - - # Workaround for https://github.com/rapidsai/ucxx/issues/15 - CMD_LINE="UCX_KEEPALIVE_INTERVAL=1ms ${CMD_LINE}" - - log_command "${CMD_LINE}" - if [ $SLOW -ne 0 ]; then - echo -e "\e[1;33mSLOW BENCHMARK: it may seem like a deadlock but will eventually complete.\e[0m" - fi - - UCX_KEEPALIVE_INTERVAL=1ms UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 2m python -m ucxx.benchmarks.send_recv --backend ${BACKEND} -o cupy --reuse-alloc -n 
8MiB --n-buffers $N_BUFFERS --progress-mode ${PROGRESS_MODE} ${ASYNCIO_WAIT} -} - -run_distributed_ucxx_tests() { - PROGRESS_MODE=$1 - ENABLE_DELAYED_SUBMISSION=$2 - ENABLE_PYTHON_FUTURE=$3 - - CMD_LINE="UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 10m pytest -vs python/distributed-ucxx/distributed_ucxx/tests/" - - # Workaround for https://github.com/rapidsai/ucxx/issues/15 - # CMD_LINE="UCX_KEEPALIVE_INTERVAL=1ms ${CMD_LINE}" - - log_command "${CMD_LINE}" - UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 10m pytest -vs python/distributed-ucxx/distributed_ucxx/tests/ -} - rapids-logger "Downloading artifacts from previous jobs" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) @@ -103,12 +39,12 @@ print_ucx_config rapids-logger "Run tests with conda package" rapids-logger "Python Core Tests" -run_tests +run_py_tests rapids-logger "Python Async Tests" -# run_tests_async PROGRESS_MODE ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE SKIP -run_tests_async thread 0 0 0 -run_tests_async thread 1 1 0 +# run_py_tests_async PROGRESS_MODE ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE SKIP +run_py_tests_async thread 0 0 0 +run_py_tests_async thread 1 1 0 rapids-logger "Python Benchmarks" # run_py_benchmark BACKEND PROGRESS_MODE ASYNCIO_WAIT ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE NBUFFERS SLOW diff --git a/ci/test_utils.sh b/ci/test_utils.sh deleted file mode 100755 index 237b1f1d..00000000 --- a/ci/test_utils.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. 
-# SPDX-License-Identifier: BSD-3-Clause - - -log_command() { - CMD_LINE=$1 - echo -e "\e[1mRunning: \n ${CMD_LINE}\e[0m" -} - -print_system_stats() { - rapids-logger "Check GPU usage" - nvidia-smi - - rapids-logger "Check NICs" - awk 'END{print $1}' /etc/hosts - cat /etc/hosts -} - -print_ucx_config() { - rapids-logger "UCX Version and Build Configuration" - ucx_info -v -} diff --git a/ci/test_wheel_distributed_ucxx.sh b/ci/test_wheel_distributed_ucxx.sh new file mode 100755 index 00000000..c8c0bbca --- /dev/null +++ b/ci/test_wheel_distributed_ucxx.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -euo pipefail + +PROJECT_NAME="distributed_ucxx" + +source "$(dirname "$0")/test_common.sh" + +mkdir -p ./dist +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +RAPIDS_PY_WHEEL_NAME="${PROJECT_NAME}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist + +# Install previously built ucxx wheel +RAPIDS_PY_WHEEL_NAME="ucxx_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-ucxx-dep +python -m pip install ./local-ucxx-dep/ucxx*.whl + +# echo to expand wildcard before adding `[extra]` requires for pip +python -m pip install $(echo ./dist/${PROJECT_NAME}*.whl)[test] + +# TODO: We need distributed installed in developer mode to provide test utils, +# we still need to match to the `rapids-dask-dependency` version. 
+rapids-logger "Install Distributed in developer mode" +git clone https://github.com/dask/distributed /tmp/distributed +python -m pip install -e /tmp/distributed + +# Run smoke tests for aarch64 pull requests +if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then + rapids-logger "Distributed Smoke Tests" + python -m pytest -vs ci/wheel_smoke_test_distributed_ucxx.py +else + rapids-logger "Distributed Tests" + + # run_distributed_ucxx_tests PROGRESS_MODE ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE + run_distributed_ucxx_tests thread 1 1 +fi diff --git a/ci/test_wheel_ucxx.sh b/ci/test_wheel_ucxx.sh new file mode 100755 index 00000000..c844da67 --- /dev/null +++ b/ci/test_wheel_ucxx.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -euo pipefail + +PROJECT_NAME="ucxx" + +source "$(dirname "$0")/test_common.sh" + +mkdir -p ./dist +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +RAPIDS_PY_WHEEL_NAME="${PROJECT_NAME}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist + +# echo to expand wildcard before adding `[extra]` requires for pip +python -m pip install $(echo ./dist/${PROJECT_NAME}*.whl)[test] + +# Run smoke tests for aarch64 pull requests +if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then + rapids-logger "Python Async Smoke Tests" + python -m pytest -vs ci/wheel_smoke_test_ucxx.py +else + rapids-logger "Python Core Tests" + run_py_tests + + rapids-logger "Python Async Tests" + # run_py_tests_async PROGRESS_MODE ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE SKIP + run_py_tests_async thread 1 1 0 +fi diff --git a/ci/wheel_smoke_test_distributed_ucxx.py b/ci/wheel_smoke_test_distributed_ucxx.py new file mode 100644 index 00000000..e7f9c0ef --- /dev/null +++ b/ci/wheel_smoke_test_distributed_ucxx.py @@ -0,0 +1,64 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-License-Identifier: BSD-3-Clause + +import asyncio + +import pytest + +from distributed.comm import connect, listen +from distributed.protocol import to_serialize + +import ucxx + +from distributed_ucxx.utils_test import gen_test, ucxx_loop + + +try: + HOST = ucxx.get_address() +except Exception: + HOST = "127.0.0.1" + + +async def get_comm_pair( + listen_addr=f"ucxx://{HOST}", listen_args=None, connect_args=None, **kwargs +): + listen_args = listen_args or {} + connect_args = connect_args or {} + q = asyncio.queues.Queue() + + async def handle_comm(comm): + await q.put(comm) + + listener = listen(listen_addr, handle_comm, **listen_args, **kwargs) + async with listener: + comm = await connect(listener.contact_address, **connect_args, **kwargs) + serv_comm = await q.get() + return (comm, serv_comm) + + +@pytest.mark.parametrize( + "g", + [ + lambda cudf: cudf.Series([1, 2, 3]), + lambda cudf: cudf.DataFrame({"a": [1, 2, None], "b": [1.0, 2.0, None]}), + ], +) +@gen_test() +async def test_ping_pong_cudf(ucxx_loop, g): + cudf = pytest.importorskip("cudf") + from cudf.testing._utils import assert_eq + + cudf_obj = g(cudf) + + com, serv_com = await get_comm_pair() + msg = {"op": "ping", "data": to_serialize(cudf_obj)} + + await com.write(msg) + result = await serv_com.read() + + cudf_obj_2 = result.pop("data") + assert result["op"] == "ping" + assert_eq(cudf_obj, cudf_obj_2) + + await com.close() + await serv_com.close() diff --git a/ci/wheel_smoke_test_ucxx.py b/ci/wheel_smoke_test_ucxx.py new file mode 100644 index 00000000..44b0ebfe --- /dev/null +++ b/ci/wheel_smoke_test_ucxx.py @@ -0,0 +1,116 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-License-Identifier: BSD-3-Clause + +import asyncio +import pickle + +import numpy as np +import pytest + +import ucxx + +cudf = pytest.importorskip("cudf") +distributed = pytest.importorskip("distributed") +cuda = pytest.importorskip("numba.cuda") + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "g", + [ + lambda cudf: cudf.Series([1, 2, 3]), + lambda cudf: cudf.DataFrame({"a": np.random.random(1200000)}), + ], +) +async def test_send_recv_cudf(event_loop, g): + from distributed.utils import nbytes + + class UCX: + def __init__(self, ep): + self.ep = ep + + async def write(self, cdf): + header, _frames = cdf.serialize() + frames = [pickle.dumps(header)] + _frames + + # Send meta data + await self.ep.send(np.array([len(frames)], dtype=np.uint64)) + await self.ep.send( + np.array( + [hasattr(f, "__cuda_array_interface__") for f in frames], + dtype=bool, + ) + ) + await self.ep.send(np.array([nbytes(f) for f in frames], dtype=np.uint64)) + # Send frames + for frame in frames: + if nbytes(frame) > 0: + await self.ep.send(frame) + + async def read(self): + try: + # Recv meta data + nframes = np.empty(1, dtype=np.uint64) + await self.ep.recv(nframes) + is_cudas = np.empty(nframes[0], dtype=bool) + await self.ep.recv(is_cudas) + sizes = np.empty(nframes[0], dtype=np.uint64) + await self.ep.recv(sizes) + except ( + ucxx.exceptions.UCXCanceledError, + ucxx.exceptions.UCXCloseError, + ) as e: + msg = "SOMETHING TERRIBLE HAS HAPPENED IN THE TEST" + raise e(msg) + else: + # Recv frames + frames = [] + for is_cuda, size in zip(is_cudas.tolist(), sizes.tolist()): + if size > 0: + if is_cuda: + frame = cuda.device_array((size,), dtype=np.uint8) + else: + frame = np.empty(size, dtype=np.uint8) + await self.ep.recv(frame) + frames.append(frame) + else: + if is_cuda: + frames.append(cuda.device_array((0,), dtype=np.uint8)) + else: + frames.append(b"") + return frames + + class UCXListener: + def __init__(self): + self.comm = None + + def start(self): + async def 
serve_forever(ep): + ucx = UCX(ep) + self.comm = ucx + + self.ucxx_server = ucxx.create_listener(serve_forever) + + uu = UCXListener() + uu.start() + uu.address = ucxx.get_address() + uu.client = await ucxx.create_endpoint(uu.address, uu.ucxx_server.port) + ucx = UCX(uu.client) + await asyncio.sleep(0.2) + msg = g(cudf) + frames, _ = await asyncio.gather(uu.comm.read(), ucx.write(msg)) + ucx_header = pickle.loads(frames[0]) + cudf_buffer = frames[1:] + typ = type(msg) + res = typ.deserialize(ucx_header, cudf_buffer) + + from cudf.testing._utils import assert_eq + + assert_eq(res, msg) + await uu.comm.ep.close() + await uu.client.close() + + assert uu.client.closed + assert uu.comm.ep.closed + del uu.ucxx_server + ucxx.reset() diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a3b40936..e9634c21 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -104,9 +104,13 @@ rapids_find_package( # add third party dependencies using CPM rapids_cpm_init() # find rmm -include(cmake/thirdparty/get_rmm.cmake) +if(UCXX_ENABLE_RMM) + include(cmake/thirdparty/get_rmm.cmake) +endif() # find or install GoogleTest -include(cmake/thirdparty/get_gtest.cmake) +if(BUILD_TESTS) + include(cmake/thirdparty/get_gtest.cmake) +endif() # ################################################################################################## # * library targets ------------------------------------------------------------------------------- @@ -171,17 +175,15 @@ target_compile_definitions( # Enable RMM if necessary if(UCXX_ENABLE_RMM) - target_compile_definitions(ucxx PUBLIC UCXX_ENABLE_RMM) -endif() + target_link_libraries(ucxx PUBLIC rmm::rmm) -# Define spdlog level -target_compile_definitions(ucxx PUBLIC "SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}") + # Define spdlog level + target_compile_definitions(ucxx PUBLIC UCXX_ENABLE_RMM "SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}") +endif() # Specify the target module library dependencies -target_link_libraries( - ucxx - 
PUBLIC rmm::rmm ucx::ucp -) +target_link_libraries(ucxx PUBLIC ucx::ucp) + # Add Conda library, and include paths if specified if(TARGET conda_env) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 5c56ede6..8d31e198 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -32,6 +32,7 @@ include(rapids-cython-core) if(NOT ucxx_FOUND) set(BUILD_TESTS OFF) set(BUILD_BENCHMARKS OFF) + set(UCXX_ENABLE_PYTHON ON) set(_exclude_from_all "") @@ -41,10 +42,14 @@ if(NOT ucxx_FOUND) # and modify the rpaths appropriately. set(cython_lib_dir ucxx) install(TARGETS ucxx DESTINATION ${cython_lib_dir}) + install(TARGETS ucxx_python DESTINATION ${cython_lib_dir}) endif() rapids_cython_init() +find_package( + Python3 REQUIRED COMPONENTS Development.Embed +) add_subdirectory(ucxx/examples) add_subdirectory(ucxx/_lib) diff --git a/python/distributed-ucxx/distributed_ucxx/VERSION b/python/distributed-ucxx/distributed_ucxx/VERSION new file mode 120000 index 00000000..d62dc733 --- /dev/null +++ b/python/distributed-ucxx/distributed_ucxx/VERSION @@ -0,0 +1 @@ +../../../VERSION \ No newline at end of file diff --git a/python/distributed-ucxx/distributed_ucxx/_version.py b/python/distributed-ucxx/distributed_ucxx/_version.py new file mode 100644 index 00000000..536769cc --- /dev/null +++ b/python/distributed-ucxx/distributed_ucxx/_version.py @@ -0,0 +1,23 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import importlib.resources + +__version__ = ( + importlib.resources.files("distributed_ucxx") + .joinpath("VERSION") + .read_text() + .strip() +) +__git_commit__ = "" diff --git a/python/distributed-ucxx/pyproject.toml b/python/distributed-ucxx/pyproject.toml index c6a40be3..d1470018 100644 --- a/python/distributed-ucxx/pyproject.toml +++ b/python/distributed-ucxx/pyproject.toml @@ -14,7 +14,7 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache-2.0" } -requires-python = ">=3.8" +requires-python = ">=3.9" dependencies = [ "numba>=0.57.1", "rapids-dask-dependency==24.4.*", @@ -24,7 +24,6 @@ classifiers = [ "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", ] @@ -110,11 +109,3 @@ exclude = [ "docs.*", "tests.*", ] - -[tool.versioneer] -VCS = "git" -style = "pep440" -versionfile_source = "distributed_ucxx/_version.py" -versionfile_build = "distributed_ucxx/_version.py" -tag_prefix = "v" -parentdir_prefix = "distributed_ucxx-" diff --git a/python/ucxx/VERSION b/python/ucxx/VERSION new file mode 120000 index 00000000..558194c5 --- /dev/null +++ b/python/ucxx/VERSION @@ -0,0 +1 @@ +../../VERSION \ No newline at end of file diff --git a/python/ucxx/_lib/CMakeLists.txt b/python/ucxx/_lib/CMakeLists.txt index 21007f82..6f0c45a1 100644 --- a/python/ucxx/_lib/CMakeLists.txt +++ b/python/ucxx/_lib/CMakeLists.txt @@ -9,7 +9,8 @@ set(linked_libraries ucxx::ucxx ucxx::python Python3::Python) rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS ucxx + LINKED_LIBRARIES "${linked_libraries}" + ASSOCIATED_TARGETS ucxx ucxx_python ) find_package(Python REQUIRED COMPONENTS Development NumPy) diff --git a/python/ucxx/_version.py b/python/ucxx/_version.py new file mode 100644 index 
00000000..a1f944f3 --- /dev/null +++ b/python/ucxx/_version.py @@ -0,0 +1,18 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib.resources + +__version__ = importlib.resources.files("ucxx").joinpath("VERSION").read_text().strip() +__git_commit__ = "" diff --git a/python/ucxx/examples/CMakeLists.txt b/python/ucxx/examples/CMakeLists.txt index b7c3868c..d654d239 100644 --- a/python/ucxx/examples/CMakeLists.txt +++ b/python/ucxx/examples/CMakeLists.txt @@ -10,6 +10,7 @@ rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" + ASSOCIATED_TARGETS ucxx ucxx_python ) target_include_directories(python_future_task_app PRIVATE ".") From b1fa95c5e76c86679d7f64dbdd35c018a62690a7 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Fri, 2 Feb 2024 21:49:02 +0100 Subject: [PATCH 4/4] Run tests requiring Distributed internals separately (#183) Some tests require Distributed internals and therefore need it installed in developer mode. Since we have no good way to install it with `rapids-dask-dependency`, we now run those separately from the rest. 
Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Ray Douglass (https://github.com/raydouglass) - Charles Blackmon-Luca (https://github.com/charlesbluca) URL: https://github.com/rapidsai/ucxx/pull/183 --- ci/test_common.sh | 30 +++++++++++++++++-- ci/test_python.sh | 15 ++++++---- ci/test_wheel_distributed_ucxx.sh | 11 ++++--- .../distributed_ucxx/tests/test_ucxx.py | 10 ------- .../tests_internal/conftest.py | 5 ++++ .../tests_internal/test_distributed_comms.py | 15 ++++++++++ 6 files changed, 61 insertions(+), 25 deletions(-) create mode 100644 python/distributed-ucxx/distributed_ucxx/tests_internal/conftest.py create mode 100644 python/distributed-ucxx/distributed_ucxx/tests_internal/test_distributed_comms.py diff --git a/ci/test_common.sh b/ci/test_common.sh index 2fbc2c17..b7b32eab 100755 --- a/ci/test_common.sh +++ b/ci/test_common.sh @@ -160,6 +160,20 @@ run_py_benchmark() { } ################################## Distributed ################################# +install_distributed_dev_mode() { + # Running Distributed tests which access its internals requires installing it in + # developer mode. This isn't a great solution but it's what we can currently do + # to run non-public API tests in CI. + + rapids-logger "Install Distributed in developer mode" + git clone https://github.com/dask/distributed /tmp/distributed + pip install -e /tmp/distributed + # `pip install -e` removes files under `distributed` but not the directory, later + # causing failures to import modules. 
+ PYTHON_ENV_PATH=${CONDA_PREFIX:-/pyenv} + rm -rf $(find ${PYTHON_ENV_PATH} -type d -iname "site-packages")/distributed +} + run_distributed_ucxx_tests() { PROGRESS_MODE=$1 ENABLE_DELAYED_SUBMISSION=$2 @@ -167,9 +181,19 @@ run_distributed_ucxx_tests() { CMD_LINE="UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 10m python -m pytest -vs python/distributed-ucxx/distributed_ucxx/tests/" - # Workaround for https://github.com/rapidsai/ucxx/issues/15 - # CMD_LINE="UCX_KEEPALIVE_INTERVAL=1ms ${CMD_LINE}" - log_command "${CMD_LINE}" UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 10m python -m pytest -vs python/distributed-ucxx/distributed_ucxx/tests/ } + +run_distributed_ucxx_tests_internal() { + # Note that tests here require Distributed installed in developer mode! + + PROGRESS_MODE=$1 + ENABLE_DELAYED_SUBMISSION=$2 + ENABLE_PYTHON_FUTURE=$3 + + CMD_LINE="UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 10m python -m pytest -vs python/distributed-ucxx/distributed_ucxx/tests_internal/" + + log_command "${CMD_LINE}" + UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 10m python -m pytest -vs python/distributed-ucxx/distributed_ucxx/tests_internal/ +} diff --git a/ci/test_python.sh b/ci/test_python.sh index c45c2cbb..5dbbbd57 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -29,12 +29,6 @@ rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ libucxx ucxx distributed-ucxx -# TODO: Perhaps install from conda? We need distributed installed in developer -# mode to provide test utils, but that's probably not doable from conda packages. 
-rapids-logger "Install Distributed in developer mode" -git clone https://github.com/dask/distributed /tmp/distributed -pip install -e /tmp/distributed - print_ucx_config rapids-logger "Run tests with conda package" @@ -69,5 +63,14 @@ run_distributed_ucxx_tests thread 0 1 run_distributed_ucxx_tests thread 1 0 run_distributed_ucxx_tests thread 1 1 +install_distributed_dev_mode + +# run_distributed_ucxx_tests_internal PROGRESS_MODE ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE +run_distributed_ucxx_tests_internal polling 0 0 +run_distributed_ucxx_tests_internal thread 0 0 +run_distributed_ucxx_tests_internal thread 0 1 +run_distributed_ucxx_tests_internal thread 1 0 +run_distributed_ucxx_tests_internal thread 1 1 + rapids-logger "C++ future -> Python future notifier example" python -m ucxx.examples.python_future_task_example diff --git a/ci/test_wheel_distributed_ucxx.sh b/ci/test_wheel_distributed_ucxx.sh index c8c0bbca..c624b365 100755 --- a/ci/test_wheel_distributed_ucxx.sh +++ b/ci/test_wheel_distributed_ucxx.sh @@ -18,12 +18,6 @@ python -m pip install ./local-ucxx-dep/ucxx*.whl # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install $(echo ./dist/${PROJECT_NAME}*.whl)[test] -# TODO: We need distributed installed in developer mode to provide test utils, -# we still need to match to the `rapids-dask-dependency` version. 
-rapids-logger "Install Distributed in developer mode" -git clone https://github.com/dask/distributed /tmp/distributed -python -m pip install -e /tmp/distributed - # Run smoke tests for aarch64 pull requests if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then rapids-logger "Distributed Smoke Tests" @@ -33,4 +27,9 @@ else # run_distributed_ucxx_tests PROGRESS_MODE ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE run_distributed_ucxx_tests thread 1 1 + + install_distributed_dev_mode + + # run_distributed_ucxx_tests_internal PROGRESS_MODE ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE + run_distributed_ucxx_tests_internal thread 1 1 fi diff --git a/python/distributed-ucxx/distributed_ucxx/tests/test_ucxx.py b/python/distributed-ucxx/distributed_ucxx/tests/test_ucxx.py index 1f820629..2135b3ba 100644 --- a/python/distributed-ucxx/distributed_ucxx/tests/test_ucxx.py +++ b/python/distributed-ucxx/distributed_ucxx/tests/test_ucxx.py @@ -165,16 +165,6 @@ async def test_ping_pong_data(ucxx_loop): await serv_com.close() -@gen_test() -async def test_ucxx_deserialize(ucxx_loop): - # Note we see this error on some systems with this test: - # `socket.gaierror: [Errno -5] No address associated with hostname` - # This may be due to a system configuration issue. - from distributed.comm.tests.test_comms import check_deserialize - - await check_deserialize("tcp://") - - @pytest.mark.parametrize( "g", [ diff --git a/python/distributed-ucxx/distributed_ucxx/tests_internal/conftest.py b/python/distributed-ucxx/distributed_ucxx/tests_internal/conftest.py new file mode 100644 index 00000000..ca71f84e --- /dev/null +++ b/python/distributed-ucxx/distributed_ucxx/tests_internal/conftest.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-License-Identifier: BSD-3-Clause + +# Make all fixtures available +from distributed_ucxx.utils_test import * # noqa diff --git a/python/distributed-ucxx/distributed_ucxx/tests_internal/test_distributed_comms.py b/python/distributed-ucxx/distributed_ucxx/tests_internal/test_distributed_comms.py new file mode 100644 index 00000000..9bc7afa1 --- /dev/null +++ b/python/distributed-ucxx/distributed_ucxx/tests_internal/test_distributed_comms.py @@ -0,0 +1,15 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. +# SPDX-License-Identifier: BSD-3-Clause + +from distributed.comm.tests.test_comms import check_deserialize + +from distributed_ucxx.utils_test import gen_test + + +@gen_test() +async def test_ucxx_deserialize(ucxx_loop): + # Note we see this error on some systems with this test: + # `socket.gaierror: [Errno -5] No address associated with hostname` + # This may be due to a system configuration issue. + + await check_deserialize("ucxx://")