Skip to content

Commit

Permalink
[FEA] Add third-party library integration testing of cudf.pandas to c…
Browse files Browse the repository at this point in the history
…udf (#16645)

Closes #16580

Authors:
  - Matthew Murray (https://github.com/Matt711)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #16645
  • Loading branch information
Matt711 authored Aug 30, 2024
1 parent f932bf9 commit 62a53b3
Show file tree
Hide file tree
Showing 25 changed files with 2,407 additions and 0 deletions.
11 changes: 11 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,14 @@ jobs:
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
script: ci/cudf_pandas_scripts/run_tests.sh
# Job that runs the cudf.pandas third-party library integration tests through
# the shared custom-job workflow (build_type: nightly). The run_script invokes
# test.sh with the dependencies.yaml of the third-party integration tests.
third-party-integration-tests-cudf-pandas:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: nightly
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
container_image: "rapidsai/ci-conda:latest"
run_script: |
ci/cudf_pandas_scripts/third-party-integration/test.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
3 changes: 3 additions & 0 deletions ci/cudf_pandas_scripts/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,9 @@ fi
python -m pip install ipykernel
python -m ipykernel install --user --name python3

# The third-party integration tests are ignored because they are run nightly in a separate CI job
python -m pytest -p cudf.pandas \
--ignore=./python/cudf/cudf_pandas_tests/third_party_integration_tests/ \
--cov-config=./python/cudf/.coveragerc \
--cov=cudf \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-pandas-coverage.xml" \
Expand All @@ -80,6 +82,7 @@ for version in "${versions[@]}"; do
echo "Installing pandas version: ${version}"
python -m pip install "numpy>=1.23,<2.0a0" "pandas==${version}"
python -m pytest -p cudf.pandas \
--ignore=./python/cudf/cudf_pandas_tests/third_party_integration_tests/ \
--cov-config=./python/cudf/.coveragerc \
--cov=cudf \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-pandas-coverage.xml" \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

#######################################
# Remove every results-*.pickle file found directly inside TEST_DIR.
# Globals:   TEST_DIR (read)
# Arguments: none
# Returns:   0 when there was nothing to remove or removal succeeded;
#            otherwise the exit status of rm.
#######################################
cleanup() {
  # Nothing to do when TEST_DIR is unset or empty. This also prevents the
  # glob below from degenerating into "/results-*.pickle".
  [[ -n "${TEST_DIR:-}" ]] || return 0

  # -f: the absence of result files is not an error.
  # --: a TEST_DIR that starts with '-' is never parsed as an option.
  rm -f -- "${TEST_DIR}"/results-*.pickle
}

# Invoke cleanup whenever this script exits, whatever its exit status.
trap cleanup EXIT

#######################################
# Run pytest, without the cudf.pandas plugin, over every test_<lib>*.py
# file in TEST_DIR.
# Globals:   TEST_DIR (read), NUM_PROCESSES (read)
# Arguments: $1 - library name used to select the test files
# Returns:   exit status of pytest
#######################################
runtest_gold() {
  local lib=$1

  # Only the variable parts are quoted so the trailing "*.py" still globs
  # while directories containing whitespace stay a single argument.
  pytest \
    -v \
    --continue-on-collection-errors \
    --cache-clear \
    --numprocesses="${NUM_PROCESSES}" \
    --dist=worksteal \
    "${TEST_DIR}"/test_"${lib}"*.py
}

#######################################
# Run pytest with the cudf.pandas plugin loaded (-p cudf.pandas) over every
# test_<lib>*.py file in TEST_DIR.
# Globals:   TEST_DIR (read), NUM_PROCESSES (read)
# Arguments: $1 - library name used to select the test files
# Returns:   exit status of pytest
#######################################
runtest_cudf_pandas() {
  local lib=$1

  # Only the variable parts are quoted so the trailing "*.py" still globs
  # while directories containing whitespace stay a single argument.
  pytest \
    -p cudf.pandas \
    -v \
    --continue-on-collection-errors \
    --cache-clear \
    --numprocesses="${NUM_PROCESSES}" \
    --dist=worksteal \
    "${TEST_DIR}"/test_"${lib}"*.py
}

#######################################
# Drive the test phases for one library: run the tests without and then
# with cudf.pandas (generation phase), then run them once more with
# --compare under cudf.pandas (assertion phase).
# Globals:   TEST_DIR (read), NUM_PROCESSES (read)
# Arguments: $1 - library name used to select the test files
# Returns:   exit status of the final (--compare) pytest run; the status of
#            the two generation runs is not checked here.
#######################################
main() {
  local lib=$1

  # generation phase
  runtest_gold "${lib}"
  runtest_cudf_pandas "${lib}"

  # assertion phase
  # Only the variable parts of the path are quoted so "*.py" still globs.
  pytest \
    --compare \
    -p cudf.pandas \
    -v \
    --continue-on-collection-errors \
    --cache-clear \
    --numprocesses="${NUM_PROCESSES}" \
    --dist=worksteal \
    "${TEST_DIR}"/test_"${lib}"*.py
}

# Entry point: forward the command-line arguments to main unchanged. "$@" is
# quoted so each argument stays a single word and is not glob-expanded.
main "$@"
83 changes: 83 additions & 0 deletions ci/cudf_pandas_scripts/third-party-integration/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#!/bin/bash
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

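# Run the cudf.pandas third-party integration tests once for every library
# that has a "test_<lib>" entry under "files" in the given dependencies.yaml.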

# Strict mode: exit on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail

#######################################
# Print "key=value" on stdout and append the same line to the file named by
# GITHUB_OUTPUT (or to /dev/null when GITHUB_OUTPUT is unset or empty).
# Globals:   GITHUB_OUTPUT (read)
# Arguments: $1 - key, $2 - value
# Outputs:   the "key=value" line on stdout
#######################################
write_output() {
  local name="$1"
  local data="$2"
  local sink="${GITHUB_OUTPUT:-/dev/null}"

  printf '%s=%s\n' "${name}" "${data}" | tee --append "${sink}"
}

#######################################
# List the libraries that have a "test_<lib>" entry under the "files" section
# of a dependencies.yaml file.
# Arguments: $1 - path to dependencies.yaml
# Outputs:   one line on stdout holding a compact JSON array of the library
#            names with the "test_" prefix removed
# Returns:   0 on success; the failing status of the yq | jq pipeline
#            otherwise (a yq failure is only visible with pipefail enabled)
#######################################
extract_lib_from_dependencies_yaml() {
  local file=$1

  # Declared separately from the assignment: "local var=$(cmd)" would return
  # local's own status and hide a yq/jq failure.
  local extracted_libs
  # Keep only the keys under "files" that start with "test_", then strip that
  # prefix. startswith matches exactly the keys the "^test_" sub can strip.
  extracted_libs=$(
    yq -o json "$file" \
      | jq -rc '.files | with_entries(select(.key | startswith("test_"))) | keys | map(sub("^test_"; ""))'
  ) || return

  # Quoted on purpose: the JSON text begins with '[' and would otherwise be
  # subject to word splitting and pathname expansion.
  printf '%s\n' "$extracted_libs"
}

#######################################
# Run the third-party integration tests once per library listed in the given
# dependencies.yaml. For each library: generate a conda environment file,
# create and activate the "test" environment, then run
# ci_run_library_tests.sh for that library.
# Globals:   RAPIDS_PY_VERSION (read); LIBS, CUDA_MAJOR, TEST_DIR,
#            NUM_PROCESSES, EXITCODE, repo_root, serial_libraries (written)
# Arguments: $1 - path to dependencies.yaml
# Returns:   does not return; exits 0 when no library's test phase tripped
#            the ERR trap, 1 otherwise.
#######################################
main() {
  local dependencies_yaml="$1"
  # Sticky result across all libraries. EXITCODE below is reset for every
  # library, so without this accumulator a failure in an earlier library
  # would be overwritten by a later library that passes.
  local overall_exitcode=0
  local lib serial_library

  # The helper prints a JSON array; drop the surrounding brackets so only the
  # comma-separated, still double-quoted names remain.
  LIBS=$(extract_lib_from_dependencies_yaml "$dependencies_yaml")
  LIBS=${LIBS#[}
  LIBS=${LIBS%]}

  # Intentionally unquoted: commas become spaces and word splitting yields
  # one library name per iteration.
  for lib in ${LIBS//,/ }; do
    # Strip the JSON double quotes around the name.
    lib=${lib//\"/}
    echo "Running tests for library $lib"

    # tensorflow is resolved against CUDA 11, every other library against
    # CUDA 12.
    if [[ "$lib" == "tensorflow" ]]; then
      CUDA_MAJOR=11
    else
      CUDA_MAJOR=12
    fi

    . /opt/conda/etc/profile.d/conda.sh

    rapids-logger "Generate Python testing dependencies"
    rapids-dependency-file-generator \
      --config "$dependencies_yaml" \
      --output conda \
      --file-key "test_${lib}" \
      --matrix "cuda=${CUDA_MAJOR};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml

    rapids-mamba-retry env create --yes -f env.yaml -n test

    # Temporarily allow unbound variables for conda activation.
    set +u
    conda activate test
    set -u

    repo_root=$(git rev-parse --show-toplevel)
    TEST_DIR=${repo_root}/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests

    rapids-print-env

    rapids-logger "Check GPU usage"
    nvidia-smi

    # From here on a failing command no longer aborts the script; the ERR
    # trap records it in EXITCODE instead. Note that "set +e" is never
    # reverted, so it also stays in effect for later libraries.
    EXITCODE=0
    trap "EXITCODE=1" ERR
    set +e

    rapids-logger "pytest ${lib}"

    # Libraries listed in serial_libraries run with a single pytest worker;
    # all others use 8.
    NUM_PROCESSES=8
    serial_libraries=(
      "tensorflow"
    )
    for serial_library in "${serial_libraries[@]}"; do
      if [[ "${lib}" == "${serial_library}" ]]; then
        NUM_PROCESSES=1
      fi
    done

    TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} ci/cudf_pandas_scripts/third-party-integration/ci_run_library_tests.sh "${lib}"

    rapids-logger "Test script exiting with value: ${EXITCODE}"

    # Remember this library's failure before EXITCODE is reset for the next.
    if [[ "${EXITCODE}" -ne 0 ]]; then
      overall_exitcode=1
    fi
  done

  exit "${overall_exitcode}"
}

# Entry point: forward all command-line arguments to main.
main "$@"
Loading

0 comments on commit 62a53b3

Please sign in to comment.