Build CPP tests in new CI workflow (#6947)
will-cromar authored Apr 26, 2024
1 parent b9a9449 commit b834e49
Showing 12 changed files with 200 additions and 190 deletions.
108 changes: 108 additions & 0 deletions .github/scripts/run_tests.sh
@@ -0,0 +1,108 @@
set -ex

function run_torch_xla_python_tests() {
  PYTORCH_DIR=$1
  XLA_DIR=$2
  USE_COVERAGE="${3:-0}"

  pushd $XLA_DIR
  echo "Running Python Tests"
  if [ "$USE_COVERAGE" != "0" ]; then
    pip install coverage==6.5.0 --upgrade
    pip install coverage-lcov
    pip install toml
    ./test/run_tests.sh
    coverage combine
    mkdir lcov && cp .coverage lcov/
    coverage-lcov --data_file_path lcov/.coverage
    coverage html
    cp lcov.info htmlcov/
    mv htmlcov ~/
    chmod -R 755 ~/htmlcov
  else
    ./test/run_tests.sh
  fi
  popd
}

function run_torch_xla_cpp_tests() {
  PYTORCH_DIR=$1
  XLA_DIR=$2
  USE_COVERAGE="${3:-0}"

  TORCH_DIR=$(python -c "import pkgutil; import os; print(os.path.dirname(pkgutil.get_loader('torch').get_filename()))")
  export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${TORCH_DIR}/lib
  if [ -x "$(command -v nvidia-smi)" ]; then
    CUDA_PLUGIN_DIR=$(python -c "import pkgutil; import os; print(os.path.dirname(pkgutil.get_loader('torch_xla_cuda_plugin').get_filename()))")
    export PJRT_LIBRARY_PATH=$CUDA_PLUGIN_DIR/lib/pjrt_c_api_gpu_plugin.so
    export PJRT_DEVICE=LIBRARY
    export PJRT_DYNAMIC_PLUGINS=1
  else
    export PJRT_DEVICE=CPU
  fi
  export XLA_EXPERIMENTAL="nonzero:masked_select:nms"

  test_names1=("test_aten_xla_tensor_1"
               "test_aten_xla_tensor_2"
               "test_aten_xla_tensor_3"
               "test_aten_xla_tensor_4"
               "pjrt_computation_client_test"
               "ifrt_computation_client_test")
  test_names2=("test_aten_xla_tensor_5"
               "test_aten_xla_tensor_6"
               "test_ir"
               "test_lazy"
               "test_replication"
               "test_tensor"
               # disable test_xla_backend_intf since it is flaky on upstream
               #"test_xla_backend_intf"
               "test_xla_sharding")
  if [[ "$RUN_CPP_TESTS1" == "cpp_tests1" ]]; then
    test_names=("${test_names1[@]}")
  elif [[ "$RUN_CPP_TESTS2" == "cpp_tests2" ]]; then
    test_names=("${test_names2[@]}")
  else
    test_names=("${test_names1[@]}" "${test_names2[@]}")
  fi

  for name in "${test_names[@]}"; do
    echo "Running $name cpp test..."
    /tmp/test/bin/${name}
  done
}

function run_torch_xla_benchmark_tests() {
  XLA_DIR=$1
  pushd $XLA_DIR
  echo "Running Benchmark Tests"
  test/benchmarks/run_tests.sh -L""
}

PYTORCH_DIR=$1
XLA_DIR=$2
USE_COVERAGE="${3:-0}"
RUN_CPP="${RUN_CPP_TESTS:0}"
RUN_PYTHON="${RUN_PYTHON_TESTS:0}"

if [ -x "$(command -v nvidia-smi)" ]; then
  num_devices=$(nvidia-smi --list-gpus | wc -l)
  echo "Found $num_devices GPU devices..."
  export GPU_NUM_DEVICES=$num_devices
fi
export PYTORCH_TESTING_DEVICE_ONLY_FOR="xla"
export CXX_ABI=$(python -c "import torch;print(int(torch._C._GLIBCXX_USE_CXX11_ABI))")

if [[ -z "$RUN_BENCHMARK_TESTS" && -z "$RUN_CPP_TESTS1" && -z "$RUN_CPP_TESTS2" && -z "$RUN_PYTHON_TESTS" ]]; then
  run_torch_xla_python_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE
  run_torch_xla_cpp_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE
  run_torch_xla_benchmark_tests $XLA_DIR
else
  # run tests separately.
  if [[ "$RUN_PYTHON_TESTS" == "python_tests" ]]; then
    run_torch_xla_python_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE
  elif [[ "$RUN_BENCHMARK_TESTS" == "benchmark_tests" ]]; then
    run_torch_xla_benchmark_tests $XLA_DIR
  else
    run_torch_xla_cpp_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE
  fi
fi
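
Note: in the workflow changes below, this script is invoked from the workspace root as pytorch/xla/.github/scripts/run_tests.sh pytorch/ pytorch/xla/ $USE_COVERAGE. A minimal local sketch of the same entry points, assuming pytorch/ and pytorch/xla/ are checked out side by side (as in the CI workspace) and any prebuilt C++ test binaries have been staged under /tmp/test/bin:

# Run the full suite (Python, C++, and benchmark tests), coverage disabled:
pytorch/xla/.github/scripts/run_tests.sh pytorch/ pytorch/xla/ 0

# Run only the first C++ shard (selects test_names1 above):
RUN_CPP_TESTS1=cpp_tests1 pytorch/xla/.github/scripts/run_tests.sh pytorch/ pytorch/xla/ 0

# Run only the Python tests:
RUN_PYTHON_TESTS=python_tests pytorch/xla/.github/scripts/run_tests.sh pytorch/ pytorch/xla/ 0
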
9 changes: 7 additions & 2 deletions .github/workflows/_build_torch_xla.yml
@@ -26,7 +26,7 @@ jobs:
       GOOGLE_APPLICATION_CREDENTIALS: /tmp/default_credentials.json
       BAZEL_JOBS: 16
       BAZEL_REMOTE_CACHE: 1
-      # BUILD_CPP_TESTS: 1
+      BUILD_CPP_TESTS: 1
     steps:
       - name: Setup gcloud
         shell: bash
@@ -46,9 +46,14 @@
         shell: bash
         run: |
           cd pytorch/xla/infra/ansible
-          ansible-playbook playbook.yaml -vvv -e "stage=build arch=amd64 accelerator=tpu src_root=${GITHUB_WORKSPACE} bundle_libtpu=0 cache_suffix=-ci" --skip-tags=fetch_srcs,install_deps
+          ansible-playbook playbook.yaml -vvv -e "stage=build arch=amd64 accelerator=tpu src_root=${GITHUB_WORKSPACE} bundle_libtpu=0 build_cpp_tests=1 cache_suffix=-ci" --skip-tags=fetch_srcs,install_deps
       - name: Upload wheel
         uses: actions/upload-artifact@v4
         with:
           name: torch-xla-wheels
           path: /dist/*.whl
+      - name: Upload CPP test binaries
+        uses: actions/upload-artifact@v4
+        with:
+          name: cpp-test-bin
+          path: /tmp/test/bin
24 changes: 20 additions & 4 deletions .github/workflows/_test_python.yml → .github/workflows/_test.yml
@@ -53,6 +53,10 @@ jobs:
             run_xla_op_tests3: 'xla_op3'
           - run_python_tests: 'python_tests'
             run_torch_mp_op_tests: 'torch_mp_op'
+          - run_cpp_tests: 'cpp_tests'
+            run_cpp_tests1: 'cpp_tests1'
+          - run_cpp_tests: 'cpp_tests'
+            run_cpp_tests2: 'cpp_tests2'
     timeout-minutes: ${{ inputs.timeout-minutes }}
     env:
       GCLOUD_SERVICE_KEY: ${{ secrets.gcloud-service-key }}
@@ -64,6 +68,8 @@
       RUN_XLA_OP_TESTS2: ${{ matrix.run_xla_op_tests2 }}
       RUN_XLA_OP_TESTS3: ${{ matrix.run_xla_op_tests3 }}
       RUN_TORCH_MP_OP_TESTS: ${{ matrix.run_torch_mp_op_tests }}
+      RUN_CPP_TESTS1: ${{ matrix.run_cpp_tests1 }}
+      RUN_CPP_TESTS2: ${{ matrix.run_cpp_tests2 }}
       BAZEL_JOBS: 16
       BAZEL_REMOTE_CACHE: 1
     steps:
@@ -76,6 +82,19 @@
         with:
           name: torch-xla-wheels
           path: /tmp/wheels/
+      - name: Fetch CPP test binaries
+        uses: actions/download-artifact@v4
+        with:
+          name: cpp-test-bin
+          path: /tmp/test/bin
+        if: ${{ matrix.run_cpp_tests }}
+      # GitHub Actions doesn't preserve executable permissions
+      # https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss
+      - name: Set CPP test permissions
+        run: |
+          chmod +x /tmp/test/bin/*
+          ls -l /tmp/test/bin
+        if: ${{ matrix.run_cpp_tests }}
       - name: Fetch CUDA plugin
         uses: actions/download-artifact@v4
         with:
@@ -134,10 +153,7 @@
           fi
       - name: Test
         shell: bash
-        run: |
-          source pytorch/xla/.circleci/common.sh
-          run_torch_xla_tests pytorch/ pytorch/xla/ $USE_COVERAGE
+        run: pytorch/xla/.github/scripts/run_tests.sh pytorch/ pytorch/xla/ $USE_COVERAGE
       - name: Upload coverage results
         if: ${{ inputs.collect-coverage }}
         shell: bash
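
Taken together with run_tests.sh above, the two new matrix include entries shard the C++ tests across parallel jobs: run_cpp_tests gates the artifact download and chmod steps, while run_cpp_tests1 / run_cpp_tests2 are exported as RUN_CPP_TESTS1 / RUN_CPP_TESTS2 and make the script pick test_names1 or test_names2. Roughly, each shard ends up executing the following (a sketch, with $USE_COVERAGE as passed by the workflow):

# cpp_tests1 shard:
RUN_CPP_TESTS1=cpp_tests1 pytorch/xla/.github/scripts/run_tests.sh pytorch/ pytorch/xla/ $USE_COVERAGE

# cpp_tests2 shard:
RUN_CPP_TESTS2=cpp_tests2 pytorch/xla/.github/scripts/run_tests.sh pytorch/ pytorch/xla/ $USE_COVERAGE
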
150 changes: 0 additions & 150 deletions .github/workflows/_test_cpp.yml

This file was deleted.

33 changes: 5 additions & 28 deletions .github/workflows/build_and_test.yml
@@ -30,29 +30,6 @@ jobs:
     secrets:
       gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}
 
-  test-cpp-cpu:
-    name: "CPU C++ tests"
-    uses: ./.github/workflows/_test_cpp.yml
-    needs: build
-    with:
-      docker-image: ${{ needs.build.outputs.docker-image }}
-      timeout-minutes: 120
-      collect-coverage: false # TODO(yeounoh) separate from CPU coverage metrics
-    secrets:
-      gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}
-
-  test-cpp-cuda:
-    name: "GPU C++ tests"
-    uses: ./.github/workflows/_test_cpp.yml
-    needs: build
-    with:
-      docker-image: ${{ needs.build.outputs.docker-image }}
-      runner: linux.8xlarge.nvidia.gpu
-      timeout-minutes: 300
-      collect-coverage: false # TODO(yeounoh) separate from CPU coverage metrics
-    secrets:
-      gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}
-
   push-docs:
     name: "Build & publish docs"
     if: github.event_name == 'push' && (github.event.ref == 'refs/heads/master' || startsWith(github.event.ref, 'refs/tags/r'))
@@ -81,8 +58,8 @@
       gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}
 
   test-python-cpu:
-    name: "CPU Python tests"
-    uses: ./.github/workflows/_test_python.yml
+    name: "CPU tests"
+    uses: ./.github/workflows/_test.yml
     needs: build-torch-xla
     with:
       dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_tpuvm
@@ -91,9 +68,9 @@
     secrets:
       gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}
 
-  test-python-cuda:
-    name: "GPU Python tests"
-    uses: ./.github/workflows/_test_python.yml
+  test-cuda:
+    name: "GPU tests"
+    uses: ./.github/workflows/_test.yml
     needs: [build-torch-xla, build-cuda-plugin]
     with:
       dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_cuda_12.1