From b834e49907aab60e096620664ca8ba328ef92141 Mon Sep 17 00:00:00 2001 From: Will Cromar Date: Fri, 26 Apr 2024 15:12:10 -0700 Subject: [PATCH] Build CPP tests in new CI workflow (#6947) --- .github/scripts/run_tests.sh | 108 +++++++++++++ .github/workflows/_build_torch_xla.yml | 9 +- .../workflows/{_test_python.yml => _test.yml} | 24 ++- .github/workflows/_test_cpp.yml | 150 ------------------ .github/workflows/build_and_test.yml | 33 +--- BUILD | 20 +++ build_util.py | 4 - infra/ansible/config/env.yaml | 2 +- infra/ansible/config/vars.yaml | 2 + .../ansible/roles/build_srcs/tasks/main.yaml | 16 ++ setup.py | 4 + torch_xla/csrc/runtime/pjrt_registry.cc | 18 ++- 12 files changed, 200 insertions(+), 190 deletions(-) create mode 100755 .github/scripts/run_tests.sh rename .github/workflows/{_test_python.yml => _test.yml} (90%) delete mode 100644 .github/workflows/_test_cpp.yml diff --git a/.github/scripts/run_tests.sh b/.github/scripts/run_tests.sh new file mode 100755 index 00000000000..ae59a51490d --- /dev/null +++ b/.github/scripts/run_tests.sh @@ -0,0 +1,108 @@ +set -ex + +function run_torch_xla_python_tests() { + PYTORCH_DIR=$1 + XLA_DIR=$2 + USE_COVERAGE="${3:-0}" + + pushd $XLA_DIR + echo "Running Python Tests" + if [ "$USE_COVERAGE" != "0" ]; then + pip install coverage==6.5.0 --upgrade + pip install coverage-lcov + pip install toml + ./test/run_tests.sh + coverage combine + mkdir lcov && cp .coverage lcov/ + coverage-lcov --data_file_path lcov/.coverage + coverage html + cp lcov.info htmlcov/ + mv htmlcov ~/ + chmod -R 755 ~/htmlcov + else + ./test/run_tests.sh + fi + popd +} + +function run_torch_xla_cpp_tests() { + PYTORCH_DIR=$1 + XLA_DIR=$2 + USE_COVERAGE="${3:-0}" + + TORCH_DIR=$(python -c "import pkgutil; import os; print(os.path.dirname(pkgutil.get_loader('torch').get_filename()))") + export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${TORCH_DIR}/lib + if [ -x "$(command -v nvidia-smi)" ]; then + CUDA_PLUGIN_DIR=$(python -c "import pkgutil; import os; print(os.path.dirname(pkgutil.get_loader('torch_xla_cuda_plugin').get_filename()))") + export PJRT_LIBRARY_PATH=$CUDA_PLUGIN_DIR/lib/pjrt_c_api_gpu_plugin.so + export PJRT_DEVICE=LIBRARY + export PJRT_DYNAMIC_PLUGINS=1 + else + export PJRT_DEVICE=CPU + fi + export XLA_EXPERIMENTAL="nonzero:masked_select:nms" + + test_names1=("test_aten_xla_tensor_1" + "test_aten_xla_tensor_2" + "test_aten_xla_tensor_3" + "test_aten_xla_tensor_4" + "pjrt_computation_client_test" + "ifrt_computation_client_test") + test_names2=("test_aten_xla_tensor_5" + "test_aten_xla_tensor_6" + "test_ir" + "test_lazy" + "test_replication" + "test_tensor" + # disable test_xla_backend_intf since it is flaky on upstream + #"test_xla_backend_intf" + "test_xla_sharding") + if [[ "$RUN_CPP_TESTS1" == "cpp_tests1" ]]; then + test_names=("${test_names1[@]}") + elif [[ "$RUN_CPP_TESTS2" == "cpp_tests2" ]]; then + test_names=("${test_names2[@]}") + else + test_names=("${test_names1[@]}" "${test_names2[@]}") + fi + + for name in "${test_names[@]}"; do + echo "Running $name cpp test..." + /tmp/test/bin/${name} + done +} + +function run_torch_xla_benchmark_tests() { + XLA_DIR=$1 + pushd $XLA_DIR + echo "Running Benchmark Tests" + test/benchmarks/run_tests.sh -L"" +} + +PYTORCH_DIR=$1 +XLA_DIR=$2 +USE_COVERAGE="${3:-0}" +RUN_CPP="${RUN_CPP_TESTS:0}" +RUN_PYTHON="${RUN_PYTHON_TESTS:0}" + +if [ -x "$(command -v nvidia-smi)" ]; then + num_devices=$(nvidia-smi --list-gpus | wc -l) + echo "Found $num_devices GPU devices..." 
+ export GPU_NUM_DEVICES=$num_devices +fi +export PYTORCH_TESTING_DEVICE_ONLY_FOR="xla" +export CXX_ABI=$(python -c "import torch;print(int(torch._C._GLIBCXX_USE_CXX11_ABI))") + +if [[ -z "$RUN_BENCHMARK_TESTS" && -z "$RUN_CPP_TESTS1" && -z "$RUN_CPP_TESTS2" && -z "$RUN_PYTHON_TESTS" ]]; then + run_torch_xla_python_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE + run_torch_xla_cpp_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE + run_torch_xla_benchmark_tests $XLA_DIR +else + # run tests separately. + if [[ "$RUN_PYTHON_TESTS" == "python_tests" ]]; then + run_torch_xla_python_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE + elif [[ "$RUN_BENCHMARK_TESTS" == "benchmark_tests" ]]; then + run_torch_xla_benchmark_tests $XLA_DIR + else + run_torch_xla_cpp_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE + fi +fi diff --git a/.github/workflows/_build_torch_xla.yml b/.github/workflows/_build_torch_xla.yml index c3200b76ef1..7614242fd7a 100644 --- a/.github/workflows/_build_torch_xla.yml +++ b/.github/workflows/_build_torch_xla.yml @@ -26,7 +26,7 @@ jobs: GOOGLE_APPLICATION_CREDENTIALS: /tmp/default_credentials.json BAZEL_JOBS: 16 BAZEL_REMOTE_CACHE: 1 - # BUILD_CPP_TESTS: 1 + BUILD_CPP_TESTS: 1 steps: - name: Setup gcloud shell: bash @@ -46,9 +46,14 @@ jobs: shell: bash run: | cd pytorch/xla/infra/ansible - ansible-playbook playbook.yaml -vvv -e "stage=build arch=amd64 accelerator=tpu src_root=${GITHUB_WORKSPACE} bundle_libtpu=0 cache_suffix=-ci" --skip-tags=fetch_srcs,install_deps + ansible-playbook playbook.yaml -vvv -e "stage=build arch=amd64 accelerator=tpu src_root=${GITHUB_WORKSPACE} bundle_libtpu=0 build_cpp_tests=1 cache_suffix=-ci" --skip-tags=fetch_srcs,install_deps - name: Upload wheel uses: actions/upload-artifact@v4 with: name: torch-xla-wheels path: /dist/*.whl + - name: Upload CPP test binaries + uses: actions/upload-artifact@v4 + with: + name: cpp-test-bin + path: /tmp/test/bin diff --git a/.github/workflows/_test_python.yml b/.github/workflows/_test.yml similarity index 90% rename from .github/workflows/_test_python.yml rename to .github/workflows/_test.yml index 960b326450b..ffb73a156fa 100644 --- a/.github/workflows/_test_python.yml +++ b/.github/workflows/_test.yml @@ -53,6 +53,10 @@ jobs: run_xla_op_tests3: 'xla_op3' - run_python_tests: 'python_tests' run_torch_mp_op_tests: 'torch_mp_op' + - run_cpp_tests: 'cpp_tests' + run_cpp_tests1: 'cpp_tests1' + - run_cpp_tests: 'cpp_tests' + run_cpp_tests2: 'cpp_tests2' timeout-minutes: ${{ inputs.timeout-minutes }} env: GCLOUD_SERVICE_KEY: ${{ secrets.gcloud-service-key }} @@ -64,6 +68,8 @@ jobs: RUN_XLA_OP_TESTS2: ${{ matrix.run_xla_op_tests2 }} RUN_XLA_OP_TESTS3: ${{ matrix.run_xla_op_tests3 }} RUN_TORCH_MP_OP_TESTS: ${{ matrix.run_torch_mp_op_tests }} + RUN_CPP_TESTS1: ${{ matrix.run_cpp_tests1 }} + RUN_CPP_TESTS2: ${{ matrix.run_cpp_tests2 }} BAZEL_JOBS: 16 BAZEL_REMOTE_CACHE: 1 steps: @@ -76,6 +82,19 @@ jobs: with: name: torch-xla-wheels path: /tmp/wheels/ + - name: Fetch CPP test binaries + uses: actions/download-artifact@v4 + with: + name: cpp-test-bin + path: /tmp/test/bin + if: ${{ matrix.run_cpp_tests }} + # GitHub Actions doesn't preserve executable permissions + # https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss + - name: Set CPP test permissions + run: | + chmod +x /tmp/test/bin/* + ls -l /tmp/test/bin + if: ${{ matrix.run_cpp_tests }} - name: Fetch CUDA plugin uses: actions/download-artifact@v4 with: @@ -134,10 +153,7 @@ jobs: fi - name: Test shell: bash - run: | - source pytorch/xla/.circleci/common.sh - - 
run_torch_xla_tests pytorch/ pytorch/xla/ $USE_COVERAGE + run: pytorch/xla/.github/scripts/run_tests.sh pytorch/ pytorch/xla/ $USE_COVERAGE - name: Upload coverage results if: ${{ inputs.collect-coverage }} shell: bash diff --git a/.github/workflows/_test_cpp.yml b/.github/workflows/_test_cpp.yml deleted file mode 100644 index d0056d34963..00000000000 --- a/.github/workflows/_test_cpp.yml +++ /dev/null @@ -1,150 +0,0 @@ -name: xla-test -on: - workflow_call: - inputs: - docker-image: - required: true - type: string - description: Image to test on - runner: - required: false - type: string - description: Runner type for the test - default: linux.12xlarge - collect-coverage: - required: false - type: boolean - description: Set to true to collect coverage information - default: false - timeout-minutes: - required: false - type: number - default: 270 - description: | - Set the maximum (in minutes) how long the workflow should take to finish - disable-pjrt: - required: false - type: string - default: 0 - description: Whether to disable PJRT tests - test-script: - required: false - type: string - default: test.sh - description: Which test script to run - - secrets: - gcloud-service-key: - required: true - description: Secret to access Bazel build cache -jobs: - test: - runs-on: ${{ inputs.runner }} - strategy: - fail-fast: false - matrix: - include: - # Use readable strings as they define the workflow titles. - - run_cpp_tests1: 'cpp_tests1' - - run_cpp_tests2: 'cpp_tests2' - timeout-minutes: ${{ inputs.timeout-minutes }} - env: - DOCKER_IMAGE: ${{ inputs.docker-image }} - WORKDIR: /var/lib/jenkins/workspace - GCLOUD_SERVICE_KEY: ${{ secrets.gcloud-service-key }} - USE_COVERAGE: ${{ inputs.collect-coverage && '1' || '0' }} - XLA_SKIP_TORCH_OP_TESTS: ${{ inputs.disable-pjrt }} - XLA_SKIP_MP_OP_TESTS: ${{ inputs.disable-pjrt }} - RUN_CPP_TESTS1: ${{ matrix.run_cpp_tests1 }} - RUN_CPP_TESTS2: ${{ matrix.run_cpp_tests2 }} - steps: - - name: Setup Linux - uses: pytorch/test-infra/.github/actions/setup-linux@main - - name: Setup SSH (Click me for login details) - uses: pytorch/test-infra/.github/actions/setup-ssh@main - with: - github-secret: ${{ secrets.GITHUB_TOKEN }} - instructions: | - Tests are done inside the container, to start an interactive session run: - docker exec -it $(docker container ps --format '{{.ID}}') bash - - name: Install gcloud CLI - if: ${{ inputs.collect-coverage }} - shell: bash - run: | - sudo tee -a /etc/yum.repos.d/google-cloud-sdk.repo << EOM - [google-cloud-cli] - name=Google Cloud CLI - baseurl=https://packages.cloud.google.com/yum/repos/cloud-sdk-el8-x86_64 - enabled=1 - gpgcheck=1 - repo_gpgcheck=0 - gpgkey=https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg - EOM - sudo yum install -y google-cloud-cli - - name: Auth to GCR - if: ${{ inputs.collect-coverage }} - shell: bash - run: | - echo "${GCLOUD_SERVICE_KEY}" | gcloud auth activate-service-account --key-file=- - - name: Download and run docker image from GCR - shell: bash - run: | - echo "DOCKER_IMAGE: ${DOCKER_IMAGE}" - docker pull "${DOCKER_IMAGE}" - pid=$(docker run --shm-size=16g ${GPU_FLAG:-} -e USE_COVERAGE -e XLA_SKIP_TORCH_OP_TESTS -e XLA_SKIP_MP_OP_TESTS -e RUN_BENCHMARK_TESTS -e RUN_CPP_TESTS1 -e RUN_CPP_TESTS2 -e RUN_PYTHON_TESTS -e RUN_XLA_OP_TESTS1 -e RUN_XLA_OP_TESTS2 -e RUN_XLA_OP_TESTS3 -e RUN_TORCH_MP_OP_TESTS -t -d -w "$WORKDIR" "${DOCKER_IMAGE}") - echo "${GCLOUD_SERVICE_KEY}" | docker exec -i "${pid}" sh -c "cat >> /tmp/pytorch/xla/default_credentials.json" - echo "pid=${pid}" >> 
"${GITHUB_ENV}" - - name: Test - shell: bash - run: | - docker exec --privileged -u jenkins "${pid}" bash -c '.circleci/${{ inputs.test-script }}' - - name: Upload coverage results - if: ${{ inputs.collect-coverage }} - shell: bash - env: - CIRCLE_WORKFLOW_ID: ${{ github.run_id }} - CIRCLE_BUILD_NUM: ${{ github.run_number }} - BENCHMARK_TEST_NAME: ${{ env.RUN_BENCHMARK_TESTS }} - PYTHON_TEST_NAME: ${{ env.RUN_PYTHON_TESTS }}${{ env.RUN_XLA_OP_TESTS1 }}${{ env.RUN_XLA_OP_TESTS2 }}${{ env.RUN_XLA_OP_TESTS3 }}${{ env.RUN_TORCH_MP_OP_TESTS }} - CPP_TEST_NAME: ${{ env.RUN_CPP_TESTS1 }}${{ env.RUN_CPP_TESTS2 }} - run: | - # TODO(yeounoh) collect coverage report as needed. - if [ -n "${BENCHMARK_TEST_NAME}" ]; then - exit 0 - fi - docker cp "${pid}":/home/jenkins/htmlcov "${GITHUB_WORKSPACE}" - if [ -n "${GPU_FLAG:-}" ]; then - if [ -n "${PYTHON_TEST_NAME}" ]; then - gsutil cp ${GITHUB_WORKSPACE}/htmlcov/lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/gpu_python_coverage_${PYTHON_TEST_NAME}.out - gsutil cp ${GITHUB_WORKSPACE}/htmlcov/lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/gpu_python_coverage_${PYTHON_TEST_NAME}.out - fi - if [ -n "${CPP_TEST_NAME}" ]; then - gsutil cp ${GITHUB_WORKSPACE}/htmlcov/cpp_lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/gpu_cpp_coverage_${CPP_TEST_NAME}.out - gsutil cp ${GITHUB_WORKSPACE}/htmlcov/cpp_lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/gpu_cpp_coverage_${CPP_TEST_NAME}.out - fi - else - if [ -n "${PYTHON_TEST_NAME}" ]; then - gsutil cp ${GITHUB_WORKSPACE}/htmlcov/lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/cpu_python_coverage_${PYTHON_TEST_NAME}.out - gsutil cp ${GITHUB_WORKSPACE}/htmlcov/lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/cpu_python_coverage_${PYTHON_TEST_NAME}.out - fi - - if [ -n "${CPP_TEST_NAME}" ]; then - gsutil cp ${GITHUB_WORKSPACE}/htmlcov/cpp_lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/cpu_cpp_coverage_${CPP_TEST_NAME}.out - gsutil cp ${GITHUB_WORKSPACE}/htmlcov/cpp_lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/cpu_cpp_coverage_${CPP_TEST_NAME}.out - fi - - if [ "${CPP_TEST_NAME}" == "cpp_tests1" ]; then - ABS_METADATA='{"host": "github", "project": "pytorchxla", "trace_type": "LCOV", "commit_id": '\"${GITHUB_SHA}\"', "ref": "HEAD", "source": "https://github.com/pytorch/xla", "owner": "cloud-tpu-pt-dev", "bug_component": "587012"}' - echo $ABS_METADATA > abs_metadata.json - gsutil cp abs_metadata.json gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/metadata.json - - INC_METADATA='{"host": "github", "project": "pytorchxla", "trace_type": "LCOV", "patchset_num": 1, "change_id": '${CIRCLE_BUILD_NUM}', "owner": "cloud-tpu-pt-dev", "bug_component": "587012"}' - echo $INC_METADATA > inc_metadata.json - gsutil cp inc_metadata.json gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/metadata.json - fi - fi - - - name: Teardown Linux - uses: pytorch/test-infra/.github/actions/teardown-linux@main - if: always() - diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index e5738b5a6af..ce90448738a 100644 --- a/.github/workflows/build_and_test.yml +++ 
b/.github/workflows/build_and_test.yml @@ -30,29 +30,6 @@ jobs: secrets: gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} - test-cpp-cpu: - name: "CPU C++ tests" - uses: ./.github/workflows/_test_cpp.yml - needs: build - with: - docker-image: ${{ needs.build.outputs.docker-image }} - timeout-minutes: 120 - collect-coverage: false # TODO(yeounoh) separate from CPU coverage metrics - secrets: - gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} - - test-cpp-cuda: - name: "GPU C++ tests" - uses: ./.github/workflows/_test_cpp.yml - needs: build - with: - docker-image: ${{ needs.build.outputs.docker-image }} - runner: linux.8xlarge.nvidia.gpu - timeout-minutes: 300 - collect-coverage: false # TODO(yeounoh) separate from CPU coverage metrics - secrets: - gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} - push-docs: name: "Build & publish docs" if: github.event_name == 'push' && (github.event.ref == 'refs/heads/master' || startsWith(github.event.ref, 'refs/tags/r')) @@ -81,8 +58,8 @@ jobs: gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} test-python-cpu: - name: "CPU Python tests" - uses: ./.github/workflows/_test_python.yml + name: "CPU tests" + uses: ./.github/workflows/_test.yml needs: build-torch-xla with: dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_tpuvm @@ -91,9 +68,9 @@ jobs: secrets: gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} - test-python-cuda: - name: "GPU Python tests" - uses: ./.github/workflows/_test_python.yml + test-cuda: + name: "GPU tests" + uses: ./.github/workflows/_test.yml needs: [build-torch-xla, build-cuda-plugin] with: dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_cuda_12.1 diff --git a/BUILD b/BUILD index 6949f6dc748..60b601240fc 100644 --- a/BUILD +++ b/BUILD @@ -30,3 +30,23 @@ cc_binary( "@xla//xla/stream_executor:cuda_platform", ]), ) + +test_suite( + name = "cpp_tests", + # testonly = True, + tests = [ + "//test/cpp:test_aten_xla_tensor_1", + "//test/cpp:test_aten_xla_tensor_2", + "//test/cpp:test_aten_xla_tensor_3", + "//test/cpp:test_aten_xla_tensor_4", + "//test/cpp:test_aten_xla_tensor_5", + "//test/cpp:test_aten_xla_tensor_6", + "//test/cpp:test_ir", + "//test/cpp:test_lazy", + "//test/cpp:test_replication", + "//test/cpp:test_tensor", + "//test/cpp:test_xla_sharding", + "//torch_xla/csrc/runtime:pjrt_computation_client_test", + "//torch_xla/csrc/runtime:ifrt_computation_client_test", + ], +) diff --git a/build_util.py b/build_util.py index 78e4bd5e453..487f5116323 100644 --- a/build_util.py +++ b/build_util.py @@ -36,10 +36,6 @@ def bazel_options_from_env() -> Iterable[str]: bazel_flags.append('--remote_default_exec_properties=cache-silo-key=%s' % cache_silo_name) - if check_env_flag('BUILD_CPP_TESTS', default='0'): - bazel_flags.append('//test/cpp:all') - bazel_flags.append('//torch_xla/csrc/runtime:all') - bazel_jobs = os.getenv('BAZEL_JOBS', default='') if bazel_jobs: bazel_flags.append('--jobs=%s' % bazel_jobs) diff --git a/infra/ansible/config/env.yaml b/infra/ansible/config/env.yaml index d324729ce11..ea785519bae 100644 --- a/infra/ansible/config/env.yaml +++ b/infra/ansible/config/env.yaml @@ -22,7 +22,7 @@ build_env: common: LD_LIBRARY_PATH: "$LD_LIBRARY_PATH:/usr/local/lib" # Set explicitly to 0 as setup.py defaults this flag to true if unset. - BUILD_CPP_TESTS: 0 + BUILD_CPP_TESTS: "{{ build_cpp_tests }}" # Force GCC because clang/bazel has issues. 
CC: gcc-10 CXX: g++-10 diff --git a/infra/ansible/config/vars.yaml b/infra/ansible/config/vars.yaml index c1ca7a93d27..1ab00087b60 100644 --- a/infra/ansible/config/vars.yaml +++ b/infra/ansible/config/vars.yaml @@ -14,3 +14,5 @@ nightly_release: false bundle_libtpu: 1 # Suffix for bazel remote cache key cache_suffix: "" +# Whether to build C++ tests with `torch_xla` wheel +build_cpp_tests: 0 diff --git a/infra/ansible/roles/build_srcs/tasks/main.yaml b/infra/ansible/roles/build_srcs/tasks/main.yaml index 87adde1ed21..d69e9012718 100644 --- a/infra/ansible/roles/build_srcs/tasks/main.yaml +++ b/infra/ansible/roles/build_srcs/tasks/main.yaml @@ -92,6 +92,22 @@ state: absent mode: '0755' +- name: Create temp directory for C++ tests + ansible.builtin.file: + path: /tmp/test/bin + state: directory + mode: '0755' + when: build_cpp_tests + +- name: Collect C++ test files + ansible.builtin.shell: | + cd pytorch/xla/build/temp* + bazel query 'kind(".*_test", tests(//:cpp_tests))' --output=label | xargs -n 1 bazel cquery --output=files | xargs cp -t /tmp/test/bin + args: + executable: bash + chdir: "{{ src_root }}" + when: build_cpp_tests + - name: Read Torchvision pin ansible.builtin.command: cat {{ (src_root, 'pytorch') | path_join }}/.github/ci_commit_pins/vision.txt register: torchvision_pin diff --git a/setup.py b/setup.py index dbe47007aff..a1db046e679 100644 --- a/setup.py +++ b/setup.py @@ -223,6 +223,10 @@ def bazel_build(self, ext): f"--symlink_prefix={os.path.join(self.build_temp, 'bazel-')}" ] + build_cpp_tests = build_util.check_env_flag('BUILD_CPP_TESTS', default='0') + if build_cpp_tests: + bazel_argv.append('//:cpp_tests') + import torch cxx_abi = os.getenv('CXX_ABI') or getattr(torch._C, '_GLIBCXX_USE_CXX11_ABI', None) diff --git a/torch_xla/csrc/runtime/pjrt_registry.cc b/torch_xla/csrc/runtime/pjrt_registry.cc index 99e23f4b555..52b06d89cb4 100644 --- a/torch_xla/csrc/runtime/pjrt_registry.cc +++ b/torch_xla/csrc/runtime/pjrt_registry.cc @@ -21,8 +21,24 @@ namespace runtime { namespace { +// Placeholder plugin for testing only. Does not implement multiprocessing or +// configuration. Very likely will not work from Python code. +class LibraryPlugin : public PjRtPlugin { + public: + std::string library_path() const override { + return sys_util::GetEnvString("PJRT_LIBRARY_PATH", ""); + } + + const std::unordered_map + client_create_options() const override { + return {}; + } + + bool requires_xla_coordinator() const override { return false; } +}; + std::unordered_map> - pjrt_plugins_; + pjrt_plugins_ = {{"LIBRARY", std::make_shared()}}; xla::GpuAllocatorConfig GetGpuAllocatorConfig() { auto allocator_config = xla::GpuAllocatorConfig{};
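
Usage sketch (illustrative, not part of the diff): the new .github/scripts/run_tests.sh takes PYTORCH_DIR, XLA_DIR, and an optional USE_COVERAGE flag, and picks the test group from the RUN_* environment variables the workflow matrix exports; with none of them set it runs the Python, C++, and benchmark suites in sequence. A minimal local invocation, assuming a checkout laid out like the CI workspace (pytorch/ with xla/ nested inside) and the test binaries already in /tmp/test/bin, might look like:

  # Run only the first C++ test shard through the unified runner.
  export RUN_CPP_TESTS1=cpp_tests1
  pytorch/xla/.github/scripts/run_tests.sh pytorch/ pytorch/xla/ 0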
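Building the binaries that feed that runner is driven by a single flag: with BUILD_CPP_TESTS=1, setup.py appends the //:cpp_tests suite to the Bazel invocation, and the Ansible role copies the resulting test binaries into /tmp/test/bin for the cpp-test-bin artifact. A rough manual equivalent, sketched under the assumption of a local source build from the torch_xla checkout:

  # Build the C++ test suite and collect the binaries, mirroring the Ansible task.
  export BUILD_CPP_TESTS=1
  python setup.py bdist_wheel          # Bazel build now also targets //:cpp_tests
  mkdir -p /tmp/test/bin
  cd build/temp*                       # Bazel output root used by setup.py
  bazel query 'kind(".*_test", tests(//:cpp_tests))' --output=label \
    | xargs -n 1 bazel cquery --output=files \
    | xargs cp -t /tmp/test/bin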
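Finally, the LibraryPlugin added to pjrt_registry.cc registers a placeholder LIBRARY device whose shared library is read from PJRT_LIBRARY_PATH; run_tests.sh uses it to point the C++ tests at the installed CUDA PJRT plugin. The relevant environment setup, pulled out as a standalone sketch with an example plugin path:

  # Select the placeholder LIBRARY device and hand it a PJRT plugin .so
  # (the path below is illustrative; run_tests.sh resolves it from the
  # installed torch_xla_cuda_plugin package).
  export PJRT_DEVICE=LIBRARY
  export PJRT_DYNAMIC_PLUGINS=1
  export PJRT_LIBRARY_PATH=/path/to/pjrt_c_api_gpu_plugin.so
  /tmp/test/bin/test_aten_xla_tensor_1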