From f083e10587537268af3af157c2fb4a328f41d8f4 Mon Sep 17 00:00:00 2001 From: Will Cromar Date: Wed, 27 Mar 2024 09:07:31 -0700 Subject: [PATCH] Remove references to XRT in build and CI tooling (#6761) --- .bazelrc | 3 -- .github/workflows/_build.yml | 15 +------- .github/workflows/_test.yml | 8 +--- .github/workflows/build_and_test.yml | 2 - .github/workflows/build_and_test_xrt.yml | 48 ------------------------ .kokoro/Dockerfile | 1 - README.md | 4 -- contrib/k8s/test_train_mp_mnist.yaml | 46 ----------------------- infra/ansible/config/env.yaml | 1 - infra/ansible/config/vars.yaml | 2 - infra/tpu-pytorch/test_triggers.tf | 1 - 11 files changed, 2 insertions(+), 129 deletions(-) delete mode 100644 .github/workflows/build_and_test_xrt.yml delete mode 100644 contrib/k8s/test_train_mp_mnist.yaml diff --git a/.bazelrc b/.bazelrc index 34c41167982..69787e39199 100644 --- a/.bazelrc +++ b/.bazelrc @@ -75,9 +75,6 @@ build:tpu --define=with_tpu_support=true test:tpu --local_test_jobs=1 test:cuda --local_test_jobs=1 -# Exclude XRT from the build -build:disable_xrt --define=disable_xrt=true - ######################################################################### # RBE config options below. # Flag to enable remote config diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml index 186297857b2..789d0579272 100644 --- a/.github/workflows/_build.yml +++ b/.github/workflows/_build.yml @@ -20,11 +20,6 @@ on: type: string description: Whether to build XLA with CUDA default: 1 - disable_xrt: - required: false - type: string - description: Whether to disable XRT in the build - default: 0 secrets: gcloud-service-key: @@ -48,7 +43,6 @@ jobs: SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2 GCLOUD_SERVICE_KEY: ${{ secrets.gcloud-service-key }} XLA_CUDA: ${{ inputs.cuda }} - DISABLE_XRT: ${{ inputs.disable_xrt }} BAZEL_JOBS: 16 steps: - name: Setup Linux @@ -88,7 +82,6 @@ jobs: shell: bash run: | echo "declare -x SCCACHE_BUCKET=${SCCACHE_BUCKET}" | docker exec -i "${pid}" sh -c "cat >> env" - echo "declare -x DISABLE_XRT=${DISABLE_XRT}" | docker exec -i "${pid}" sh -c "cat >> xla_env" echo "declare -x XLA_CUDA=${XLA_CUDA}" | docker exec -i "${pid}" sh -c "cat >> xla_env" echo "declare -x BAZEL_JOBS=${BAZEL_JOBS}" | docker exec -i "${pid}" sh -c "cat >> xla_env" echo "declare -x BAZEL_REMOTE_CACHE=1" | docker exec -i "${pid}" sh -c "cat >> xla_env" @@ -107,13 +100,7 @@ jobs: id: upload-docker-image shell: bash run: | - if [[ ${DISABLE_XRT} == 1 ]]; then - image_tag_base=latest - else - image_tag_base=latest-xrt - fi - - export COMMIT_DOCKER_IMAGE="${ECR_DOCKER_IMAGE_BASE}:${image_tag_base}-${GITHUB_SHA}" + export COMMIT_DOCKER_IMAGE="${ECR_DOCKER_IMAGE_BASE}:latest-${GITHUB_SHA}" time docker commit "${pid}" "${COMMIT_DOCKER_IMAGE}" time docker push "${COMMIT_DOCKER_IMAGE}" echo "docker-image=${COMMIT_DOCKER_IMAGE}" >> "${GITHUB_OUTPUT}" diff --git a/.github/workflows/_test.yml b/.github/workflows/_test.yml index cd5e8f9bb94..0f9e96e31e5 100644 --- a/.github/workflows/_test.yml +++ b/.github/workflows/_test.yml @@ -27,11 +27,6 @@ on: type: string default: 0 description: Whether to disable PJRT tests - disable-xrt: - required: false - type: string - default: 0 - description: Whether to disable XRT tests test-script: required: false type: string @@ -67,7 +62,6 @@ jobs: WORKDIR: /var/lib/jenkins/workspace GCLOUD_SERVICE_KEY: ${{ secrets.gcloud-service-key }} USE_COVERAGE: ${{ inputs.collect-coverage && '1' || '0' }} - XLA_SKIP_XRT_TESTS: ${{ inputs.disable-xrt }} XLA_SKIP_TORCH_OP_TESTS: ${{ inputs.disable-pjrt }} XLA_SKIP_MP_OP_TESTS: ${{ inputs.disable-pjrt }} RUN_BENCHMARK_TESTS: ${{ matrix.run_benchmark_tests }} @@ -112,7 +106,7 @@ jobs: run: | echo "DOCKER_IMAGE: ${DOCKER_IMAGE}" docker pull "${DOCKER_IMAGE}" - pid=$(docker run --shm-size=16g ${GPU_FLAG:-} -e USE_COVERAGE -e XLA_SKIP_XRT_TESTS -e XLA_SKIP_TORCH_OP_TESTS -e XLA_SKIP_MP_OP_TESTS -e RUN_BENCHMARK_TESTS -e RUN_CPP_TESTS1 -e RUN_CPP_TESTS2 -e RUN_PYTHON_TESTS -e RUN_XLA_OP_TESTS1 -e RUN_XLA_OP_TESTS2 -e RUN_XLA_OP_TESTS3 -e RUN_TORCH_MP_OP_TESTS -t -d -w "$WORKDIR" "${DOCKER_IMAGE}") + pid=$(docker run --shm-size=16g ${GPU_FLAG:-} -e USE_COVERAGE -e XLA_SKIP_TORCH_OP_TESTS -e XLA_SKIP_MP_OP_TESTS -e RUN_BENCHMARK_TESTS -e RUN_CPP_TESTS1 -e RUN_CPP_TESTS2 -e RUN_PYTHON_TESTS -e RUN_XLA_OP_TESTS1 -e RUN_XLA_OP_TESTS2 -e RUN_XLA_OP_TESTS3 -e RUN_TORCH_MP_OP_TESTS -t -d -w "$WORKDIR" "${DOCKER_IMAGE}") echo "${GCLOUD_SERVICE_KEY}" | docker exec -i "${pid}" sh -c "cat >> /tmp/pytorch/xla/default_credentials.json" echo "pid=${pid}" >> "${GITHUB_ENV}" - name: Test diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index cf1c06817bf..41bca83b5cb 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -36,7 +36,6 @@ jobs: docker-image: ${{ needs.build.outputs.docker-image }} timeout-minutes: 120 collect-coverage: false - disable-xrt: 1 secrets: gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} @@ -49,7 +48,6 @@ jobs: runner: linux.8xlarge.nvidia.gpu timeout-minutes: 300 collect-coverage: false # TODO(yeounoh) separate from CPU coverage metrics - disable-xrt: 1 secrets: gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} diff --git a/.github/workflows/build_and_test_xrt.yml b/.github/workflows/build_and_test_xrt.yml deleted file mode 100644 index 79f96e0c19c..00000000000 --- a/.github/workflows/build_and_test_xrt.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: Build & Test XRT branch -on: - pull_request: - branches: - - xrt - push: - branches: - - xrt - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} - cancel-in-progress: true - -jobs: - build: - name: "Build XLA" - uses: ./.github/workflows/_build.yml - with: - ecr-docker-image-base: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/xla_base - gcr-docker-image: gcr.io/tpu-pytorch/xla_base:dev-3.8_cuda_12.1 - disable_xrt: 0 - cuda: 1 - secrets: - gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} - - test-cpu: - name: "CPU tests" - uses: ./.github/workflows/_test.yml - needs: build - with: - docker-image: ${{ needs.build.outputs.docker-image }} - timeout-minutes: 90 - disable-xrt: 0 - secrets: - gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} - - test-cuda: - name: "GPU tests" - uses: ./.github/workflows/_test.yml - needs: build - with: - docker-image: ${{ needs.build.outputs.docker-image }} - runner: linux.8xlarge.nvidia.gpu - timeout-minutes: 180 - disable-xrt: 0 - secrets: - gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} diff --git a/.kokoro/Dockerfile b/.kokoro/Dockerfile index 40210aba1f3..e85930b57d9 100644 --- a/.kokoro/Dockerfile +++ b/.kokoro/Dockerfile @@ -6,7 +6,6 @@ RUN apt-get -y install clang time RUN pip install pytest ARG USE_MKLDNN=0 ARG SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2 -ARG DISABLE_XRT=1 ARG XLA_CUDA=0 ARG BAZEL_REMOTE_CACHE=1 ARG USE_FBGEMM=0 diff --git a/README.md b/README.md index fa5cd88c5ec..5508c2c9de0 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,6 @@ Current CI status: ![GitHub Actions status](https://github.com/pytorch/xla/actions/workflows/build_and_test.yml/badge.svg) -Note: PyTorch/XLA r2.1 will be the last release with XRT available as a legacy -runtime. Our main release build will not include XRT, but it will be available -in a separate package. - PyTorch/XLA is a Python package that uses the [XLA deep learning compiler](https://www.tensorflow.org/xla) to connect the [PyTorch deep learning framework](https://pytorch.org/) and [Cloud diff --git a/contrib/k8s/test_train_mp_mnist.yaml b/contrib/k8s/test_train_mp_mnist.yaml deleted file mode 100644 index 70e0ff802d0..00000000000 --- a/contrib/k8s/test_train_mp_mnist.yaml +++ /dev/null @@ -1,46 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - name: pytorch-tpu-train-mnist -spec: - template: - metadata: - annotations: - # The runtime version that the TPU will run with. - # Note: It's called "tf-version" for historical reasons. - tf-version.cloud-tpus.google.com: "pytorch-nightly" - spec: - restartPolicy: Never - volumes: - # Increase size of tmpfs /dev/shm to avoid OOM. - - name: dshm - emptyDir: - medium: Memory - containers: - - name: mnist-pytorch-tpu - # This is the image we publish nightly with our package pre-installed. - image: gcr.io/tpu-pytorch/xla:nightly - volumeMounts: - - mountPath: /dev/shm - name: dshm - # For the time being we need to manually set XRT_TPU_CONFIG from - # KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS env var hooked in by GKE. - command: [ - 'bash', '-c', - 'XRT_TPU_CONFIG="tpu_worker;0;${KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS:7}" - python pytorch/xla/test/test_train_mp_mnist.py' - ] - env: - # Example environment variables injected to container on GKE. - - name: XLA_USE_BF16 - value: "0" - resources: - limits: - # Request a single v3-8 Cloud TPU device to train the model. - # A single v3-8 Cloud TPU device consists of 4 chips, each of which - # has 2 cores, so there are 8 cores in total. - cloud-tpus.google.com/v3: 8 - requests: - memory: 30Gi - cpu: 10 - diff --git a/infra/ansible/config/env.yaml b/infra/ansible/config/env.yaml index 791c2f95ded..15e8dc79d6c 100644 --- a/infra/ansible/config/env.yaml +++ b/infra/ansible/config/env.yaml @@ -32,7 +32,6 @@ build_env: XLA_SANDBOX_BUILD: 1 BAZEL_REMOTE_CACHE: 1 SILO_NAME: "cache-silo-{{ arch }}-{{ accelerator }}-{{ clang_version }}" - DISABLE_XRT: "{{ disable_xrt }}" _GLIBCXX_USE_CXX11_ABI: 0 GIT_VERSIONED_XLA_BUILD: "{{ nightly_release }}" diff --git a/infra/ansible/config/vars.yaml b/infra/ansible/config/vars.yaml index c457ba8608b..d9e0258c709 100644 --- a/infra/ansible/config/vars.yaml +++ b/infra/ansible/config/vars.yaml @@ -8,7 +8,5 @@ clang_version: 17 package_version: 2.3.0 # If set to true, wheels will be renamed to $WHEEL_NAME-nightly-cp38-cp38-linux_x86_64.whl. nightly_release: false -# Whether to disable XRT during build -disable_xrt: 0 # Whether to preinstall libtpu in the PyTorch/XLA wheel. Ignored for GPU build. bundle_libtpu: 1 diff --git a/infra/tpu-pytorch/test_triggers.tf b/infra/tpu-pytorch/test_triggers.tf index 21b169d4d54..85590954f6e 100644 --- a/infra/tpu-pytorch/test_triggers.tf +++ b/infra/tpu-pytorch/test_triggers.tf @@ -26,7 +26,6 @@ module "tpu_e2e_tests" { ansible_vars = { arch = "amd64" accelerator = "tpu" - disable_xrt = "1" pytorch_git_rev = "main" # The commit ID associated with the triggered build. Substituted when # Cloud Build is triggered.