From f083e10587537268af3af157c2fb4a328f41d8f4 Mon Sep 17 00:00:00 2001
From: Will Cromar <wcromar@google.com>
Date: Wed, 27 Mar 2024 09:07:31 -0700
Subject: [PATCH] Remove references to XRT in build and CI tooling (#6761)

---
 .bazelrc                                 |  3 --
 .github/workflows/_build.yml             | 15 +-------
 .github/workflows/_test.yml              |  8 +---
 .github/workflows/build_and_test.yml     |  2 -
 .github/workflows/build_and_test_xrt.yml | 48 ------------------------
 .kokoro/Dockerfile                       |  1 -
 README.md                                |  4 --
 contrib/k8s/test_train_mp_mnist.yaml     | 46 -----------------------
 infra/ansible/config/env.yaml            |  1 -
 infra/ansible/config/vars.yaml           |  2 -
 infra/tpu-pytorch/test_triggers.tf       |  1 -
 11 files changed, 2 insertions(+), 129 deletions(-)
 delete mode 100644 .github/workflows/build_and_test_xrt.yml
 delete mode 100644 contrib/k8s/test_train_mp_mnist.yaml

diff --git a/.bazelrc b/.bazelrc
index 34c41167982..69787e39199 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -75,9 +75,6 @@ build:tpu --define=with_tpu_support=true
 test:tpu --local_test_jobs=1
 test:cuda --local_test_jobs=1
 
-# Exclude XRT from the build
-build:disable_xrt --define=disable_xrt=true
-
 #########################################################################
 # RBE config options below.
 # Flag to enable remote config
diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml
index 186297857b2..789d0579272 100644
--- a/.github/workflows/_build.yml
+++ b/.github/workflows/_build.yml
@@ -20,11 +20,6 @@ on:
         type: string
         description: Whether to build XLA with CUDA
         default: 1
-      disable_xrt:
-        required: false
-        type: string
-        description: Whether to disable XRT in the build
-        default: 0
 
     secrets:
       gcloud-service-key:
@@ -48,7 +43,6 @@ jobs:
       SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
       GCLOUD_SERVICE_KEY: ${{ secrets.gcloud-service-key }}
       XLA_CUDA: ${{ inputs.cuda }}
-      DISABLE_XRT: ${{ inputs.disable_xrt }}
       BAZEL_JOBS: 16
     steps:
       - name: Setup Linux
@@ -88,7 +82,6 @@ jobs:
         shell: bash
         run: |
           echo "declare -x SCCACHE_BUCKET=${SCCACHE_BUCKET}" | docker exec -i "${pid}" sh -c "cat >> env"
-          echo "declare -x DISABLE_XRT=${DISABLE_XRT}" | docker exec -i "${pid}" sh -c "cat >> xla_env"
           echo "declare -x XLA_CUDA=${XLA_CUDA}" | docker exec -i "${pid}" sh -c "cat >> xla_env"
           echo "declare -x BAZEL_JOBS=${BAZEL_JOBS}" | docker exec -i "${pid}" sh -c "cat >> xla_env"
           echo "declare -x BAZEL_REMOTE_CACHE=1" | docker exec -i "${pid}" sh -c "cat >> xla_env"
@@ -107,13 +100,7 @@ jobs:
         id: upload-docker-image
         shell: bash
         run: |
-          if [[ ${DISABLE_XRT} == 1 ]]; then
-            image_tag_base=latest
-          else
-            image_tag_base=latest-xrt
-          fi
-
-          export COMMIT_DOCKER_IMAGE="${ECR_DOCKER_IMAGE_BASE}:${image_tag_base}-${GITHUB_SHA}"
+          export COMMIT_DOCKER_IMAGE="${ECR_DOCKER_IMAGE_BASE}:latest-${GITHUB_SHA}"
           time docker commit "${pid}" "${COMMIT_DOCKER_IMAGE}"
           time docker push "${COMMIT_DOCKER_IMAGE}"
           echo "docker-image=${COMMIT_DOCKER_IMAGE}" >> "${GITHUB_OUTPUT}"
diff --git a/.github/workflows/_test.yml b/.github/workflows/_test.yml
index cd5e8f9bb94..0f9e96e31e5 100644
--- a/.github/workflows/_test.yml
+++ b/.github/workflows/_test.yml
@@ -27,11 +27,6 @@ on:
         type: string
         default: 0
         description: Whether to disable PJRT tests
-      disable-xrt:
-        required: false
-        type: string
-        default: 0
-        description: Whether to disable XRT tests
       test-script:
         required: false
         type: string
@@ -67,7 +62,6 @@ jobs:
       WORKDIR: /var/lib/jenkins/workspace
       GCLOUD_SERVICE_KEY: ${{ secrets.gcloud-service-key }}
       USE_COVERAGE: ${{ inputs.collect-coverage && '1' || '0' }}
-      XLA_SKIP_XRT_TESTS: ${{ inputs.disable-xrt }}
       XLA_SKIP_TORCH_OP_TESTS: ${{ inputs.disable-pjrt }}
       XLA_SKIP_MP_OP_TESTS: ${{ inputs.disable-pjrt }}
       RUN_BENCHMARK_TESTS: ${{ matrix.run_benchmark_tests }}
@@ -112,7 +106,7 @@ jobs:
         run: |
           echo "DOCKER_IMAGE: ${DOCKER_IMAGE}"
           docker pull "${DOCKER_IMAGE}"
-          pid=$(docker run --shm-size=16g ${GPU_FLAG:-} -e USE_COVERAGE -e XLA_SKIP_XRT_TESTS -e XLA_SKIP_TORCH_OP_TESTS -e XLA_SKIP_MP_OP_TESTS -e RUN_BENCHMARK_TESTS -e RUN_CPP_TESTS1 -e RUN_CPP_TESTS2 -e RUN_PYTHON_TESTS -e RUN_XLA_OP_TESTS1 -e RUN_XLA_OP_TESTS2 -e RUN_XLA_OP_TESTS3 -e RUN_TORCH_MP_OP_TESTS -t -d -w "$WORKDIR" "${DOCKER_IMAGE}")
+          pid=$(docker run --shm-size=16g ${GPU_FLAG:-} -e USE_COVERAGE -e XLA_SKIP_TORCH_OP_TESTS -e XLA_SKIP_MP_OP_TESTS -e RUN_BENCHMARK_TESTS -e RUN_CPP_TESTS1 -e RUN_CPP_TESTS2 -e RUN_PYTHON_TESTS -e RUN_XLA_OP_TESTS1 -e RUN_XLA_OP_TESTS2 -e RUN_XLA_OP_TESTS3 -e RUN_TORCH_MP_OP_TESTS -t -d -w "$WORKDIR" "${DOCKER_IMAGE}")
           echo "${GCLOUD_SERVICE_KEY}" | docker exec -i "${pid}" sh -c "cat >> /tmp/pytorch/xla/default_credentials.json"
           echo "pid=${pid}" >> "${GITHUB_ENV}"
       - name: Test
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index cf1c06817bf..41bca83b5cb 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -36,7 +36,6 @@ jobs:
       docker-image: ${{ needs.build.outputs.docker-image }}
       timeout-minutes: 120
       collect-coverage: false
-      disable-xrt: 1
     secrets:
       gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}
 
@@ -49,7 +48,6 @@ jobs:
       runner: linux.8xlarge.nvidia.gpu
       timeout-minutes: 300
       collect-coverage: false  # TODO(yeounoh) separate from CPU coverage metrics
-      disable-xrt: 1
     secrets:
       gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}
 
diff --git a/.github/workflows/build_and_test_xrt.yml b/.github/workflows/build_and_test_xrt.yml
deleted file mode 100644
index 79f96e0c19c..00000000000
--- a/.github/workflows/build_and_test_xrt.yml
+++ /dev/null
@@ -1,48 +0,0 @@
-name: Build & Test XRT branch
-on:
-  pull_request:
-    branches:
-      - xrt
-  push:
-    branches:
-      - xrt
-  workflow_dispatch:
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
-  cancel-in-progress: true
-
-jobs:
-  build:
-    name: "Build XLA"
-    uses: ./.github/workflows/_build.yml
-    with:
-      ecr-docker-image-base: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/xla_base
-      gcr-docker-image: gcr.io/tpu-pytorch/xla_base:dev-3.8_cuda_12.1
-      disable_xrt: 0
-      cuda: 1
-    secrets:
-      gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}
-
-  test-cpu:
-    name: "CPU tests"
-    uses: ./.github/workflows/_test.yml
-    needs: build
-    with:
-      docker-image: ${{ needs.build.outputs.docker-image }}
-      timeout-minutes: 90
-      disable-xrt: 0
-    secrets:
-      gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}
-
-  test-cuda:
-    name: "GPU tests"
-    uses: ./.github/workflows/_test.yml
-    needs: build
-    with:
-      docker-image: ${{ needs.build.outputs.docker-image }}
-      runner: linux.8xlarge.nvidia.gpu
-      timeout-minutes: 180
-      disable-xrt: 0
-    secrets:
-      gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}
diff --git a/.kokoro/Dockerfile b/.kokoro/Dockerfile
index 40210aba1f3..e85930b57d9 100644
--- a/.kokoro/Dockerfile
+++ b/.kokoro/Dockerfile
@@ -6,7 +6,6 @@ RUN apt-get -y install clang time
 RUN pip install pytest
 ARG USE_MKLDNN=0
 ARG SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2
-ARG DISABLE_XRT=1
 ARG XLA_CUDA=0
 ARG BAZEL_REMOTE_CACHE=1
 ARG USE_FBGEMM=0
diff --git a/README.md b/README.md
index fa5cd88c5ec..5508c2c9de0 100644
--- a/README.md
+++ b/README.md
@@ -3,10 +3,6 @@
 <b>Current CI status:</b>  ![GitHub Actions
 status](https://github.com/pytorch/xla/actions/workflows/build_and_test.yml/badge.svg)
 
-Note: PyTorch/XLA r2.1 will be the last release with XRT available as a legacy
-runtime. Our main release build will not include XRT, but it will be available
-in a separate package.
-
 PyTorch/XLA is a Python package that uses the [XLA deep learning
 compiler](https://www.tensorflow.org/xla) to connect the [PyTorch deep learning
 framework](https://pytorch.org/) and [Cloud
diff --git a/contrib/k8s/test_train_mp_mnist.yaml b/contrib/k8s/test_train_mp_mnist.yaml
deleted file mode 100644
index 70e0ff802d0..00000000000
--- a/contrib/k8s/test_train_mp_mnist.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-apiVersion: batch/v1
-kind: Job
-metadata:
-  name: pytorch-tpu-train-mnist
-spec:
-  template:
-    metadata:
-      annotations:
-        # The runtime version that the TPU will run with.
-        # Note: It's called "tf-version" for historical reasons.
-        tf-version.cloud-tpus.google.com: "pytorch-nightly"
-    spec:
-      restartPolicy: Never
-      volumes:
-      # Increase size of tmpfs /dev/shm to avoid OOM.
-      - name: dshm
-        emptyDir:
-          medium: Memory
-      containers:
-      - name: mnist-pytorch-tpu
-        # This is the image we publish nightly with our package pre-installed.
-        image: gcr.io/tpu-pytorch/xla:nightly
-        volumeMounts:
-        - mountPath: /dev/shm
-          name: dshm
-        # For the time being we need to manually set XRT_TPU_CONFIG from
-        # KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS env var hooked in by GKE.
-        command: [
-          'bash', '-c',
-          'XRT_TPU_CONFIG="tpu_worker;0;${KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS:7}"
-          python pytorch/xla/test/test_train_mp_mnist.py'
-        ]
-        env:
-        # Example environment variables injected to container on GKE.
-        - name: XLA_USE_BF16
-          value: "0"
-        resources:
-          limits:
-            # Request a single v3-8 Cloud TPU device to train the model.
-            # A single v3-8 Cloud TPU device consists of 4 chips, each of which
-            # has 2 cores, so there are 8 cores in total.
-            cloud-tpus.google.com/v3: 8
-          requests:
-            memory: 30Gi
-            cpu: 10
-
diff --git a/infra/ansible/config/env.yaml b/infra/ansible/config/env.yaml
index 791c2f95ded..15e8dc79d6c 100644
--- a/infra/ansible/config/env.yaml
+++ b/infra/ansible/config/env.yaml
@@ -32,7 +32,6 @@ build_env:
     XLA_SANDBOX_BUILD: 1
     BAZEL_REMOTE_CACHE: 1
     SILO_NAME: "cache-silo-{{ arch }}-{{ accelerator }}-{{ clang_version }}"
-    DISABLE_XRT: "{{ disable_xrt }}"
     _GLIBCXX_USE_CXX11_ABI: 0
     GIT_VERSIONED_XLA_BUILD: "{{ nightly_release }}"
 
diff --git a/infra/ansible/config/vars.yaml b/infra/ansible/config/vars.yaml
index c457ba8608b..d9e0258c709 100644
--- a/infra/ansible/config/vars.yaml
+++ b/infra/ansible/config/vars.yaml
@@ -8,7 +8,5 @@ clang_version: 17
 package_version: 2.3.0
 # If set to true, wheels will be renamed to $WHEEL_NAME-nightly-cp38-cp38-linux_x86_64.whl.
 nightly_release: false
-# Whether to disable XRT during build
-disable_xrt: 0
 # Whether to preinstall libtpu in the PyTorch/XLA wheel. Ignored for GPU build.
 bundle_libtpu: 1
diff --git a/infra/tpu-pytorch/test_triggers.tf b/infra/tpu-pytorch/test_triggers.tf
index 21b169d4d54..85590954f6e 100644
--- a/infra/tpu-pytorch/test_triggers.tf
+++ b/infra/tpu-pytorch/test_triggers.tf
@@ -26,7 +26,6 @@ module "tpu_e2e_tests" {
   ansible_vars = {
     arch            = "amd64"
     accelerator     = "tpu"
-    disable_xrt     = "1"
     pytorch_git_rev = "main"
     # The commit ID associated with the triggered build. Substituted when
     # Cloud Build is triggered.