Consolidate the duplicated CI setup steps into one composite action,
.github/workflows/setup, and call it from every build and test workflow.

Review notes on the new action. The `cuda` input is compared against the
string 'true' because composite action inputs are always strings, so a bare
`inputs.cuda` is truthy even when its value is 'false'. The action also
exports XLA_REGISTER_INSTALLED_PLUGINS=1 whenever it installs a CUDA plugin
artifact, as the workflow steps it replaces did. The blob id on the `index`
line of setup/action.yml predates these two edits.

diff --git a/.github/workflows/_build_plugin.yml b/.github/workflows/_build_plugin.yml
index 441dbc6a327..d6de495a4e7 100644
--- a/.github/workflows/_build_plugin.yml
+++ b/.github/workflows/_build_plugin.yml
@@ -27,14 +27,17 @@ jobs:
       BAZEL_JOBS: 16
       BAZEL_REMOTE_CACHE: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }}
     steps:
-      - name: Setup gcloud
-        shell: bash
-        run: |
-          echo "${GCLOUD_SERVICE_KEY}" > $GOOGLE_APPLICATION_CREDENTIALS
-      - name: Checkout repo
+      - name: Checkout actions
         uses: actions/checkout@v4
         with:
-          path: pytorch/xla
+          sparse-checkout: |
+            .github/workflows/setup
+          path: .actions
+      - name: Setup
+        uses: ./.actions/.github/workflows/setup
+        with:
+          torch-commit: ${{ inputs.torch-commit }}
+          cuda: true
       - name: Build
         shell: bash
         run: |
diff --git a/.github/workflows/_build_torch_with_cuda.yml b/.github/workflows/_build_torch_with_cuda.yml
index 296e79b7dfb..6b89c6d2960 100644
--- a/.github/workflows/_build_torch_with_cuda.yml
+++ b/.github/workflows/_build_torch_with_cuda.yml
@@ -23,16 +23,17 @@ jobs:
     env:
       _GLIBCXX_USE_CXX11_ABI: 0
     steps:
-      # See https://github.com/actions/checkout/issues/1014#issuecomment-1906802802
-      - name: Clean up workspace
-        run: |
-          ls -la
-          rm -rvf ${GITHUB_WORKSPACE}/*
-      - name: Setup CUDA environment
-        shell: bash
-        run: |
-          echo "PATH=$PATH:/usr/local/cuda-12.1/bin" >> $GITHUB_ENV
-          echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12.1/lib64" >> $GITHUB_ENV
+      - name: Checkout actions
+        uses: actions/checkout@v4
+        with:
+          sparse-checkout: |
+            .github/workflows/setup
+          path: .actions
+      - name: Setup
+        uses: ./.actions/.github/workflows/setup
+        with:
+          torch-commit: ${{ inputs.torch-commit }}
+          cuda: true
       - name: Checkout PyTorch Repo
         uses: actions/checkout@v4
         with:
diff --git a/.github/workflows/_build_torch_xla.yml b/.github/workflows/_build_torch_xla.yml
index 56e6b5408c3..6df169ed5ac 100644
--- a/.github/workflows/_build_torch_xla.yml
+++ b/.github/workflows/_build_torch_xla.yml
@@ -27,31 +27,22 @@ jobs:
       image: ${{ inputs.dev-image }}
     env:
       GCLOUD_SERVICE_KEY: ${{ secrets.gcloud-service-key }}
-      GOOGLE_APPLICATION_CREDENTIALS: /tmp/default_credentials.json
-      BAZEL_JOBS: 16
       BAZEL_REMOTE_CACHE: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }}
+      BAZEL_JOBS: 16
       BUILD_CPP_TESTS: 1
     steps:
-      # See https://github.com/actions/checkout/issues/1014#issuecomment-1906802802
-      - name: Clean up workspace
-        run: |
-          ls -la
-          rm -rvf ${GITHUB_WORKSPACE}/*
-      - name: Setup gcloud
-        shell: bash
-        run: |
-          echo "${GCLOUD_SERVICE_KEY}" > $GOOGLE_APPLICATION_CREDENTIALS
-      - name: Checkout PyTorch Repo
+      # Need to check out local composite actions before using them
+      # https://github.com/orgs/community/discussions/11771
+      - name: Checkout actions
         uses: actions/checkout@v4
         with:
-          repository: pytorch/pytorch
-          path: pytorch
-          ref: ${{ inputs.torch-commit }}
-          submodules: recursive
-      - name: Checkout PyTorch/XLA Repo
-        uses: actions/checkout@v4
+          sparse-checkout: |
+            .github/workflows/setup
+          path: .actions
+      - name: Setup
+        uses: ./.actions/.github/workflows/setup
         with:
-          path: pytorch/xla
+          torch-commit: ${{ inputs.torch-commit }}
       - name: Build
         shell: bash
         run: |
diff --git a/.github/workflows/_get_torch_commit.yml b/.github/workflows/_get_torch_commit.yml
deleted file mode 100644
index 062a95a248e..00000000000
--- a/.github/workflows/_get_torch_commit.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-name: get-torch-commit
-on:
-  workflow_call:
-    outputs:
-      torch_commit:
-        description: "torch commit to be used"
-        value: ${{ jobs.get-commit.outputs.torch_commit }}
-
-jobs:
-  get-commit:
-    runs-on: ubuntu-20.04
-    outputs:
-      torch_commit: ${{ steps.get_torch_commit.outputs.torch_commit }}
-    steps:
-      # See https://github.com/actions/checkout/issues/1014#issuecomment-1906802802
-      - name: Clean up workspace
-        run: |
-          ls -la
-          rm -rvf ${GITHUB_WORKSPACE}/*
-      - id: get_torch_commit
-        name: Get torch commit
-        run: |
-          torch_commit=$(git ls-remote https://github.com/pytorch/pytorch.git HEAD | awk '{print $1}')
-          echo "torch_commit=$torch_commit" >> "$GITHUB_OUTPUT"
-
diff --git a/.github/workflows/_test.yml b/.github/workflows/_test.yml
index ed0376558eb..bf974c2bd22 100644
--- a/.github/workflows/_test.yml
+++ b/.github/workflows/_test.yml
@@ -77,20 +77,19 @@ jobs:
       BAZEL_JOBS: 16
       BAZEL_REMOTE_CACHE: 1
     steps:
-      # See https://github.com/actions/checkout/issues/1014#issuecomment-1906802802
-      - name: Clean up workspace
-        run: |
-          ls -la
-          rm -rvf ${GITHUB_WORKSPACE}/*
-      - name: Setup gcloud
-        shell: bash
-        run: |
-          echo "${GCLOUD_SERVICE_KEY}" > $GOOGLE_APPLICATION_CREDENTIALS
-      - name: Fetch wheels
-        uses: actions/download-artifact@v4
+      - name: Checkout actions
+        uses: actions/checkout@v4
+        with:
+          sparse-checkout: |
+            .github/workflows/setup
+          path: .actions
+      - name: Setup
+        uses: ./.actions/.github/workflows/setup
         with:
-          name: torch-xla-wheels
-          path: /tmp/wheels/
+          torch-commit: ${{ inputs.torch-commit }}
+          cuda: ${{ inputs.install-cuda-plugin && true || false }}
+          wheels-artifact: torch-xla-wheels
+          cuda-plugin-artifact: ${{ inputs.install-cuda-plugin && 'cuda-plugin' || null }}
       - name: Fetch CPP test binaries
         uses: actions/download-artifact@v4
         with:
@@ -104,34 +103,15 @@ jobs:
           chmod +x /tmp/test/bin/*
           ls -l /tmp/test/bin
         if: ${{ matrix.run_cpp_tests }}
-      - name: Fetch CUDA plugin
-        uses: actions/download-artifact@v4
-        with:
-          name: cuda-plugin
-          path: /tmp/wheels/
-        if: ${{ inputs.install-cuda-plugin }}
-      - name: Setup CUDA environment
-        shell: bash
-        run: |
-          # TODO: Make PJRT_DEVICE=CPU work with XLA_REGISTER_INSTALLED_PLUGINS=1
-          echo "XLA_REGISTER_INSTALLED_PLUGINS=1" >> $GITHUB_ENV
-
-          echo "PATH=$PATH:/usr/local/cuda-12.1/bin" >> $GITHUB_ENV
-          echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12.1/lib64" >> $GITHUB_ENV
-        if: ${{ inputs.install-cuda-plugin }}
       - name: Check GPU
         run: nvidia-smi
         if: ${{ inputs.install-cuda-plugin }}
-      - name: Install wheels
+      - name: Install test deps
         shell: bash
         run: |
-          pip install /tmp/wheels/*.whl
           # TODO: Add these in setup.py
           pip install fsspec
           pip install rich
-
-          echo "Import check..."
-          python -c "import torch_xla"
       - name: Checkout PyTorch Repo
         uses: actions/checkout@v4
         with:
diff --git a/.github/workflows/_test_requiring_torch_cuda.yml b/.github/workflows/_test_requiring_torch_cuda.yml
index 466d72b84fe..a2440230f6d 100644
--- a/.github/workflows/_test_requiring_torch_cuda.yml
+++ b/.github/workflows/_test_requiring_torch_cuda.yml
@@ -47,38 +47,20 @@ jobs:
       BAZEL_JOBS: 16
       BAZEL_REMOTE_CACHE: 1
     steps:
-      # See https://github.com/actions/checkout/issues/1014#issuecomment-1906802802
-      # TODO: need to find a way to reuse these steps.
-      - name: Clean up workspace
-        run: |
-          ls -la
-          rm -rvf ${GITHUB_WORKSPACE}/*
-      - name: Fetch torch/torch_xla/torchvision wheels
-        uses: actions/download-artifact@v4
-        with:
-          name: torch-xla-wheels
-          path: /tmp/wheels/
-      - name: Remove torch wheel built with CUDA disabled
-        shell: bash
-        run: |
-          rm -rf /tmp/wheels/torch-*
-      - name: Fetch the torch wheel built with CUDA enabled
-        uses: actions/download-artifact@v4
+      - name: Checkout actions
+        uses: actions/checkout@v4
         with:
-          name: torch-with-cuda
-          path: /tmp/wheels/
-      - name: Fetch CUDA plugin
-        uses: actions/download-artifact@v4
+          sparse-checkout: |
+            .github/workflows/setup
+          path: .actions
+      - name: Setup
+        uses: ./.actions/.github/workflows/setup
         with:
-          name: cuda-plugin
-          path: /tmp/wheels/
-      - name: Setup CUDA environment
-        shell: bash
-        run: |
-          echo "XLA_REGISTER_INSTALLED_PLUGINS=1" >> $GITHUB_ENV
-
-          echo "PATH=$PATH:/usr/local/cuda-12.1/bin" >> $GITHUB_ENV
-          echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12.1/lib64" >> $GITHUB_ENV
+          torch-commit: ${{ inputs.torch-commit }}
+          cuda: true
+          wheels-artifact: torch-xla-wheels
+          cuda-plugin-artifact: cuda-plugin
+          cuda-torch-artifact: torch-with-cuda
       - name: Check GPU
         run: nvidia-smi
       - name: Install wheels
diff --git a/.github/workflows/_tpu_ci.yml b/.github/workflows/_tpu_ci.yml
index bfe9359cf15..781a08aec2b 100644
--- a/.github/workflows/_tpu_ci.yml
+++ b/.github/workflows/_tpu_ci.yml
@@ -5,19 +5,17 @@ jobs:
   tpu-test:
     runs-on: v4-runner-set
     steps:
-      - name: Checkout repo
+      - name: Checkout actions
         uses: actions/checkout@v4
         with:
-          path: pytorch/xla
-      - name: Fetch wheels
-        uses: actions/download-artifact@v4
+          sparse-checkout: |
+            .github/workflows/setup
+          path: .actions
+      - name: Setup
+        uses: ./.actions/.github/workflows/setup
         with:
-          name: torch-xla-wheels
-          path: /tmp/wheels/
-      - name: Install wheels
-        shell: bash
-        run: |
-          pip install /tmp/wheels/*.whl
+          torch-commit: ${{ inputs.torch-commit }}
+          wheels-artifact: torch-xla-wheels
       - name: Install test dependencies
         shell: bash
         run: |
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index dcdd9a25177..af95dc955b6 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -21,8 +21,14 @@ concurrency:
 jobs:
 
   get-torch-commit:
-    name: "Get torch commit"
-    uses: ./.github/workflows/_get_torch_commit.yml
+    runs-on: ubuntu-latest
+    outputs:
+      torch_commit: ${{ steps.commit.outputs.torch_commit }}
+    steps:
+      - id: commit
+        name: Get latest torch commit
+        run: |
+          echo "torch_commit=$(git ls-remote https://github.com/pytorch/pytorch.git HEAD | awk '{print $1}')" >> "$GITHUB_OUTPUT"
 
   build-torch-xla:
     name: "Build PyTorch/XLA"
diff --git a/.github/workflows/setup/action.yml b/.github/workflows/setup/action.yml
new file mode 100644
index 00000000000..6953f99cac3
--- /dev/null
+++ b/.github/workflows/setup/action.yml
@@ -0,0 +1,96 @@
+name: Set up PyTorch/XLA
+inputs:
+  torch-commit:
+    type: string
+    description: PyTorch commit to check out, if provided
+  cuda:
+    type: boolean
+    description: Whether to set up CUDA library paths
+    default: false
+  wheels-artifact:
+    type: string
+    description: |
+      Artifact containing `torch` (cpu) and `torch-xla` wheels to install
+  cuda-plugin-artifact:
+    type: string
+    description: Artifact containing `torch-xla-cuda-plugin` to install
+  cuda-torch-artifact:
+    type: string
+    description: Artifact containing CUDA build of `torch`
+runs:
+  using: "composite"
+  steps:
+    # See https://github.com/actions/checkout/issues/1014#issuecomment-1906802802
+    - name: Clean up workspace
+      shell: bash
+      run: |
+        ls -la
+        rm -rvf "${GITHUB_WORKSPACE}"/*
+    - name: Setup CUDA environment
+      shell: bash
+      run: |
+        echo "PATH=$PATH:/usr/local/cuda-12.1/bin" >> "$GITHUB_ENV"
+        echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12.1/lib64" >> "$GITHUB_ENV"
+      # Composite action inputs are always strings, so compare explicitly:
+      # a bare `inputs.cuda` is truthy even when its value is 'false'.
+      if: ${{ inputs.cuda == 'true' }}
+    - name: Setup gcloud
+      shell: bash
+      run: |
+        echo "${GCLOUD_SERVICE_KEY}" > /tmp/default_credentials.json
+        echo "GOOGLE_APPLICATION_CREDENTIALS=/tmp/default_credentials.json" >> "$GITHUB_ENV"
+      # GCLOUD_SERVICE_KEY needs to be set from the outside because for some
+      # reason composite actions don't support secrets.
+      # https://docs.github.com/en/actions/using-workflows/avoiding-duplication
+      if: ${{ env.GCLOUD_SERVICE_KEY }}
+    - name: Checkout PyTorch Repo
+      uses: actions/checkout@v4
+      with:
+        repository: pytorch/pytorch
+        path: pytorch
+        ref: ${{ inputs.torch-commit }}
+        submodules: recursive
+      if: ${{ inputs.torch-commit }}
+    - name: Checkout PyTorch/XLA Repo
+      uses: actions/checkout@v4
+      with:
+        path: pytorch/xla
+    - name: Fetch PyTorch/XLA packages
+      uses: actions/download-artifact@v4
+      with:
+        name: ${{ inputs.wheels-artifact }}
+        path: /tmp/wheels/
+      if: ${{ inputs.wheels-artifact }}
+    - name: Fetch CUDA plugin
+      uses: actions/download-artifact@v4
+      with:
+        name: ${{ inputs.cuda-plugin-artifact }}
+        path: /tmp/wheels/
+      if: ${{ inputs.cuda-plugin-artifact }}
+    - name: Register installed plugins
+      shell: bash
+      run: |
+        # TODO: Make PJRT_DEVICE=CPU work with XLA_REGISTER_INSTALLED_PLUGINS=1
+        echo "XLA_REGISTER_INSTALLED_PLUGINS=1" >> "$GITHUB_ENV"
+      # The workflow steps this action replaces exported this variable
+      # whenever they fetched the CUDA plugin, so keep doing it here.
+      if: ${{ inputs.cuda-plugin-artifact }}
+    - name: Remove CPU `torch` build
+      shell: bash
+      run: |
+        rm -rf /tmp/wheels/torch-*
+      if: ${{ inputs.cuda-torch-artifact }}
+    - name: Fetch CUDA `torch` build
+      uses: actions/download-artifact@v4
+      with:
+        name: ${{ inputs.cuda-torch-artifact }}
+        path: /tmp/wheels/
+      if: ${{ inputs.cuda-torch-artifact }}
+    - name: Install wheels
+      shell: bash
+      run: |
+        pip install /tmp/wheels/*.whl
+
+        echo "Import check..."
+        python -c "import torch_xla"
+      if: ${{ inputs.wheels-artifact }}