Add workflow for a test that requires PyTorch CUDA.
vanbasten23 committed May 16, 2024
1 parent aeed89e commit 3989e0c
Showing 4 changed files with 82 additions and 2 deletions.
59 changes: 59 additions & 0 deletions .github/workflows/_build_torch_with_cuda_and_xla.yml
@@ -0,0 +1,59 @@
name: build-cuda-plugin
on:
  workflow_call:
    inputs:
      dev-image:
        required: true
        type: string
        description: Base image for builds
      runner:
        required: false
        type: string
        description: Runner type for the test
        default: linux.12xlarge

    secrets:
      gcloud-service-key:
        required: true
        description: Secret to access Bazel build cache
jobs:
  build:
    runs-on: ${{ inputs.runner }}
    container:
      image: ${{ inputs.dev-image }}
    env:
      GCLOUD_SERVICE_KEY: ${{ secrets.gcloud-service-key }}
      GOOGLE_APPLICATION_CREDENTIALS: /tmp/default_credentials.json
      BAZEL_JOBS: 16
      BAZEL_REMOTE_CACHE: 1
      BUILD_CPP_TESTS: 1
    steps:
      - name: Setup gcloud
        shell: bash
        run: |
          echo "${GCLOUD_SERVICE_KEY}" > $GOOGLE_APPLICATION_CREDENTIALS
      - name: Checkout PyTorch Repo
        uses: actions/checkout@v4
        with:
          repository: pytorch/pytorch
          path: pytorch
          submodules: recursive
      - name: Checkout PyTorch/XLA Repo
        uses: actions/checkout@v4
        with:
          path: pytorch/xla
      - name: Build
        shell: bash
        run: |
          cd pytorch/xla/infra/ansible
          ansible-playbook playbook.yaml -vvv -e "stage=build arch=amd64 accelerator=cuda cuda_compute_capabilities=5.2,7.5 src_root=${GITHUB_WORKSPACE} build_cpp_tests=1 git_versioned_xla_build=1 cache_suffix=-ci build_pytorch_with_cuda=1" --skip-tags=fetch_srcs,install_deps
      - name: Upload wheel
        uses: actions/upload-artifact@v4
        with:
          name: torch-xla-wheels
          path: /dist/*.whl
      - name: Upload CPP test binaries
        uses: actions/upload-artifact@v4
        with:
          name: cpp-test-bin
          path: /tmp/test/bin
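
For context, the wheels and C++ test binaries uploaded above are meant to be consumed by a later test job. A minimal sketch of such a consumer step, assuming the same artifact names and actions/download-artifact@v4; the job layout and download path here are illustrative, not part of this commit:

      - name: Fetch torch/xla wheels
        uses: actions/download-artifact@v4
        with:
          name: torch-xla-wheels
          path: /tmp/wheels/        # hypothetical download location
      - name: Install wheels
        shell: bash
        run: |
          pip install /tmp/wheels/*.whl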
21 changes: 21 additions & 0 deletions .github/workflows/build_and_test.yml
@@ -28,6 +28,14 @@ jobs:
    secrets:
      gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}

  build-torch-with-cuda-xla-with-cuda:
    name: "Build PyTorch with CUDA and PyTorch/XLA"
    uses: ./.github/workflows/_build_torch_with_cuda_and_xla.yml
    with:
      dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_cuda_12.1
    secrets:
      gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}

  build-cuda-plugin:
    name: "Build XLA CUDA plugin"
    uses: ./.github/workflows/_build_plugin.yml
@@ -50,6 +58,19 @@ jobs:
  test-cuda:
    name: "GPU tests"
    uses: ./.github/workflows/_test.yml
    needs: [build-torch-with-cuda-xla-with-cuda]
    with:
      dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_cuda_12.1
      runner: linux.8xlarge.nvidia.gpu
      timeout-minutes: 300
      collect-coverage: false
      install-cuda-plugin: true
    secrets:
      gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}

  test-cuda-with-pytorch-cuda-enabled:
    name: "GPU tests with PyTorch CUDA enabled"
    uses: ./.github/workflows/_test.yml
    needs: [build-torch-xla, build-cuda-plugin]
    with:
      dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_cuda_12.1
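
The parameters passed to _test.yml above imply a matching workflow_call interface on the callee side. A rough sketch of what those input declarations would look like, assuming _test.yml follows the same workflow_call pattern as the build workflow added in this commit; the names are taken from the call sites, while types and defaults are assumptions:

on:
  workflow_call:
    inputs:
      dev-image:
        required: true
        type: string
      runner:
        required: false
        type: string
      timeout-minutes:
        required: false
        type: number
      collect-coverage:
        required: false
        type: boolean
      install-cuda-plugin:
        required: false
        type: boolean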
1 change: 1 addition & 0 deletions infra/ansible/config/env.yaml
@@ -43,6 +43,7 @@ build_env:
  cuda:
    TF_CUDA_COMPUTE_CAPABILITIES: "{{ cuda_compute_capabilities }}"
    XLA_CUDA: 1
+   USE_CUDA: "{{ build_pytorch_with_cuda }}"

  tpu:
    ACCELERATOR: tpu
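
To trace the new variable end to end: the Build step's ansible-playbook invocation passes build_pytorch_with_cuda=1 (and cuda_compute_capabilities=5.2,7.5) as extra vars, and this env.yaml entry templates that value into USE_CUDA for the PyTorch build. A sketch of the resolved cuda build environment under that invocation; the rendered values are illustrative of the templating, not part of the commit itself:

# Effective cuda build env after templating with build_pytorch_with_cuda=1
cuda:
  TF_CUDA_COMPUTE_CAPABILITIES: "5.2,7.5"
  XLA_CUDA: 1
  USE_CUDA: "1"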
3 changes: 1 addition & 2 deletions infra/ansible/roles/build_srcs/tasks/main.yaml
@@ -27,8 +27,7 @@
    cmd: python setup.py bdist_wheel
    chdir: "{{ (src_root, 'pytorch') | path_join }}"
    creates: "{{ (src_root, 'pytorch/dist/*.whl') | path_join }}"
-  # Set `USE_CUDA=0` as PyTorch cannot be used with GPU in eager and XLA mode.
-  environment: "{{ env_vars | combine({'USE_CUDA': 0}) }}"
+  environment: "{{ env_vars }}"

- name: Find PyTorch *.whl files in pytorch/dist
  ansible.builtin.find:
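
Read together with the env.yaml change, the wheel-build task now takes USE_CUDA from env_vars instead of forcing it to 0. A sketch of how the task would read after this commit; the task name and module wrapper are assumed from the file's structure and are not shown in this hunk:

# Hypothetical full task after the change (task name assumed for illustration)
- name: Build PyTorch wheel
  ansible.builtin.command:
    cmd: python setup.py bdist_wheel
    chdir: "{{ (src_root, 'pytorch') | path_join }}"
    creates: "{{ (src_root, 'pytorch/dist/*.whl') | path_join }}"
  # USE_CUDA now flows from env_vars, i.e. "{{ build_pytorch_with_cuda }}" on CUDA builds.
  environment: "{{ env_vars }}"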
