Skip to content

Commit

Permalink
Add ansible stage for building CUDA plugin
Browse files Browse the repository at this point in the history
  • Loading branch information
will-cromar committed Apr 10, 2024
1 parent 786484d commit 4d7b5bc
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 10 deletions.
56 changes: 56 additions & 0 deletions .github/workflows/_build_plugin.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Reusable workflow: runs the ansible `build_cuda` stage inside the dev image
# and uploads the resulting CUDA plugin wheel as the `cuda-plugin` artifact.
name: build-cuda-plugin
on:
  workflow_call:
    inputs:
      dev-image:
        required: true
        type: string
        description: Base image for builds
      runner:
        required: false
        type: string
        description: Runner type for the test
        default: linux.12xlarge
      cuda:
        required: false
        type: string
        description: Whether to build XLA with CUDA
        # Quoted so the default really is a string, matching `type: string`.
        default: "1"

    secrets:
      gcloud-service-key:
        required: true
        description: Secret to access Bazel build cache

    outputs:
      docker-image:
        # NOTE(review): the `build` job below declares no `outputs`, so this
        # presumably resolves to an empty string — confirm or remove.
        value: ${{ jobs.build.outputs.docker-image }}
        description: The docker image containing the built PyTorch.
jobs:
  build:
    runs-on: ${{ inputs.runner }}
    container:
      image: ${{ inputs.dev-image }}
      volumes:
        # NOTE(review): shell-style variables are presumably not expanded
        # here — verify the host side of this mount is what you expect.
        - ${GITHUB_WORKSPACE}/dist/:/dist/
    env:
      GCLOUD_SERVICE_KEY: ${{ secrets.gcloud-service-key }}
      BAZEL_JOBS: 16
      BAZEL_REMOTE_CACHE: 1
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
      - name: Setup gcloud
        shell: bash
        run: |
          echo "${GCLOUD_SERVICE_KEY}" >> default_credentials.json
      - name: Build
        shell: bash
        run: |
          cd infra/ansible
          # TODO: don't clone everything again
          ansible-playbook playbook.yaml -e "stage=build_cuda arch=amd64 accelerator=cuda" --skip-tags=install_deps
      # The action must be referenced with `uses:`; a step that only has
      # `name:` and `with:` is rejected by the workflow parser.
      - name: Upload wheel
        uses: actions/upload-artifact@v4
        with:
          name: cuda-plugin
          # Action inputs are not shell-expanded, so `${GITHUB_WORKSPACE}`
          # would be taken literally. `/dist` is the container-side path of
          # the mount above and the directory the build writes wheels to.
          path: /dist/*.whl
5 changes: 5 additions & 0 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ jobs:
secrets:
gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}

# Builds the XLA CUDA plugin by calling the reusable _build_plugin workflow.
build-cuda-plugin:
  name: "Build XLA CUDA plugin"
  # A job that passes `with:` inputs has to name the called workflow.
  uses: ./.github/workflows/_build_plugin.yml
  with:
    dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_cuda_12.3
  # _build_plugin.yml declares `gcloud-service-key` as a required secret.
  secrets:
    gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}

test-cpu:
name: "CPU tests"
uses: ./.github/workflows/_test.yml
Expand Down
15 changes: 14 additions & 1 deletion infra/ansible/playbook.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"Pass the required variable with: --e \"{{ item.name }}=<value>\""
loop:
# Allowed values for the `stage` variable. A mapping may carry only one
# `pattern` key, so only the pattern that also accepts `build_cuda` is kept.
- name: stage
  pattern: ^(build|build_cuda|release)$
- name: arch
pattern: ^(aarch64|amd64)$
- name: accelerator
Expand Down Expand Up @@ -73,6 +73,7 @@
src_root: "/src"
tags: fetch_srcs

# TODO: Rename this to e.g. `build_package`
- role: build_srcs
vars:
src_root: "/src"
Expand All @@ -81,8 +82,20 @@
combine(build_env[arch] | default({}, true)) |
combine(build_env[accelerator] | default({}, true))
}}"
when: stage == "build"
tags: build_srcs

# Runs the build_cuda role only for the `build_cuda` stage. Its environment
# is build_env.common overlaid with the arch- and accelerator-specific
# entries, in that order.
- role: build_cuda
  tags: build_cuda
  when: stage == "build_cuda"
  vars:
    src_root: "/src"
    env_vars: >-
      {{
      build_env.common | default({}, true) |
      combine(build_env[arch] | default({}, true)) |
      combine(build_env[accelerator] | default({}, true))
      }}

- role: configure_env
vars:
env_vars: "{{
Expand Down
23 changes: 23 additions & 0 deletions infra/ansible/roles/build_cuda/tasks/main.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Tasks for the build_cuda role: build the CUDA plugin wheel into /dist,
# then install whatever plugin wheels are found there.
- name: Create /dist directory for exported wheels
  ansible.builtin.file:
    path: /dist
    state: directory
    mode: '0755'

# `pip wheel` runs from the pytorch/xla checkout under src_root and writes
# the wheel to the absolute /dist directory created above.
- name: Build PyTorch/XLA CUDA Plugin
  ansible.builtin.command:
    cmd: pip wheel -w /dist plugins/cuda -v
    chdir: "{{ (src_root, 'pytorch/xla') | path_join }}"
  environment: "{{ env_vars }}"
  when: accelerator == "cuda"

# Name corrected: the search runs in /dist, not pytorch/xla/dist.
- name: Find CUDA plugin *.whl files in /dist
  ansible.builtin.find:
    path: "/dist"
    pattern: "torch_xla_cuda_plugin*.whl"
  register: cuda_plugin_wheels

# Gated like the build task, so a non-cuda run neither installs stale wheels
# from /dist nor calls pip with an empty package list.
- name: Install CUDA plugin wheels
  ansible.builtin.pip:
    name: "{{ cuda_plugin_wheels.files | map(attribute='path') }}"
    state: "forcereinstall"
  when: accelerator == "cuda"
7 changes: 0 additions & 7 deletions infra/ansible/roles/build_srcs/tasks/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,6 @@
chdir: "{{ (src_root, 'pytorch/xla') | path_join }}"
environment: "{{ env_vars }}"

# Builds the CUDA plugin wheel with `pip wheel`, writing to the relative
# `dist` directory inside the pytorch/xla checkout under src_root.
# Runs only when accelerator is "cuda".
- name: Build PyTorch/XLA CUDA Plugin
ansible.builtin.command:
cmd: pip wheel -w dist plugins/cuda -v
chdir: "{{ (src_root, 'pytorch/xla') | path_join }}"
environment: "{{ env_vars }}"
when: accelerator == "cuda"

- name: Find XLA *.whl files in pytorch/xla/dist
ansible.builtin.find:
path: "{{ (src_root, 'pytorch/xla/dist') | path_join }}"
Expand Down
2 changes: 1 addition & 1 deletion plugins/cuda/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ build-backend = "setuptools.build_meta"

[project]
name = "torch_xla_cuda_plugin"
# NOTE(review): the email below looks redacted by the page scrape — restore
# the real address before committing.
authors = [
    {name = "PyTorch/XLA Dev Team", email = "[email protected]"},
]
description = "PyTorch/XLA CUDA Plugin"
requires-python = ">=3.8"
# The version is passed to setuptools.setup() in setup.py, so it is declared
# dynamic here. A field may not be both set statically and listed in `dynamic`.
dynamic = ["version"]

[tool.setuptools.package-data]
torch_xla_cuda_plugin = ["lib/*.so"]
Expand Down
6 changes: 5 additions & 1 deletion plugins/cuda/setup.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import datetime
import os
import sys

Expand All @@ -10,4 +11,7 @@
# Build the PJRT GPU plugin shared object with the CUDA Bazel config.
# NOTE(review): 'torch_xla_cuda_plugin/lib' is presumably the output
# directory for the built .so — confirm against build_util.bazel_build.
build_util.bazel_build('@xla//xla/pjrt/c:pjrt_c_api_gpu_plugin.so',
                       'torch_xla_cuda_plugin/lib', ['--config=cuda'])

# setup() is called exactly once, with a date-stamped dev version derived
# from today's date.
setuptools.setup(
    # TODO: Use a common version file
    version=f'2.4.0.dev{datetime.date.today().strftime("%Y%m%d")}')

0 comments on commit 4d7b5bc

Please sign in to comment.