Enable PagedAttention through Pallas #7764

Workflow file for this run

.github/workflows/build_and_test.yml at ca36c35

	# CI entry point: builds PyTorch/XLA and fans out to the reusable
	# build/test/docs workflows under ./.github/workflows/.
	name: Build and test
	on:
	  # Run for PRs targeting, and pushes to, master or branches matching
	  # r<digits>.<digits>; changes confined to experimental/torch_xla2 are skipped.
	  pull_request:
	    branches:
	      - master
	      - r[0-9]+.[0-9]+
	    paths-ignore:
	      - 'experimental/torch_xla2/**'
	  push:
	    branches:
	      - master
	      - r[0-9]+.[0-9]+
	    paths-ignore:
	      - 'experimental/torch_xla2/**'
	  # Allow manual runs.
	  workflow_dispatch:

	concurrency:
	  # One group per workflow and per PR number (commit SHA when the event has no
	  # PR), further split by whether the run was started manually or on a schedule.
	  # NOTE(review): no `schedule` trigger is declared under `on:` in this file, so
	  # the last component looks like it is always `false` - confirm it is still needed.
	  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
	  # A newer run in the same group cancels the one still in progress.
	  cancel-in-progress: true

	jobs:
	  # Old CI workflow
	  build:
	    name: "Build PyTorch/XLA (GPU)"
	    uses: ./.github/workflows/_build.yml
	    with:
	      ecr-docker-image-base: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/xla_base
	      gcr-docker-image: gcr.io/tpu-pytorch/xla_base:dev-3.8_cuda_12.1
	      cuda: 1
	    secrets:
	      gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}

	  # The three jobs below consume the docker image exposed as an output of `build`.
	  test-cpp-cpu:
	    name: "CPU C++ tests"
	    uses: ./.github/workflows/_test_cpp.yml
	    needs: build
	    with:
	      docker-image: ${{ needs.build.outputs.docker-image }}
	      timeout-minutes: 120
	      collect-coverage: false # TODO(yeounoh) separate from CPU coverage metrics
	    secrets:
	      gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}

	  test-cpp-cuda:
	    name: "GPU C++ tests"
	    uses: ./.github/workflows/_test_cpp.yml
	    needs: build
	    with:
	      docker-image: ${{ needs.build.outputs.docker-image }}
	      runner: linux.8xlarge.nvidia.gpu
	      timeout-minutes: 300
	      collect-coverage: false # TODO(yeounoh) separate from CPU coverage metrics
	    secrets:
	      gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}

	  push-docs:
	    name: "Build & publish docs"
	    # Only on pushes to master or to a ref under refs/tags/r*.
	    # NOTE(review): the `push` trigger above filters on `branches` only, so the
	    # tag clause may never match - confirm whether a `tags` filter is intended.
	    if: github.event_name == 'push' && (github.event.ref == 'refs/heads/master' || startsWith(github.event.ref, 'refs/tags/r'))
	    uses: ./.github/workflows/_docs.yml
	    needs: build
	    with:
	      docker-image: ${{ needs.build.outputs.docker-image }}
	    secrets:
	      torchxla-bot-token: ${{ secrets.TORCH_XLA_BOT_TOKEN }}

	  # New CI workflow
	  build-torch-xla:
	    name: "Build PyTorch/XLA (TPU)"
	    uses: ./.github/workflows/_build_torch_xla.yml
	    with:
	      dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_tpuvm
	    secrets:
	      gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}

	  build-cuda-plugin:
	    name: "Build XLA CUDA plugin"
	    uses: ./.github/workflows/_build_plugin.yml
	    with:
	      dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_cuda_12.1
	    secrets:
	      gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}

	  test-python-cpu:
	    name: "CPU Python tests"
	    uses: ./.github/workflows/_test_python.yml
	    needs: build-torch-xla
	    with:
	      dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_tpuvm
	      timeout-minutes: 120
	      collect-coverage: false
	    secrets:
	      gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}

	  test-python-cuda:
	    name: "GPU Python tests"
	    uses: ./.github/workflows/_test_python.yml
	    # Waits for both the torch_xla build and the CUDA plugin build.
	    needs: [build-torch-xla, build-cuda-plugin]
	    with:
	      dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_cuda_12.1
	      runner: linux.8xlarge.nvidia.gpu
	      timeout-minutes: 300
	      collect-coverage: false
	      install-cuda-plugin: true
	    secrets:
	      gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}

	  test-tpu:
	    name: "TPU tests"
	    uses: ./.github/workflows/_tpu_ci.yml
	    needs: build-torch-xla
	    # Only run this for HEAD and releases
	    if: github.event_name == 'push'

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Enable PagedAttention through Pallas #7764

Workflow file

Enable PagedAttention through Pallas #7764

Jobs

Run details

Workflow file for this run