Skip to content

Commit

Permalink
this probably still won't work
Browse files Browse the repository at this point in the history
  • Loading branch information
neggles committed Sep 25, 2023
1 parent e1f0a3c commit c26b671
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 51 deletions.
41 changes: 22 additions & 19 deletions .github/workflows/build-ngc-xformers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,14 @@ concurrency:
cancel-in-progress: true

env:
IMAGE_REGISTRY: ghcr.io
IMAGE_NAME: xformers
IMAGE_NAMESPACE: neggles/tensorpods
IMAGE_REGISTRY: ghcr.io
IMAGE_NAMESPACE: ${{ github.repository }}
IMAGE_PLATFORMS: "linux/amd64"
BAKEFILE_NAME: docker-bake.xformers.hcl

# sorry pascal users but your cards are no good here
TORCH_CUDA_ARCH_LIST: "7.0;7.5;8.0;8.6;8.9;9.0"
# sorry pascal/volta users but GH only lets me compile for 6 hours
TORCH_CUDA_ARCH_LIST: "7.5;8.0;8.6;8.9;9.0"
# for ninja so the runner doesn't explode
MAX_JOBS: 1
# NVCC my behated
Expand All @@ -71,17 +72,21 @@ jobs:
max-parallel: 1
matrix:
include:
- target: "xformers"
- target: "xformers-v0021-ngc2308"
ngc-ver: "23.08"
xformers-ver: "tensorpods-v0.0.21"
xformers-ver: "v0.0.21"

- target: "xformers"
- target: "xformers-v0021-ngc2307"
ngc-ver: "23.07"
xformers-ver: "v0.0.21"

- target: "xformers-dev-ngc2308"
ngc-ver: "23.08"
xformers-ver: "tensorpods"
xformers-ver: "dev"

- target: "xformers"
- target: "xformers-dev-ngc2307"
ngc-ver: "23.07"
xformers-ver: "tensorpods"
xformers-ver: "dev"

steps:
- name: Checkout
Expand All @@ -103,18 +108,19 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Print bakefile before metadata-action
run: docker buildx bake -f ./${{ env.BAKEFILE_NAME }} --print

- name: Generate docker tags
id: meta
uses: docker/metadata-action@v4
with:
flavor: |
suffix=-ngc${{ matrix.ngc-ver }}
suffix=-${{ matrix.xformers-ver }}-ngc${{ matrix.ngc-ver }}
images: |
${{ env.IMAGE_REGISTRY }}/${{ github.repository }}/${{ matrix.target }}
${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE_NAMESPACE }}/${{ env.IMAGE_NAME }}
tags: |
type=raw,value=latest,enable={{is_default_branch}}
type=raw,value=${{ matrix.xformers-ver }},enable={{is_default_branch}}
type=schedule,pattern={{date 'YYYYMMDD-hhmm' tz='UTC'}},enable={{is_default_branch}}
type=raw,value=${{ matrix.xformers-ver }},enable={{is_default_branch}},suffix=-ngc${{ matrix.ngc-ver }}
type=sha,format=short
type=ref,event=tag
type=ref,event=pr
Expand All @@ -135,13 +141,10 @@ jobs:
- name: Build & Push Image
id: build-push
uses: docker/bake-action@v3
env:
NGC_VERSION: ${{ matrix.ngc-ver }}
XFORMERS_REF: ${{ matrix.xformers-ver }}
with:
targets: ${{ matrix.target }}
files: |
./docker-bake.xformers.hcl
./${{ env.BAKEFILE_NAME }}
${{ steps.meta.outputs.bake-file }}
push: ${{ contains(fromJSON('["push", "schedule"]'), github.event_name) || inputs.force-push }}
set: |
Expand Down
133 changes: 101 additions & 32 deletions docker-bake.xformers.hcl
Original file line number Diff line number Diff line change
@@ -1,71 +1,140 @@
# docker-bake.xformers.hcl: bake definitions for the tensorpods xformers image builds
group "default" {
targets = ["base"]
targets = ["xformers"]
}

# Registry host for images from this repo; repoImage() uses it as the first
# path component of the image reference.
variable "IMAGE_REGISTRY" {
default = "ghcr.io"
}

variable "IMAGE_NAMESPACE" {
variable IMAGE_NAMESPACE {
default = "neggles/tensorpods"
}

variable "IMAGE_NAME" {
default = "xformers"
variable TORCH_CUDA_ARCH_LIST {
# sorry pascal users but your cards are no good here
# n.b. in GH builds volta is not available due to compile timeouts
default = "7.0;7.5;8.0;8.6;8.9;9.0"
}

variable "NGC_VERSION" {
default = "23.08"
variable MAX_JOBS {
default = "8"
}

variable "XFORMERS_REPO" {
default = "https://github.com/neggles/xformers.git"
variable "NVCC_THREADS" {
default = "1"
}

variable "XFORMERS_REF" {
default = "tensorpods"
# Removes characters not valid in a target name, i.e. everything except ASCII
# letters, digits, "_" and "-" (e.g. "v0.0.21" -> "v0021"); useful for other
# things too.
function stripName {
params = [name]
# each run of disallowed characters is replaced with the empty string
result = regex_replace(name, "[^a-zA-Z0-9_-]+", "")
}

variable "TORCH_CUDA_ARCH_LIST" {
# sorry pascal users but your cards are no good here
default = "7.0;7.5;8.0;8.6;8.9;9.0"
# Build an nvidia/cuda image URI for Ubuntu 22.04 from a CUDA version and a
# container type. The tag has the form
#   <cudaVer>[-<extraVals>...]-<cudaType>-ubuntu22.04
# with any extra values placed between the version and the type, e.g.
#   cudaImage("12.2.1", "devel") -> "nvidia/cuda:12.2.1-devel-ubuntu22.04"
function cudaImage {
  params          = [cudaVer, cudaType]
  variadic_params = extraVals
  result          = "nvidia/cuda:${join("-", [cudaVer], extraVals, [cudaType, "ubuntu22.04"])}"
}

variable "MAX_JOBS" {
default = "1"
# Convert a CUDA version number into a shortname: "cu" followed by the major
# and minor components with the dot removed (e.g. 11.2.1 -> cu112).
# The patch component, and anything else after the minor version, is dropped.
function cudaName {
  params = [version]
  # the minor component is matched with \d+ so a multi-digit minor is kept whole
  # instead of being cut to its first digit (12.10 must not collapse to cu121,
  # which is the shortname of 12.1)
  result = regex_replace(version, "^(\\d+)\\.(\\d+).*", "cu$1$2")
}

function "imagetag" {
params = [imagename, tag]
result = "${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/${imagename}:${tag}-ngc${NGC_VERSION}"
# Convert a CUDA version number into a release number: the major and minor
# components joined with "-" (e.g. 11.2.1 -> 11-2).
# The patch component, and anything else after the minor version, is dropped.
function cudaRelease {
  params = [version]
  # the minor component is matched with \d+ so a multi-digit minor is kept whole
  # instead of being cut to its first digit (12.10 must not collapse to 12-1)
  result = regex_replace(version, "^(\\d+)\\.(\\d+).*", "$1-$2")
}

# Torch version to torch name: "torch" followed by the major, minor and patch
# numbers with the dots removed (e.g. 2.0.1 -> torch201). Anything after the
# patch number is dropped.
function torchName {
params = [version]
# the pattern requires three dot-separated numbers at the start of the string
result = regex_replace(version, "^(\\d+)\\.(\\d+)\\.(\\d+).*", "torch$1$2$3")
}

# Build a tagged reference for an image from this repo, shaped as
#   <IMAGE_REGISTRY>/<IMAGE_NAMESPACE>/<imageName>:<extraVals joined with "-">
function repoImage {
  params          = [imageName]
  variadic_params = extraVals
  result          = "${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/${imageName}:${join("-", extraVals)}"
}

# set to "true" by github actions, used to disable auto-tag: while CI is "true"
# the xformers target's own tags entry evaluates to "" instead of a
# repoImage() tag. Defaults to "" (auto-tag enabled).
variable "CI" { default = "" }

# docker-metadata-action will populate this in GitHub Actions: the workflow
# passes the action's generated bake file as an extra bake file. Left empty
# here so that targets inheriting from it still resolve without that file.
target "docker-metadata-action" {}

# Shared amongst all containers
target "common" {
context = "."
contexts = {
ngc = "docker-image://nvcr.io/nvidia/pytorch:${NGC_VERSION}-py3"
}
context = "."
dockerfile = "Dockerfile"
args = {
XFORMERS_IMAGE = "xformers"
BASE_IMAGE = "ngc"
TORCH_CUDA_ARCH_LIST = TORCH_CUDA_ARCH_LIST
MAX_JOBS = MAX_JOBS
NVCC_THREADS = NVCC_THREADS
}
platforms = ["linux/amd64"]
output = [
"type=docker",
]
}

target "xformers" {
inherits = ["common", "docker-metadata-action"]
context = "docker/xformers"
dockerfile = "Dockerfile"
target = "xformers"
name = stripName("xformers-${xformers.version}-${base.type}${base.version}")
inherits = ["common", "docker-metadata-action"]
context = "docker/xformers"
target = "xformers"
contexts = {
base-ngc = "docker-image://nvcr.io/nvidia/pytorch:${base.version}-py3"
base-torch = "docker-image://${repoImage("base", cudaName(base.cuda), base.type, base.version)}"
}
matrix = {
base = [
{
type = "ngc"
version = "23.08"
cuda = "12.2.1"
},
{
type = "ngc"
version = "23.07"
cuda = "12.1.1"
},
],
xformers = [
{
version = "v0.0.21",
repo = "https://github.com/neggles/xformers.git"
ref = "tensorpods-v0.0.21"
buildtype = "release"
},
{
version = "dev"
repo = "https://github.com/neggles/xformers.git"
ref = "tensorpods"
buildtype = "release"
}
]
}
tags = [
notequal("true", CI) ? repoImage("xformers", xformers.version, "${base.type}${base.version}") : ""
]
args = {
XFORMERS_REPO = XFORMERS_REPO
XFORMERS_REF = XFORMERS_REF
TORCH_CUDA_ARCH_LIST = TORCH_CUDA_ARCH_LIST
MAX_JOBS = MAX_JOBS
BASE_IMAGE = "base-${base.type}"

XFORMERS_REPO = xformers.repo
XFORMERS_REF = xformers.ref
XFORMERS_BUILD_TYPE = xformers.buildtype
}
}

0 comments on commit c26b671

Please sign in to comment.