Skip to content

Commit

Permalink
rework this a bit
Browse files Browse the repository at this point in the history
  • Loading branch information
neggles committed Sep 27, 2023
1 parent c7e1e24 commit 8cc38d8
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 46 deletions.
7 changes: 5 additions & 2 deletions .github/workflows/build-push-gradient.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,10 @@ jobs:
include:
- name: "notebook"
target: "gradient"
torch-ver: "2.0.1"
torch-ver: "torch201"
- name: "notebook"
target: "gradient"
torch-ver: "torch210"

steps:
- name: Checkout
Expand Down Expand Up @@ -119,7 +122,7 @@ jobs:
id: build-push
uses: docker/bake-action@v3
with:
targets: ${{ matrix.target }}
targets: ${{ matrix.target }}-${{ matrix.torch-ver }}
files: |
./docker-bake.hcl
${{ steps.meta.outputs.bake-file }}
Expand Down
98 changes: 73 additions & 25 deletions docker-bake.hcl
Original file line number Diff line number Diff line change
@@ -1,76 +1,124 @@
# docker-bake.hcl for stable-diffusion-webui
group "default" {
targets = ["gradient"]
targets = ["gradient-torch201"]
}

variable "IMAGE_REGISTRY" {
group torchrc {
targets = ["gradient-torch210"]
}

variable IMAGE_REGISTRY {
default = "ghcr.io"
}

variable "IMAGE_NAME" {
variable IMAGE_NAMESPACE {
default = "neggles/psychic-paper"
}

variable "BASE_IMAGE" {
default = "nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04"
variable CUDA_VERSION {
default = "12.1.1"
}

variable "CUDA_VERSION" {
default = "12.1"
variable TORCH_CUDA_ARCH_LIST {
default = "7.0;7.5;8.0;8.6;8.9;9.0"
}

variable "TORCH_VERSION" {
default = "torch"
# stripName(name): return `name` with every run of characters outside
# [a-zA-Z0-9_-] deleted, so the result can be used as a bake target name
# (or anywhere else a plain identifier-like string is needed).
# e.g. stripName("torch 2.1.0") -> "torch210"
function stripName {
params = [name]
result = regex_replace(name, "[^a-zA-Z0-9_-]+", "")
}

variable "TORCH_INDEX" {
default = "https://download.pytorch.org/whl/cu118"
# cudaImage(cudaVer, cudaType, extraVals...): build an nvidia/cuda image reference.
# The tag is cudaVer, then any extraVals, then cudaType and "ubuntu22.04", all joined with "-".
# e.g. cudaImage("12.1.1", "devel", "cudnn8") -> "nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04"
function cudaImage {
  params          = [cudaVer, cudaType]
  variadic_params = extraVals
  result          = "nvidia/cuda:${join("-", [cudaVer], extraVals, [cudaType, "ubuntu22.04"])}"
}

variable "TORCH_CUDA_ARCH_LIST" {
default = "7.0;7.5;8.0;8.6;8.9;9.0"
# cudaRelease(version): convert a dotted CUDA version into its "major-minor"
# release string (e.g. "11.2.1" -> "11-2", "12.1" -> "12-1").
# The minor component is matched with \d+ so a multi-digit minor is kept whole
# ("12.10.0" -> "12-10") instead of being cut down to its first digit.
# A string that does not start with "<digits>.<digits>" is returned unchanged.
function cudaRelease {
  params = [version]
  result = regex_replace(version, "^(\\d+)\\.(\\d+).*", "$1-$2")
}

variable "XFORMERS_VERSION" {
default = "xformers==0.0.21"
# repoImage(imageName, extraVals...): build a reference to an image published from this repo,
# shaped as <IMAGE_REGISTRY>/<IMAGE_NAMESPACE>/<imageName>:<extraVals joined with "-">.
# With the default variables, repoImage("base", "torch210") -> "ghcr.io/neggles/psychic-paper/base:torch210"
function repoImage {
  params          = [imageName]
  variadic_params = extraVals
  result          = "${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/${imageName}:${join("-", extraVals)}"
}

# set to "true" by github actions, used to disable auto-tag
variable CI { default = "" }

# docker-metadata-action will populate this in GitHub Actions
target "docker-metadata-action" {}
target docker-metadata-action {}

# Shared amongst all containers
target "common" {
target common {
context = "./docker"
args = {
CUDA_VERSION = CUDA_VERSION
CUDA_RELEASE = "${regex_replace(CUDA_VERSION, "\\.", "-")}"
CUDA_RELEASE = cudaRelease(CUDA_VERSION)

TORCH_CUDA_ARCH_LIST = TORCH_CUDA_ARCH_LIST
}
platforms = ["linux/amd64"]
}

# Base image with cuda, python, torch, and other dependencies
target "base" {
target base-torch201 {
inherits = ["common", "docker-metadata-action"]
dockerfile = "Dockerfile.base"
target = "base"
target = "base-xformers-bin"
args = {
TORCH_INDEX = TORCH_INDEX
TORCH_VERSION = TORCH_VERSION
TORCH_INDEX = "https://download.pytorch.org/whl/cu118"
TORCH_PACKAGE = "torch"
EXTRA_PIP_ARGS = ""

XFORMERS_VERSION = XFORMERS_VERSION
XFORMERS_PACKAGE = "xformers==0.0.21"
}
}

# Base image variant for the torch210 build: torch is installed from the
# download.pytorch.org "test" cu121 wheel index, and xformers comes from a
# container image instead of a pip requirement.
target base-torch210 {
inherits = ["common", "docker-metadata-action"]
dockerfile = "Dockerfile.base"
# Dockerfile.base stage that installs xformers from a wheel bind-mounted out of an image
target = "base-xformers-ghcr"
args = {
TORCH_INDEX = "https://download.pytorch.org/whl/test/cu121"
TORCH_PACKAGE = "torch"
EXTRA_PIP_ARGS = ""

# image reference (not a pip requirement) that the stage mounts to find the wheel
XFORMERS_PACKAGE = "ghcr.io/neggles/tensorpods/xformers:v0.0.21-cu121-torch210"
}
}

# Paperspace Gradient image
target "gradient" {
target gradient-torch201 {
inherits = ["common", "docker-metadata-action"]
dockerfile = "Dockerfile.gradient"
target = "gradient"
contexts = {
base = "target:base-torch201"
}
args = {
NODE_MAJOR = 18
}
}

target gradient-torch210 {
inherits = ["common", "docker-metadata-action"]
dockerfile = "Dockerfile.gradient"
target = "gradient"
contexts = {
base = "target:base"
base = "target:base-torch210"
}
args = {
NODE_MAJOR = 18
Expand Down
57 changes: 41 additions & 16 deletions docker/Dockerfile.base
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ ARG BASE_IMAGE=nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
ARG DEBIAN_FRONTEND=noninteractive
ARG DEBIAN_PRIORITY=critical
ARG PIP_PREFER_BINARY=1
ARG TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;8.9;9.0"
ARG TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;8.9;9.0+PTX"

# Build the base image.
FROM ${BASE_IMAGE} as base
Expand Down Expand Up @@ -101,12 +101,15 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
&& apt-get clean

# Install TensorRT libraries
ARG INCLUDE_TRT=true
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
apt-get update \
&& apt-get -y install --no-install-recommends \
libnvinfer-dev \
python3-libnvinfer-dev \
# POSIX `=` (not the bash-only `==`) so the test also works when RUN executes under /bin/sh;
# with `==`, dash reports "unexpected operator" and the TensorRT install is silently skipped.
&& if [ "${INCLUDE_TRT}" = "true" ]; then \
apt-get -y install --no-install-recommends \
libnvinfer-dev \
python3-libnvinfer-dev \
; fi \
&& apt-get clean

# Install other CUDA libraries
Expand All @@ -133,22 +136,44 @@ RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \

# Install PyTorch
ARG TORCH_INDEX
ARG TORCH_VERSION
ARG EXTRA_PIP_ARGS
ARG TORCH_PACKAGE="torch"
ARG TRITON_PACKAGE=" "
ARG EXTRA_PIP_ARGS=" "
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
python -m pip install ${EXTRA_PIP_ARGS} \
--extra-index-url ${TORCH_INDEX} \
"${TORCH_VERSION:-torch}" \
python -m pip install ${EXTRA_PIP_ARGS:-} \
${TORCH_PACKAGE} \
${TRITON_PACKAGE} \
torchaudio \
torchvision
torchvision \
--index-url "${TORCH_INDEX}"

# Install xformers
ARG XFORMERS_VERSION
# save and enforce a constraint file to lock the torch version
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
python -m pip install "${XFORMERS_VERSION}"
python -m pip freeze | grep -E '(^torch|triton)' > /torch-constraints.txt
ENV PIP_CONSTRAINT=/torch-constraints.txt

# set work dir
WORKDIR /workspace

# we do a little entrypoint setup
#
CMD ["/bin/bash", "-l"]

# Specific required versions for everything else will be installed in their respective images
# since this stuff tends to be pretty picky about versioning.

# Variant for torch versions that have a prebuilt xformers wheel installable with pip.
FROM base AS base-xformers-bin

# Install xformers
# XFORMERS_PACKAGE is a pip requirement specifier (e.g. "xformers==0.0.21").
# EXTRA_PIP_ARGS is expanded unquoted, so several flags word-split into separate arguments.
ARG XFORMERS_PACKAGE="xformers"
ARG EXTRA_PIP_ARGS=""
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
    python -m pip install ${EXTRA_PIP_ARGS:-} "${XFORMERS_PACKAGE}"


# Variant for torch versions with no pip-installable xformers wheel: XFORMERS_PACKAGE
# names a container image whose /xformers directory is bind-mounted for the RUN step,
# and the xformers*.whl found there is installed with pip.
# NOTE(review): BuildKit has historically rejected variable expansion in the `from=`
# option of RUN --mount ("define alias stage instead") — confirm this builds with the
# Dockerfile frontend in use; if not, alias the image via a FROM stage or a bake named context.
FROM base AS base-xformers-ghcr
ARG XFORMERS_PACKAGE='ghcr.io/neggles/tensorpods/xformers:v0.0.21-cu121-torch210'
ARG EXTRA_PIP_ARGS=""

RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
--mount=type=bind,from=${XFORMERS_PACKAGE},source=/xformers,dst=/xformers \
python -m pip install ${EXTRA_PIP_ARGS:-} /xformers/xformers*.whl
8 changes: 5 additions & 3 deletions docker/Dockerfile.gradient
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
ARG DEBIAN_FRONTEND=noninteractive
ARG DEBIAN_PRIORITY=critical
ARG PIP_PREFER_BINARY=1
ARG TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;8.9;9.0"
ARG TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;8.9;9.0+PTX"

FROM base AS gradient

Expand Down Expand Up @@ -59,7 +59,9 @@ ENV TORCH_ALLOW_TF32_CUBLAS_OVERRIDE=1
ENV CUDA_MODULE_LOADING=LAZY
ENV TCMALLOC_AGGRESSIVE_DECOMMIT=t

# we're not changing the entrypoint since nVidia's default one works fine
# paperspace default working dir
WORKDIR /notebooks

# default command
# default command; nvidia's entrypoint.sh is fine so we don't touch ENTRYPOINT
# n.b. when launching the image on gradient this will be overridden with a jupyter start command
CMD [ "/usr/bin/env", "bash", "-l" ]

0 comments on commit 8cc38d8

Please sign in to comment.