Skip to content

Commit

Permalink
add ghcr and update build matrix generator (#3465)
Browse files Browse the repository at this point in the history
Co-authored-by: Mihir Patel <[email protected]>
  • Loading branch information
KevDevSha and mvpatel2000 authored Aug 21, 2024
1 parent 7c48cfc commit dec879e
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 39 deletions.
30 changes: 18 additions & 12 deletions .github/workflows/docker-configure-build-push.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Docker Image Configure-Build-Push
name: Docker/GHCR Image Configure-Build-Push
on:
workflow_call:
inputs:
Expand All @@ -23,6 +23,9 @@ on:
staging-repo:
required: false
type: string
ghcr-staging-repo:
required: false
type: string
tags:
required: true
type: string
Expand All @@ -34,18 +37,14 @@ on:
required: true
password:
required: true
ghcr_username:
required: true
ghcr_password:
required: true
jobs:
configure-build-push:
runs-on: mosaic-4wide
steps:
- name: Maximize Build Space on Worker
uses: easimon/maximize-build-space@v4
with:
overprovision-lvm: true
remove-dotnet: true
remove-android: true
remove-haskell: true

- name: Checkout
uses: actions/checkout@v3

Expand All @@ -60,7 +59,12 @@ jobs:
with:
username: ${{ secrets.username }}
password: ${{ secrets.password }}

- name: Login to GHCR
uses: docker/login-action@v3
with:
username: ${{ secrets.ghcr_username }}
password: ${{ secrets.ghcr_password }}
registry: ghcr.io
- name: Calculate Docker Image Variables
run: |
set -euo pipefail
Expand All @@ -70,7 +74,8 @@ jobs:
###################
if [ "${{ inputs.staging }}" = "true" ]; then
STAGING_REPO=${{ inputs.staging-repo }}
IMAGE_TAG=${STAGING_REPO}:${{ inputs.image-uuid }}
GHCR_STAGING_REPO=${{ inputs.ghcr-staging-repo }}
IMAGE_TAG=${STAGING_REPO}:${{ inputs.image-uuid }},${GHCR_STAGING_REPO}:${{ inputs.image-uuid }}
IMAGE_CACHE="${STAGING_REPO}:${{ inputs.image-name }}-buildcache"
else
IMAGE_TAG=${{ inputs.tags }}
Expand All @@ -81,7 +86,8 @@ jobs:
echo "IMAGE_CACHE=${IMAGE_CACHE}" >> ${GITHUB_ENV}
- name: IMAGE_TAG = ${{ env.IMAGE_TAG }}
run: echo ${{ env.IMAGE_TAG }}
run: |
echo ${{ env.IMAGE_TAG }}
- name: Build and Push the Docker Image
uses: docker/build-push-action@v3
Expand Down
7 changes: 5 additions & 2 deletions .github/workflows/pr-docker.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: PR Docker
name: PR Docker/GHCR
on:
pull_request:
branches:
Expand All @@ -16,7 +16,7 @@ defaults:
jobs:
build-image-matrix:
if: github.repository_owner == 'mosaicml'
runs-on: ubuntu-latest
runs-on: linux-ubuntu-latest
timeout-minutes: 2
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
Expand Down Expand Up @@ -64,8 +64,11 @@ jobs:
push: true
staging: true
staging-repo: mosaicml/ci-staging
ghcr-staging-repo: ghcr.io/databricks-mosaic/ci-staging
tags: ${{ matrix.TAGS }}
target: ${{ matrix.TARGET }}
secrets:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_PASSWORD }}
ghcr_username: ${{ secrets.GHCR_USERNAME }}
ghcr_password: ${{ secrets.GHCR_TOKEN }}
10 changes: 8 additions & 2 deletions .github/workflows/release-docker.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Release Docker Images
name: Release Docker/GHCR Images

on:
workflow_dispatch:
Expand All @@ -8,6 +8,10 @@ on:
required: true
DOCKER_HUB_PASSWORD:
required: true
GHCR_USERNAME:
required: true
GHCR_TOKEN:
required: true

defaults:
run:
Expand All @@ -16,7 +20,7 @@ defaults:
jobs:
build-image-matrix:
if: github.repository_owner == 'mosaicml'
runs-on: ubuntu-latest
runs-on: linux-ubuntu-latest
timeout-minutes: 2
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
Expand Down Expand Up @@ -64,3 +68,5 @@ jobs:
secrets:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_PASSWORD }}
ghcr_username: ${{ secrets.GHCR_USERNAME }}
ghcr_password: ${{ secrets.GHCR_TOKEN }}
2 changes: 2 additions & 0 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,5 @@ jobs:
secrets:
DOCKER_HUB_USERNAME: ${{ secrets.DOCKER_HUB_USERNAME }}
DOCKER_HUB_PASSWORD: ${{ secrets.DOCKER_HUB_PASSWORD }}
GHCR_USERNAME: ${{ secrets.GHCR_USERNAME }}
GHCR_TOKEN: ${{ secrets.GHCR_TOKEN }}
26 changes: 13 additions & 13 deletions docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ all dependencies for both NLP and Vision models. They are built on top of the
`mosaicml/composer:latest` or `mosaicml/composer:latest_cpu`, which will always be up to date.

<!-- BEGIN_COMPOSER_BUILD_MATRIX -->
| Composer Version | CUDA Support | Docker Tag |
|--------------------|----------------|----------------------------------------------------------------|
| 0.23.5 | Yes | `mosaicml/composer:latest`, `mosaicml/composer:0.23.5` |
| Composer Version | CUDA Support | Docker Tag |
|--------------------|----------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 0.23.5 | Yes | `mosaicml/composer:latest`, `mosaicml/composer:0.23.5` |
| 0.23.5 | No | `mosaicml/composer:latest_cpu`, `mosaicml/composer:0.23.5_cpu` |
<!-- END_COMPOSER_BUILD_MATRIX -->

Expand All @@ -28,17 +28,17 @@ The [`mosaicml/pytorch`](https://hub.docker.com/r/mosaicml/pytorch) images conta
To install composer, once inside the image, run `pip install mosaicml`.

<!-- BEGIN_PYTORCH_BUILD_MATRIX -->
| Linux Distro | Flavor | PyTorch Version | CUDA Version | Python Version | Docker Tags |
|----------------|----------|-------------------|---------------------|------------------|------------------------------------------------------------------------------------------|
| Ubuntu 20.04 | Base | 2.4.0 | 12.4.1 (Infiniband) | 3.11 | `mosaicml/pytorch:latest`, `mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04` |
| Linux Distro | Flavor | PyTorch Version | CUDA Version | Python Version | Docker Tags |
|----------------|----------|-------------------|---------------------|------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Ubuntu 20.04 | Base | 2.4.0 | 12.4.1 (Infiniband) | 3.11 | `mosaicml/pytorch:latest`, `mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04` |
| Ubuntu 20.04 | Base | 2.4.0 | 12.4.1 (EFA) | 3.11 | `mosaicml/pytorch:latest-aws`, `mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04-aws` |
| Ubuntu 20.04 | Base | 2.4.0 | cpu | 3.11 | `mosaicml/pytorch:latest_cpu`, `mosaicml/pytorch:2.4.0_cpu-python3.11-ubuntu20.04` |
| Ubuntu 20.04 | Base | 2.3.1 | 12.1.1 (Infiniband) | 3.11 | `mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04` |
| Ubuntu 20.04 | Base | 2.3.1 | 12.1.1 (EFA) | 3.11 | `mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04-aws` |
| Ubuntu 20.04 | Base | 2.3.1 | cpu | 3.11 | `mosaicml/pytorch:2.3.1_cpu-python3.11-ubuntu20.04` |
| Ubuntu 20.04 | Base | 2.2.2 | 12.1.1 (Infiniband) | 3.11 | `mosaicml/pytorch:2.2.2_cu121-python3.11-ubuntu20.04` |
| Ubuntu 20.04 | Base | 2.2.2 | 12.1.1 (EFA) | 3.11 | `mosaicml/pytorch:2.2.2_cu121-python3.11-ubuntu20.04-aws` |
| Ubuntu 20.04 | Base | 2.2.2 | cpu | 3.11 | `mosaicml/pytorch:2.2.2_cpu-python3.11-ubuntu20.04` |
| Ubuntu 20.04 | Base | 2.4.0 | cpu | 3.11 | `mosaicml/pytorch:latest_cpu`, `mosaicml/pytorch:2.4.0_cpu-python3.11-ubuntu20.04` |
| Ubuntu 20.04 | Base | 2.3.1 | 12.1.1 (Infiniband) | 3.11 | `mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04` |
| Ubuntu 20.04 | Base | 2.3.1 | 12.1.1 (EFA) | 3.11 | `mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04-aws` |
| Ubuntu 20.04 | Base | 2.3.1 | cpu | 3.11 | `mosaicml/pytorch:2.3.1_cpu-python3.11-ubuntu20.04` |
| Ubuntu 20.04 | Base | 2.2.2 | 12.1.1 (Infiniband) | 3.11 | `mosaicml/pytorch:2.2.2_cu121-python3.11-ubuntu20.04` |
| Ubuntu 20.04 | Base | 2.2.2 | 12.1.1 (EFA) | 3.11 | `mosaicml/pytorch:2.2.2_cu121-python3.11-ubuntu20.04-aws` |
| Ubuntu 20.04 | Base | 2.2.2 | cpu | 3.11 | `mosaicml/pytorch:2.2.2_cpu-python3.11-ubuntu20.04` |
<!-- END_PYTORCH_BUILD_MATRIX -->

**Note**: The `mosaicml/pytorch:latest`, `mosaicml/pytorch:latest_cpu`, and `mosaicml/pytorch:latest-aws`
Expand Down
16 changes: 16 additions & 0 deletions docker/build_matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
PYTORCH_VERSION: 2.4.0
TAGS:
- mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04
- ghcr.io/databricks-mosaic/pytorch:2.4.0_cu124-python3.11-ubuntu20.04
- mosaicml/pytorch:latest
- ghcr.io/databricks-mosaic/pytorch:latest
TARGET: pytorch_stage
TORCHVISION_VERSION: 0.19.0
- AWS_OFI_NCCL_VERSION: v1.9.1-aws
Expand All @@ -26,7 +28,9 @@
PYTORCH_VERSION: 2.4.0
TAGS:
- mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04-aws
- ghcr.io/databricks-mosaic/pytorch:2.4.0_cu124-python3.11-ubuntu20.04-aws
- mosaicml/pytorch:latest-aws
- ghcr.io/databricks-mosaic/pytorch:latest-aws
TARGET: pytorch_stage
TORCHVISION_VERSION: 0.19.0
- AWS_OFI_NCCL_VERSION: ''
Expand All @@ -41,7 +45,9 @@
PYTORCH_VERSION: 2.4.0
TAGS:
- mosaicml/pytorch:2.4.0_cpu-python3.11-ubuntu20.04
- ghcr.io/databricks-mosaic/pytorch:2.4.0_cpu-python3.11-ubuntu20.04
- mosaicml/pytorch:latest_cpu
- ghcr.io/databricks-mosaic/pytorch:latest_cpu
TARGET: pytorch_stage
TORCHVISION_VERSION: 0.19.0
- AWS_OFI_NCCL_VERSION: ''
Expand Down Expand Up @@ -69,6 +75,7 @@
PYTORCH_VERSION: 2.3.1
TAGS:
- mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04
- ghcr.io/databricks-mosaic/pytorch:2.3.1_cu121-python3.11-ubuntu20.04
TARGET: pytorch_stage
TORCHVISION_VERSION: 0.18.1
- AWS_OFI_NCCL_VERSION: v1.9.1-aws
Expand Down Expand Up @@ -96,6 +103,7 @@
PYTORCH_VERSION: 2.3.1
TAGS:
- mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04-aws
- ghcr.io/databricks-mosaic/pytorch:2.3.1_cu121-python3.11-ubuntu20.04-aws
TARGET: pytorch_stage
TORCHVISION_VERSION: 0.18.1
- AWS_OFI_NCCL_VERSION: ''
Expand All @@ -110,6 +118,7 @@
PYTORCH_VERSION: 2.3.1
TAGS:
- mosaicml/pytorch:2.3.1_cpu-python3.11-ubuntu20.04
- ghcr.io/databricks-mosaic/pytorch:2.3.1_cpu-python3.11-ubuntu20.04
TARGET: pytorch_stage
TORCHVISION_VERSION: 0.18.1
- AWS_OFI_NCCL_VERSION: ''
Expand Down Expand Up @@ -137,6 +146,7 @@
PYTORCH_VERSION: 2.2.2
TAGS:
- mosaicml/pytorch:2.2.2_cu121-python3.11-ubuntu20.04
- ghcr.io/databricks-mosaic/pytorch:2.2.2_cu121-python3.11-ubuntu20.04
TARGET: pytorch_stage
TORCHVISION_VERSION: 0.17.2
- AWS_OFI_NCCL_VERSION: v1.9.1-aws
Expand Down Expand Up @@ -164,6 +174,7 @@
PYTORCH_VERSION: 2.2.2
TAGS:
- mosaicml/pytorch:2.2.2_cu121-python3.11-ubuntu20.04-aws
- ghcr.io/databricks-mosaic/pytorch:2.2.2_cu121-python3.11-ubuntu20.04-aws
TARGET: pytorch_stage
TORCHVISION_VERSION: 0.17.2
- AWS_OFI_NCCL_VERSION: ''
Expand All @@ -178,6 +189,7 @@
PYTORCH_VERSION: 2.2.2
TAGS:
- mosaicml/pytorch:2.2.2_cpu-python3.11-ubuntu20.04
- ghcr.io/databricks-mosaic/pytorch:2.2.2_cpu-python3.11-ubuntu20.04
TARGET: pytorch_stage
TORCHVISION_VERSION: 0.17.2
- AWS_OFI_NCCL_VERSION: ''
Expand All @@ -193,7 +205,9 @@
PYTORCH_VERSION: 2.4.0
TAGS:
- mosaicml/composer:0.23.5
- ghcr.io/databricks-mosaic/composer:0.23.5
- mosaicml/composer:latest
- ghcr.io/databricks-mosaic/composer:latest
TARGET: composer_stage
TORCHVISION_VERSION: 0.19.0
- AWS_OFI_NCCL_VERSION: ''
Expand All @@ -209,6 +223,8 @@
PYTORCH_VERSION: 2.4.0
TAGS:
- mosaicml/composer:0.23.5_cpu
- ghcr.io/databricks-mosaic/composer:0.23.5_cpu
- mosaicml/composer:latest_cpu
- ghcr.io/databricks-mosaic/composer:latest_cpu
TARGET: composer_stage
TORCHVISION_VERSION: 0.19.0
31 changes: 21 additions & 10 deletions docker/generate_build_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import itertools
import os
import re
import sys

import packaging.version
Expand Down Expand Up @@ -105,34 +106,39 @@ def _get_cuda_override(cuda_version: str):
def _get_pytorch_tags(python_version: str, pytorch_version: str, cuda_version: str, stage: str, interconnect: str):
    """Build the list of image tags for one PyTorch image variant.

    Each tag is produced for both registries: Docker Hub (``mosaicml/pytorch``)
    and GHCR (``ghcr.io/databricks-mosaic/pytorch``), in that order.

    Args:
        python_version: Python version embedded in the versioned tag.
        pytorch_version: PyTorch version embedded in the versioned tag.
        cuda_version: CUDA version of the image; a falsy value selects the
            ``latest_cpu`` alias instead of ``latest`` for production images.
        stage: Build stage; only ``'pytorch_stage'`` is accepted.
        interconnect: When equal to ``'EFA'``, every tag gets an ``-aws`` suffix.

    Returns:
        A list of tag strings: the versioned tag for each registry, followed by
        the ``latest``/``latest_cpu`` alias for each registry when both
        ``python_version`` and ``pytorch_version`` match the production versions.

    Raises:
        ValueError: If ``stage`` is not ``'pytorch_stage'``.
    """
    if stage != 'pytorch_stage':
        raise ValueError(f'Invalid stage: {stage}')

    base_image_name = 'mosaicml/pytorch'
    ghcr_base_image_name = 'ghcr.io/databricks-mosaic/pytorch'

    # NOTE(review): _get_cuda_version_tag is defined elsewhere in this file;
    # presumably it yields the "cuXYZ" / "cpu" fragment seen in published tags.
    cuda_version_tag = _get_cuda_version_tag(cuda_version)
    versioned = f'{pytorch_version}_{cuda_version_tag}-python{python_version}-ubuntu20.04'

    # Exactly one entry per registry per label, so no tag is listed twice.
    tags = [f'{base_image_name}:{versioned}', f'{ghcr_base_image_name}:{versioned}']

    if python_version == PRODUCTION_PYTHON_VERSION and pytorch_version == PRODUCTION_PYTORCH_VERSION:
        alias = 'latest' if cuda_version else 'latest_cpu'
        tags += [f'{base_image_name}:{alias}', f'{ghcr_base_image_name}:{alias}']

    if interconnect == 'EFA':
        # The suffix is applied to every tag, aliases and GHCR tags included.
        tags = [f'{tag}-aws' for tag in tags]

    return tags


def _get_composer_tags(composer_version: str, use_cuda: bool):
    """Build the list of image tags for one Composer image variant.

    Each tag is produced for both registries: Docker Hub (``mosaicml/composer``)
    and GHCR (``ghcr.io/databricks-mosaic/composer``), in that order.

    Args:
        composer_version: Composer version used for the versioned tag.
        use_cuda: ``True`` for the CUDA image; ``False`` appends ``_cpu`` to
            every tag.

    Returns:
        A four-element list: the versioned tag for each registry, followed by
        the ``latest`` (or ``latest_cpu``) alias for each registry.
    """
    base_image_name = 'mosaicml/composer'
    ghcr_base_image_name = 'ghcr.io/databricks-mosaic/composer'

    suffix = '' if use_cuda else '_cpu'

    # Outer loop over labels, inner loop over registries, so each label's
    # Docker Hub tag is immediately followed by its GHCR counterpart.
    return [
        f'{repo}:{label}{suffix}'
        for label in (composer_version, 'latest')
        for repo in (base_image_name, ghcr_base_image_name)
    ]


Expand Down Expand Up @@ -161,8 +167,13 @@ def _write_table(table_tag: str, table_contents: str):
end_table_tag = f'<!-- END_{table_tag} -->'

pre = contents.split(begin_table_tag)[0]
post = contents.split(end_table_tag)[1]
if end_table_tag in contents:
post = contents.split(end_table_tag)[1]
else:
print(f"Warning: '{end_table_tag}' not found in contents.")
post = ''
new_readme = f'{pre}{begin_table_tag}\n{table_contents}\n{end_table_tag}{post}'
new_readme = re.sub(r'`ghcr\.io\S*, ', '', new_readme)

with open(os.path.join(os.path.dirname(__name__), 'README.md'), 'w') as f:
f.write(new_readme)
Expand Down

0 comments on commit dec879e

Please sign in to comment.