Nightly Rosetta T5x build and test (workflow_dispatch) #201
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Display name of the workflow, and the per-run title. The run title embeds the
# upstream run's creation timestamp when triggered by workflow_run; otherwise it
# shows the triggering event name.
name: 'Nightly Rosetta T5x build and test'
run-name: "Nightly Rosetta T5x build and test (${{ github.event_name == 'workflow_run' && format('nightly {0}', github.event.workflow_run.created_at) || github.event_name }})"
# Triggers:
#   * workflow_run      - fires when the "Nightly T5X build" workflow completes
#                         on main (jobs below additionally gate on its conclusion).
#   * workflow_dispatch - manual run with optional overrides.
on:
  workflow_run:
    workflows: [Nightly T5X build]
    types: [completed]
    branches: [main]
  workflow_dispatch:
    inputs:
      BASE_IMAGE:
        type: string
        description: 'Upstream T5x image built by NVIDIA/JAX-Toolbox'
        default: 'ghcr.io/nvidia/upstream-t5x:latest'
        required: true
      T5X_PATCHES:
        type: string
        # `\\n` keeps the two characters "\n" visible in the description; a bare
        # `\n` inside a double-quoted YAML scalar would become a real line break.
        description: "Comma separated patches (Ex1: 'p1,p2' converted to 'p1\\np2'). (Ex2: ',' == empty patchlist). Default: use upstream"
        default: ''
        required: false
      FLAX_PATCHES:
        type: string
        description: "Comma separated patches (Ex1: 'p1,p2' converted to 'p1\\np2'). (Ex2: ',' == empty patchlist). Default: use upstream"
        default: ''
        required: false
      PUBLISH:
        type: boolean
        description: "Publish dated images and update the 'latest' tag?"
        default: false
        required: false
# Workflow-wide constants consumed by the metadata job.
env:
  BASE_LIBRARY: 't5x'
  DOCKER_REGISTRY: 'ghcr.io/nvidia'
  # Patchlist file names, relative to rosetta/
  CUSTOM_T5X_PATCHLIST: 'patchlist-t5x.txt'
  CUSTOM_FLAX_PATCHLIST: 'patchlist-flax.txt'
# GITHUB_TOKEN scopes requested by this workflow.
permissions:
  contents: read   # to fetch code
  actions: write   # to cancel previous workflows
  packages: write  # to upload container
# Job graph:
#   metadata -> build -> {test-unit, test-t5x, test-vit} -> publish-*
# Most jobs share the same gate: run on a manual dispatch, or on a workflow_run
# event whose upstream workflow concluded with 'success'.
jobs:
  # Resolves the build date, base image and custom patchlists and exposes them
  # as job outputs for the downstream jobs.
  metadata:
    if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-22.04
    outputs:
      BUILD_DATE: ${{ steps.meta-vars.outputs.BUILD_DATE }}
      BASE_LIBRARY: ${{ steps.meta-vars.outputs.BASE_LIBRARY }}
      BASE_IMAGE: ${{ steps.meta-vars.outputs.BASE_IMAGE }}
      CUSTOM_PATCHLISTS: ${{ steps.meta-vars.outputs.CUSTOM_PATCHLISTS }}
      PUBLISH: ${{ steps.meta-vars.outputs.PUBLISH }}
    steps:
      - name: Set build metadata
        id: meta-vars
        shell: bash -x -e {0}
        # User-supplied dispatch inputs are handed to the script through the
        # environment rather than spliced into the script text, so their
        # contents cannot be interpreted as shell syntax or be word-split.
        env:
          REQUESTED_BASE_IMAGE: ${{ inputs.BASE_IMAGE }}
          T5X_PATCHES: ${{ inputs.T5X_PATCHES }}
          FLAX_PATCHES: ${{ inputs.FLAX_PATCHES }}
          PUBLISH_INPUT: ${{ inputs.PUBLISH }}
        run: |
          BUILD_DATE=$(TZ='US/Los_Angeles' date '+%Y-%m-%d')
          # inputs are empty on workflow_run events; fall back to the default image
          if [[ -z "${REQUESTED_BASE_IMAGE}" ]]; then
            BASE_IMAGE="${{ env.DOCKER_REGISTRY }}/upstream-${{ env.BASE_LIBRARY }}:latest"
          else
            BASE_IMAGE="${REQUESTED_BASE_IMAGE}"
          fi
          echo "BUILD_DATE=${BUILD_DATE}" >> "$GITHUB_OUTPUT"
          echo "BASE_LIBRARY=${{ env.BASE_LIBRARY }}" >> "$GITHUB_OUTPUT"
          echo "BASE_IMAGE=${BASE_IMAGE}" >> "$GITHUB_OUTPUT"
          # Build "<patchlist file>=<patches>" entries, one per non-empty input
          CUSTOM_PATCHLISTS=()
          if [[ -n "${T5X_PATCHES}" ]]; then
            CUSTOM_PATCHLISTS+=("${{ env.CUSTOM_T5X_PATCHLIST }}=${T5X_PATCHES}")
          fi
          if [[ -n "${FLAX_PATCHES}" ]]; then
            CUSTOM_PATCHLISTS+=("${{ env.CUSTOM_FLAX_PATCHLIST }}=${FLAX_PATCHES}")
          fi
          # "${array[*]}" joins elements with the first character of IFS
          IFS=";"
          echo "CUSTOM_PATCHLISTS=${CUSTOM_PATCHLISTS[*]}" >> "$GITHUB_OUTPUT"
          unset IFS
          echo "PUBLISH=${PUBLISH_INPUT}" >> "$GITHUB_OUTPUT"

  # Builds the Rosetta image via the reusable build workflow.
  build:
    if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch'
    needs: metadata
    uses: ./.github/workflows/_build_rosetta.yaml
    with:
      BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
      BASE_LIBRARY: ${{ needs.metadata.outputs.BASE_LIBRARY }}
      BASE_IMAGE: ${{ needs.metadata.outputs.BASE_IMAGE }}
      CUSTOM_PATCHLISTS: ${{ needs.metadata.outputs.CUSTOM_PATCHLISTS }}
      # TODO: Can't build ARM until https://github.com/NVIDIA/JAX-Toolbox/pull/252 is available
      PLATFORMS: '["amd64"]'
    secrets: inherit

  # Publishes the build-status badge, whether the build passed or failed.
  publish-build:
    needs: [metadata, build]
    uses: ./.github/workflows/_publish_badge.yaml
    # `&&` binds tighter than `||`, so the trigger gate is parenthesised as a
    # whole; otherwise a manual dispatch would bypass the success()/failure()
    # guard and run even after cancellation.
    if: ( success() || failure() ) && ((github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch')
    secrets: inherit
    with:
      ENDPOINT_FILENAME: 'rosetta-t5x-build-status.json'
      PUBLISH: ${{ github.event_name == 'workflow_run' || needs.metadata.outputs.PUBLISH == 'true' }}
      SCRIPT: |
        if [[ ${{ needs.build.result }} == "success" ]]; then
          BADGE_COLOR=brightgreen
          MSG=passing
        else
          BADGE_COLOR=red
          MSG=failing
        fi
        echo "LABEL='nightly'" >> $GITHUB_OUTPUT
        echo "MESSAGE='${MSG}'" >> $GITHUB_OUTPUT
        echo "COLOR='${BADGE_COLOR}'" >> $GITHUB_OUTPUT

  # The three test jobs below each consume the image tag(s) emitted by `build`.
  test-unit:
    if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch'
    needs: build
    uses: ./.github/workflows/_test_rosetta.yaml
    with:
      ROSETTA_IMAGE: ${{ needs.build.outputs.DOCKER_TAGS }}
    secrets: inherit

  test-t5x:
    needs: build
    uses: ./.github/workflows/_test_t5x_rosetta.yaml
    if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch'
    with:
      T5X_IMAGE: ${{ needs.build.outputs.DOCKER_TAGS }}
    secrets: inherit

  test-vit:
    needs: build
    uses: ./.github/workflows/_test_vit.yaml
    if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch'
    with:
      ROSETTA_T5X_IMAGE: ${{ needs.build.outputs.DOCKER_TAGS }}
    secrets: inherit

  # Hands the build date and experiment subdirectory to the results-publishing
  # workflow once test-t5x and test-vit have finished.
  publish-t5x:
    needs: [metadata, test-t5x, test-vit]
    uses: ./.github/workflows/_publish_t5x_results.yaml
    if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch'
    with:
      BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
      EXPERIMENT_SUBDIR: ROSETTA_T5X
    secrets: inherit

  # Publishes the overall test-status badge. `always()` means this job is
  # evaluated regardless of upstream job results; when the build did not
  # succeed the script reports "n/a".
  publish-test:
    needs: [metadata, build, test-unit, test-t5x, test-vit]
    uses: ./.github/workflows/_publish_badge.yaml
    if: ( always() )
    secrets: inherit
    with:
      ENDPOINT_FILENAME: 'rosetta-t5x-overall-test-status.json'
      PUBLISH: ${{ github.event_name == 'workflow_run' || needs.metadata.outputs.PUBLISH == 'true' }}
      SCRIPT: |
        UNIT_STATUS=${{ needs.test-unit.outputs.TEST_STATUS }}
        T5X_STATUS=${{ needs.test-t5x.outputs.TEST_STATUS }}
        VIT_STATUS=${{ needs.test-vit.outputs.TEST_STATUS }}
        echo "LABEL='Tests'" >> $GITHUB_OUTPUT
        if [[ ${{ needs.build.result }} == "success" ]]; then
          if [[ $UNIT_STATUS == "success" ]] && [[ $T5X_STATUS == "success" ]] && [[ $VIT_STATUS == "success" ]]; then
            COLOR=brightgreen
            MESSAGE="Unit passed / MGMN passed"
          elif [[ $UNIT_STATUS == "success" ]]; then
            COLOR=yellow
            MESSAGE="Unit passed / MGMN failed"
          elif [[ $T5X_STATUS == "success" ]] && [[ $VIT_STATUS == "success" ]]; then
            COLOR=yellow
            MESSAGE="Unit failed / MGMN passed"
          else
            COLOR=red
            MESSAGE="Unit failed / MGMN failed"
          fi
        else
          MESSAGE="n/a"
          COLOR="red"
        fi
        echo "MESSAGE='${MESSAGE}'" >> $GITHUB_OUTPUT
        echo "COLOR='${COLOR}'" >> $GITHUB_OUTPUT

  # Moves the 'latest' tag only when all three test suites report success and
  # publishing is enabled (nightly trigger, or manual run with PUBLISH set).
  publish-latest-container:
    needs: [metadata, build, test-t5x, test-unit, test-vit]
    if: ( needs.test-unit.outputs.TEST_STATUS == 'success' && needs.test-t5x.outputs.TEST_STATUS == 'success' && needs.test-vit.outputs.TEST_STATUS == 'success' ) && ((github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || (github.event_name == 'workflow_dispatch' && inputs.PUBLISH))
    uses: ./.github/workflows/_publish_container.yaml
    secrets: inherit
    with:
      SOURCE_IMAGE: ${{ needs.build.outputs.DOCKER_TAGS }}
      TARGET_IMAGE: t5x
      TARGET_TAGS: |
        type=raw,value=latest,priority=1000

  # Publishes the dated 'nightly-<BUILD_DATE>' tag; depends only on the build,
  # not on the test jobs.
  publish-container:
    if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || (github.event_name == 'workflow_dispatch' && inputs.PUBLISH)
    needs: [metadata, build]
    uses: ./.github/workflows/_publish_container.yaml
    secrets: inherit
    with:
      SOURCE_IMAGE: ${{ needs.build.outputs.DOCKER_TAGS }}
      TARGET_IMAGE: t5x
      TARGET_TAGS: |
        type=raw,value=nightly-${{ needs.metadata.outputs.BUILD_DATE }},priority=900

  # Fails the run explicitly when the triggering upstream workflow failed, so
  # the run is not reported as a silent all-skipped success.
  if-upstream-failed:
    runs-on: ubuntu-latest
    if: github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'failure'
    steps:
      - run: echo 'Upstream workflow failed, aborting run' && exit 1