# Nightly Rosetta T5x build and test #141
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: Nightly Rosetta T5x build and test

# Triggers:
#   * workflow_run      - fires whenever a "Nightly T5X build" run on `main`
#                         completes; individual jobs inspect its conclusion.
#   * workflow_dispatch - manual run, with an overridable base image and an
#                         opt-in PUBLISH switch.
on:
  workflow_run:
    workflows: [Nightly T5X build]
    types: [completed]
    branches: [main]
  workflow_dispatch:
    inputs:
      BASE_IMAGE:
        type: string
        description: 'T5x image built by NVIDIA/JAX-Toolbox'
        default: 'ghcr.io/nvidia/upstream-t5x:latest'
        required: true
      PUBLISH:
        type: boolean
        description: "Publish dated images and update the 'latest' tag?"
        default: false
        required: false

# Workflow-wide values; used to assemble the default base image reference
# (<DOCKER_REGISTRY>/upstream-<BASE_LIBRARY>:latest).
env:
  BASE_LIBRARY: t5x
  DOCKER_REGISTRY: ghcr.io/nvidia

permissions:
  contents: read  # to fetch code
  actions: write  # to cancel previous workflows
  packages: write  # to upload container
jobs:
  # Every job gated by the expression below runs only when the triggering
  # upstream workflow_run succeeded, or when this workflow was started by hand.

  # Computes the build date and resolves which base image to build from, then
  # exposes both (plus BASE_LIBRARY and PUBLISH) as job outputs.
  metadata:
    if: >-
      (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success')
      || github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-22.04
    outputs:
      BUILD_DATE: ${{ steps.meta-vars.outputs.BUILD_DATE }}
      BASE_LIBRARY: ${{ steps.meta-vars.outputs.BASE_LIBRARY }}
      BASE_IMAGE: ${{ steps.meta-vars.outputs.BASE_IMAGE }}
      PUBLISH: ${{ steps.meta-vars.outputs.PUBLISH }}
    steps:
      - name: Set build metadata
        id: meta-vars
        shell: bash -x -e {0}
        # User-supplied inputs are handed to the script through environment
        # variables rather than being expanded into the script text, so their
        # contents can never be interpreted as shell syntax.
        env:
          REQUESTED_BASE_IMAGE: ${{ inputs.BASE_IMAGE }}
          REQUESTED_PUBLISH: ${{ inputs.PUBLISH }}
        run: |
          BUILD_DATE=$(TZ='US/Los_Angeles' date '+%Y-%m-%d')
          # inputs are empty on workflow_run triggers; fall back to the
          # registry's upstream ':latest' image in that case.
          if [[ -z "${REQUESTED_BASE_IMAGE}" ]]; then
            BASE_IMAGE="${{ env.DOCKER_REGISTRY }}/upstream-${{ env.BASE_LIBRARY }}:latest"
          else
            BASE_IMAGE="${REQUESTED_BASE_IMAGE}"
          fi
          echo "BUILD_DATE=${BUILD_DATE}" >> "$GITHUB_OUTPUT"
          echo "BASE_LIBRARY=${{ env.BASE_LIBRARY }}" >> "$GITHUB_OUTPUT"
          echo "BASE_IMAGE=${BASE_IMAGE}" >> "$GITHUB_OUTPUT"
          echo "PUBLISH=${REQUESTED_PUBLISH}" >> "$GITHUB_OUTPUT"

  # Builds the Rosetta image via the shared reusable workflow.
  build:
    needs: metadata
    if: >-
      (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success')
      || github.event_name == 'workflow_dispatch'
    uses: ./.github/workflows/_build_rosetta.yaml
    with:
      BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
      BASE_LIBRARY: ${{ needs.metadata.outputs.BASE_LIBRARY }}
      BASE_IMAGE: ${{ needs.metadata.outputs.BASE_IMAGE }}
      # TODO: Can't build ARM until https://github.com/NVIDIA/JAX-Toolbox/pull/252 is available
      PLATFORMS: '["amd64"]'
    secrets: inherit

  # Publishes the build-status badge (green "passing" / red "failing").
  publish-build:
    needs: [metadata, build]
    # `&&` binds tighter than `||`, so the trigger test is parenthesized as a
    # whole: the success()/failure() guard (i.e. "not cancelled") must apply to
    # manual dispatches as well as to workflow_run triggers.
    if: >-
      ( success() || failure() )
      && ((github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success')
      || github.event_name == 'workflow_dispatch')
    uses: ./.github/workflows/_publish_badge.yaml
    with:
      ENDPOINT_FILENAME: 'rosetta-t5x-build-status.json'
      PUBLISH: ${{ github.event_name == 'workflow_run' || needs.metadata.outputs.PUBLISH == 'true' }}
      SCRIPT: |
        if [[ ${{ needs.build.result }} == "success" ]]; then
          BADGE_COLOR=brightgreen
          MSG=passing
        else
          BADGE_COLOR=red
          MSG=failing
        fi
        echo "LABEL='nightly'" >> $GITHUB_OUTPUT
        echo "MESSAGE='${MSG}'" >> $GITHUB_OUTPUT
        echo "COLOR='${BADGE_COLOR}'" >> $GITHUB_OUTPUT
    secrets: inherit

  # Runs the Rosetta unit tests against the freshly built image.
  test-unit:
    needs: build
    if: >-
      (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success')
      || github.event_name == 'workflow_dispatch'
    uses: ./.github/workflows/_test_rosetta.yaml
    with:
      ROSETTA_IMAGE: ${{ needs.build.outputs.DOCKER_TAGS }}
    secrets: inherit

  # Runs the T5X tests against the freshly built image.
  test-t5x:
    needs: build
    if: >-
      (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success')
      || github.event_name == 'workflow_dispatch'
    uses: ./.github/workflows/_test_t5x.yaml
    with:
      T5X_IMAGE: ${{ needs.build.outputs.DOCKER_TAGS }}
      # Disable packing b/c rosetta-t5x images run with TE by default, and TE does not currently support packing
      EXTRA_GIN_ARGS: '--gin.train/utils.DatasetConfig.pack=False --gin.train_eval/utils.DatasetConfig.pack=False'
    secrets: inherit

  # Hands the T5X test results to the shared results-publishing workflow.
  publish-t5x:
    needs: [metadata, test-t5x]
    if: >-
      (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success')
      || github.event_name == 'workflow_dispatch'
    uses: ./.github/workflows/_publish_t5x_results.yaml
    with:
      BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
      EXPERIMENT_SUBDIR: ROSETTA_T5X
    secrets: inherit

  # Publishes the combined test-status badge. Uses always() so the badge is
  # refreshed whatever happened to the jobs it depends on; when the build did
  # not succeed the badge reads "n/a" in red.
  publish-test:
    needs: [metadata, build, test-unit, test-t5x]
    if: always()
    uses: ./.github/workflows/_publish_badge.yaml
    with:
      ENDPOINT_FILENAME: 'rosetta-t5x-overall-test-status.json'
      PUBLISH: ${{ github.event_name == 'workflow_run' || needs.metadata.outputs.PUBLISH == 'true' }}
      SCRIPT: |
        UNIT_STATUS=${{ needs.test-unit.outputs.TEST_STATUS }}
        T5X_STATUS=${{ needs.test-t5x.outputs.TEST_STATUS }}
        echo "LABEL='Tests'" >> $GITHUB_OUTPUT
        if [[ ${{ needs.build.result }} == "success" ]]; then
          if [[ $UNIT_STATUS == "success" ]] && [[ $T5X_STATUS == "success" ]]; then
            COLOR=brightgreen
            MESSAGE="Unit passed / MGMN passed"
          elif [[ $UNIT_STATUS == "success" ]]; then
            COLOR=yellow
            MESSAGE="Unit passed / MGMN failed"
          elif [[ $T5X_STATUS == "success" ]]; then
            COLOR=yellow
            MESSAGE="Unit failed / MGMN passed"
          else
            COLOR=red
            MESSAGE="Unit failed / MGMN failed"
          fi
        else
          MESSAGE="n/a"
          COLOR="red"
        fi
        echo "MESSAGE='${MESSAGE}'" >> $GITHUB_OUTPUT
        echo "COLOR='${COLOR}'" >> $GITHUB_OUTPUT
    secrets: inherit

  # Moves the 'latest' tag to the new image, but only when both test suites
  # reported success and the run is either a nightly trigger or a manual run
  # with PUBLISH enabled.
  publish-latest-container:
    needs: [metadata, build, test-t5x, test-unit]
    if: >-
      ( needs.test-unit.outputs.TEST_STATUS == 'success' && needs.test-t5x.outputs.TEST_STATUS == 'success' )
      && ((github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success')
      || (github.event_name == 'workflow_dispatch' && inputs.PUBLISH))
    uses: ./.github/workflows/_publish_container.yaml
    with:
      SOURCE_IMAGE: ${{ needs.build.outputs.DOCKER_TAGS }}
      TARGET_IMAGE: t5x
      TARGET_TAGS: |
        type=raw,value=latest,priority=1000
    secrets: inherit

  # Publishes the image under a dated 'nightly-<BUILD_DATE>' tag; does not
  # wait for the test jobs.
  publish-container:
    needs: [metadata, build]
    if: >-
      (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success')
      || (github.event_name == 'workflow_dispatch' && inputs.PUBLISH)
    uses: ./.github/workflows/_publish_container.yaml
    with:
      SOURCE_IMAGE: ${{ needs.build.outputs.DOCKER_TAGS }}
      TARGET_IMAGE: t5x
      TARGET_TAGS: |
        type=raw,value=nightly-${{ needs.metadata.outputs.BUILD_DATE }},priority=900
    secrets: inherit

  # Fails this run explicitly when the upstream workflow concluded with
  # 'failure', so the skipped pipeline is not mistaken for a pass.
  if-upstream-failed:
    runs-on: ubuntu-latest
    if: github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'failure'
    steps:
      - run: echo 'Upstream workflow failed, aborting run' && exit 1