[execution window] further optimizations #1553
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Continuously run stable forge tests against the latest main branch. | |
name: Continuous Forge Tests - Stable | |
permissions: | |
issues: write | |
pull-requests: write | |
contents: read | |
id-token: write | |
actions: write #required for workflow cancellation via check-aptos-core | |
on: | |
# Allow triggering manually | |
workflow_dispatch: | |
inputs: | |
IMAGE_TAG: | |
required: false | |
type: string | |
description: The docker image tag to test. This may be a git SHA1, or a tag like "<branch>_<git SHA1>". If not specified, Forge will find the latest build based on the git history (starting from GIT_SHA input) | |
GIT_SHA: | |
required: false | |
type: string | |
description: The git SHA1 to checkout. This affects the Forge test runner that is used. If not specified, the latest main will be used | |
TEST_NAME: | |
required: true | |
type: choice | |
description: The specific stable test to run. If 'all', all stable tests will be run | |
default: 'all' | |
options: | |
- all | |
- framework-upgrade-test | |
- realistic-env-load-sweep | |
- realistic-env-workload-sweep | |
- realistic-env-graceful-overload | |
- realistic-env-graceful-workload-sweep | |
- realistic-env-fairness-workload-sweep | |
- realistic-network-tuned-for-throughput | |
- consensus-stress-test | |
- workload-mix-test | |
- single-vfn-perf | |
- fullnode-reboot-stress-test | |
- compat | |
- changing-working-quorum-test | |
- changing-working-quorum-test-high-load | |
- pfn-const-tps-realistic-env | |
- realistic-env-max-load-long | |
JOB_PARALLELISM: | |
required: false | |
type: number | |
description: The number of test jobs to run in parallel. If not specified, defaults to 1 | |
default: 1 | |
# NOTE: to support testing different branches on different schedules, you need to specify the cron schedule in the 'determine-test-branch' step as well below | |
# Reference: https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#schedule | |
schedule: | |
- cron: "0 22 * * 0,2,4" # The main branch cadence. This runs every Sun,Tues,Thurs | |
pull_request: | |
paths: | |
- ".github/workflows/forge-stable.yaml" | |
- "testsuite/find_latest_image.py" | |
concurrency: | |
group: forge-stable-${{ format('{0}-{1}-{2}-{3}', github.ref_name, inputs.GIT_SHA, inputs.IMAGE_TAG, inputs.TEST_NAME) }} | |
cancel-in-progress: true | |
env: | |
AWS_ACCOUNT_NUM: ${{ secrets.ENV_ECR_AWS_ACCOUNT_NUM }} | |
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} | |
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} | |
IMAGE_TAG: ${{ inputs.IMAGE_TAG }} # this is only used for workflow_dispatch, otherwise defaults to empty | |
AWS_REGION: us-west-2 | |
jobs: | |
generate-matrix: | |
runs-on: ubuntu-latest | |
outputs: | |
matrix: ${{ steps.set-matrix.outputs.matrix }} | |
imageVariants: ${{ steps.set-matrix.outputs.imageVariants }} | |
steps: | |
- name: Compute matrix | |
id: set-matrix | |
uses: actions/github-script@v7 | |
env: | |
TEST_NAME: ${{ inputs.TEST_NAME }} | |
with: | |
result-encoding: string | |
script: | | |
const testName = process.env.TEST_NAME || 'all'; | |
console.log(`Running job: ${testName}`); | |
const tests = [ | |
{ TEST_NAME: 'framework-upgrade-test', FORGE_RUNNER_DURATION_SECS: 7200, FORGE_TEST_SUITE: 'framework_upgrade' }, | |
{ TEST_NAME: 'realistic-env-load-sweep', FORGE_RUNNER_DURATION_SECS: 1800, FORGE_TEST_SUITE: 'realistic_env_load_sweep' }, | |
{ TEST_NAME: 'realistic-env-workload-sweep', FORGE_RUNNER_DURATION_SECS: 2000, FORGE_TEST_SUITE: 'realistic_env_workload_sweep' }, | |
{ TEST_NAME: 'realistic-env-graceful-overload', FORGE_RUNNER_DURATION_SECS: 1200, FORGE_TEST_SUITE: 'realistic_env_graceful_overload' }, | |
{ TEST_NAME: 'realistic-env-graceful-workload-sweep', FORGE_RUNNER_DURATION_SECS: 2100, FORGE_TEST_SUITE: 'realistic_env_graceful_workload_sweep' }, | |
{ TEST_NAME: 'realistic-env-fairness-workload-sweep', FORGE_RUNNER_DURATION_SECS: 900, FORGE_TEST_SUITE: 'realistic_env_fairness_workload_sweep' }, | |
{ TEST_NAME: 'realistic-network-tuned-for-throughput', FORGE_RUNNER_DURATION_SECS: 900, FORGE_TEST_SUITE: 'realistic_network_tuned_for_throughput', FORGE_ENABLE_PERFORMANCE: true }, | |
{ TEST_NAME: 'consensus-stress-test', FORGE_RUNNER_DURATION_SECS: 2400, FORGE_TEST_SUITE: 'consensus_stress_test' }, | |
{ TEST_NAME: 'workload-mix-test', FORGE_RUNNER_DURATION_SECS: 900, FORGE_TEST_SUITE: 'workload_mix' }, | |
{ TEST_NAME: 'single-vfn-perf', FORGE_RUNNER_DURATION_SECS: 480, FORGE_TEST_SUITE: 'single_vfn_perf' }, | |
{ TEST_NAME: 'fullnode-reboot-stress-test', FORGE_RUNNER_DURATION_SECS: 1800, FORGE_TEST_SUITE: 'fullnode_reboot_stress_test' }, | |
{ TEST_NAME: 'compat', FORGE_RUNNER_DURATION_SECS: 300, FORGE_TEST_SUITE: 'compat' }, | |
{ TEST_NAME: 'changing-working-quorum-test', FORGE_RUNNER_DURATION_SECS: 1200, FORGE_TEST_SUITE: 'changing_working_quorum_test', FORGE_ENABLE_FAILPOINTS: true }, | |
{ TEST_NAME: 'changing-working-quorum-test-high-load', FORGE_RUNNER_DURATION_SECS: 900, FORGE_TEST_SUITE: 'changing_working_quorum_test_high_load', FORGE_ENABLE_FAILPOINTS: true }, | |
{ TEST_NAME: 'pfn-const-tps-realistic-env', FORGE_RUNNER_DURATION_SECS: 900, FORGE_TEST_SUITE: 'pfn_const_tps_with_realistic_env' }, | |
{ TEST_NAME: 'realistic-env-max-load-long', FORGE_RUNNER_DURATION_SECS: 7200, FORGE_TEST_SUITE: 'realistic_env_max_load_large' } | |
]; | |
const matrix = testName != "all" ? tests.filter(test => test.TEST_NAME === testName) : tests; | |
core.debug(`Matrix: ${JSON.stringify(matrix)}`); | |
core.summary.addHeading('Forge Stable Run'); | |
const testsToRunNames = matrix.map(test => `${test.TEST_NAME} (Needs Failpoint: ${test.FORGE_ENABLE_FAILPOINTS || false}, Needs Performance: ${test.FORGE_ENABLE_PERFORMANCE || false})`); | |
core.summary.addRaw("The following tests will be run:", true); | |
core.summary.addList(testsToRunNames); | |
core.summary.write(); | |
const needsFailpoints = matrix.some(test => test.FORGE_ENABLE_FAILPOINTS); | |
const needsPerformance = matrix.some(test => test.FORGE_ENABLE_PERFORMANCE); | |
let requiredImageVariants = []; | |
if (needsFailpoints) { | |
requiredImageVariants.push('failpoints'); | |
} | |
if (needsPerformance) { | |
requiredImageVariants.push('performance'); | |
} | |
core.setOutput('matrix', JSON.stringify({ include: matrix })); | |
core.setOutput('imageVariants', requiredImageVariants.join(' ')); | |
# This job determines the image tag and branch to test, and passes them to the other jobs | |
# NOTE: this may be better as a separate workflow as the logic is quite complex but generalizable | |
determine-test-metadata: | |
runs-on: ubuntu-latest | |
needs: ["generate-matrix"] | |
outputs: | |
IMAGE_TAG: ${{ steps.get-docker-image-tag.outputs.IMAGE_TAG }} | |
IMAGE_TAG_FOR_COMPAT_TEST: ${{ steps.get-last-released-image-tag-for-compat-test.outputs.IMAGE_TAG }} | |
BRANCH: ${{ steps.determine-test-branch.outputs.BRANCH }} | |
BRANCH_HASH: ${{ steps.hash-branch.outputs.BRANCH_HASH }} | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Determine branch based on cadence | |
id: determine-test-branch | |
# NOTE: the schedule cron MUST match the one in the 'on.schedule.cron' section above | |
run: | | |
if [[ "${{ github.event_name }}" == "schedule" ]]; then | |
if [[ "${{ github.event.schedule }}" == "0 22 * * 0,2,4" ]]; then | |
echo "Branch: main" | |
echo "BRANCH=main" >> $GITHUB_OUTPUT | |
else | |
echo "Unknown schedule: ${{ github.event.schedule }}" | |
exit 1 | |
fi | |
elif [[ "${{ github.event_name }}" == "push" ]]; then | |
echo "Branch: ${{ github.ref_name }}" | |
echo "BRANCH=${{ github.ref_name }}" >> $GITHUB_OUTPUT | |
# on workflow_dispatch, this will simply use the inputs.GIT_SHA given (or the default) | |
elif [[ -n "${{ inputs.GIT_SHA }}" ]]; then | |
echo "BRANCH=${{ inputs.GIT_SHA }}" >> $GITHUB_OUTPUT | |
# if GIT_SHA not provided, use the branch where workflow runs on | |
else | |
echo "BRANCH=${{ github.head_ref }}" >> $GITHUB_OUTPUT | |
fi | |
# Use the branch hash instead of the full branch name to stay under kubernetes namespace length limit | |
- name: Hash the branch | |
id: hash-branch | |
run: | | |
# Hashing the branch name | |
echo "BRANCH_HASH=$(echo -n "${{ steps.determine-test-branch.outputs.BRANCH }}" | sha256sum | cut -c1-10)" >> $GITHUB_OUTPUT | |
- uses: aptos-labs/aptos-core/.github/actions/check-aptos-core@main | |
with: | |
cancel-workflow: ${{ github.event_name == 'schedule' }} # Cancel the workflow if it is scheduled on a fork | |
# actions/get-latest-docker-image-tag requires docker utilities and having authenticated to internal docker image registries | |
- uses: aptos-labs/aptos-core/.github/actions/docker-setup@main | |
id: docker-setup | |
with: | |
GCP_WORKLOAD_IDENTITY_PROVIDER: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} | |
GCP_SERVICE_ACCOUNT_EMAIL: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }} | |
EXPORT_GCP_PROJECT_VARIABLES: "false" | |
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} | |
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} | |
AWS_DOCKER_ARTIFACT_REPO: ${{ secrets.AWS_DOCKER_ARTIFACT_REPO }} | |
GIT_CREDENTIALS: ${{ secrets.GIT_CREDENTIALS }} | |
- uses: aptos-labs/aptos-core/.github/actions/get-latest-docker-image-tag@main | |
id: get-docker-image-tag | |
with: | |
branch: ${{ steps.determine-test-branch.outputs.BRANCH }} | |
variants: ${{ needs.generate-matrix.outputs.imageVariants }} | |
- uses: aptos-labs/aptos-core/.github/actions/determine-or-use-target-branch-and-get-last-released-image@main | |
id: get-last-released-image-tag-for-compat-test | |
with: | |
base-branch: ${{ steps.determine-test-branch.outputs.BRANCH }} | |
variants: ${{ needs.generate-matrix.outputs.imageVariants }} | |
- name: Write summary | |
run: | | |
IMAGE_TAG=${{ steps.get-docker-image-tag.outputs.IMAGE_TAG }} | |
IMAGE_TAG_FOR_COMPAT_TEST=${{ steps.get-last-released-image-tag-for-compat-test.outputs.IMAGE_TAG }} | |
TARGET_BRANCH_TO_FETCH_IMAGE_FOR_COMPAT_TEST=${{ steps.get-last-released-image-tag-for-compat-test.outputs.TARGET_BRANCH }} | |
BRANCH=${{ steps.determine-test-branch.outputs.BRANCH }} | |
if [ -n "${BRANCH}" ]; then | |
echo "BRANCH: [${BRANCH}](https://github.com/${{ github.repository }}/tree/${BRANCH})" >> $GITHUB_STEP_SUMMARY | |
fi | |
echo "IMAGE_TAG: [${IMAGE_TAG}](https://github.com/${{ github.repository }}/commit/${IMAGE_TAG})" >> $GITHUB_STEP_SUMMARY | |
echo "To cancel this job, do `pnpm infra ci cancel-workflow ${{ github.run_id }}` from internal-ops" >> $GITHUB_STEP_SUMMARY | |
run: | |
name: forge-${{ matrix.TEST_NAME }} | |
needs: [determine-test-metadata, generate-matrix] | |
if: ${{ github.event_name != 'pull_request' }} | |
strategy: | |
fail-fast: false | |
max-parallel: ${{ inputs.JOB_PARALLELISM && fromJson(inputs.JOB_PARALLELISM) || 1 }} | |
matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }} | |
uses: aptos-labs/aptos-core/.github/workflows/workflow-run-forge.yaml@main | |
secrets: inherit | |
with: | |
IMAGE_TAG: ${{ ((matrix.FORGE_TEST_SUITE == 'compat' && needs.determine-test-metadata.outputs.IMAGE_TAG_FOR_COMPAT_TEST) || needs.determine-test-metadata.outputs.IMAGE_TAG) }} | |
FORGE_NAMESPACE: forge-${{ matrix.TEST_NAME }}-${{ needs.determine-test-metadata.outputs.BRANCH_HASH }} | |
FORGE_TEST_SUITE: ${{ matrix.FORGE_TEST_SUITE }} | |
FORGE_RUNNER_DURATION_SECS: ${{ matrix.FORGE_RUNNER_DURATION_SECS }} | |
FORGE_ENABLE_PERFORMANCE: ${{ matrix.FORGE_ENABLE_PERFORMANCE || false }} | |
FORGE_ENABLE_FAILPOINTS: ${{ matrix.FORGE_ENABLE_FAILPOINTS || false }} | |
POST_TO_SLACK: true | |
SEND_RESULTS_TO_TRUNK: true |