From f3b5237cc05010d2b861514d1726975a02a33874 Mon Sep 17 00:00:00 2001 From: Michelangelo Riccobene Date: Tue, 7 May 2024 15:10:25 +0200 Subject: [PATCH] qa-tests: targeting test-runners with labels, detecting test completion, increasing workflow timeout (#10228) This pull request introduces several enhancements: - It specifies in the test workflow that commits on the main or release/3.x branches should be executed on the test-runner labeled Erigon3. - It enhances the detection of test completion, and will skip uploading test results if they are incomplete, thus avoiding erroneous error signals during the upload process. - It improves the directory cleanup process. - It increases the workflow timeout (which is 6 hours by default but can be increased to 35 days on self-hosted runners). --- .../qa-clean-exit-block-downloading.yml | 18 +++++++------ .../qa-clean-exit-snapshot-downloading.yml | 25 +++++++++++-------- .github/workflows/qa-snap-download.yml | 25 +++++++++++-------- .github/workflows/qa-tip-tracking.yml | 16 +++++++----- 4 files changed, 51 insertions(+), 33 deletions(-) diff --git a/.github/workflows/qa-clean-exit-block-downloading.yml b/.github/workflows/qa-clean-exit-block-downloading.yml index c5b7a2b8d3c..1f3c9f406a0 100644 --- a/.github/workflows/qa-clean-exit-block-downloading.yml +++ b/.github/workflows/qa-clean-exit-block-downloading.yml @@ -3,17 +3,19 @@ name: QA - Clean exit (block downloading) on: push: branches: - - 'release/**' + - main + - 'release/3.*' pull_request: branches: - main - - 'release/**' + - 'release/3.*' types: - ready_for_review + workflow_dispatch: # Run manually jobs: long-running-test: - runs-on: self-hosted + runs-on: [self-hosted, Erigon3] env: ERIGON_REFERENCE_DATA_DIR: /opt/erigon-versions/reference-version/datadir ERIGON_TESTBED_DATA_DIR: /opt/erigon-testbed/datadir @@ -51,14 +53,15 @@ jobs: # Capture monitoring script exit status test_exit_status=$? 
+ + # Record that the test script returned; the "Save test results" and "Upload test results" steps are gated on this output + echo "test_executed=true" >> "$GITHUB_OUTPUT" # Clean up Erigon process if it's still running if kill -0 $ERIGON_PID 2> /dev/null; then echo "Terminating Erigon" kill $ERIGON_PID wait $ERIGON_PID - else - echo "Erigon has already terminated" fi # Check test runner script exit status @@ -71,6 +74,7 @@ jobs: fi - name: Delete Erigon Testbed Data Directory + if: always() run: | rm -rf $ERIGON_TESTBED_DATA_DIR @@ -79,13 +83,13 @@ jobs: python3 $ERIGON_QA_PATH/test_system/db-producer/resume_production.py || true - name: Save test results - if: always() + if: steps.test_step.outputs.test_executed == 'true' env: TEST_RESULT: ${{ steps.test_step.outputs.TEST_RESULT }} run: python3 $ERIGON_QA_PATH/test_system/qa-tests/uploads/upload_test_results.py --repo erigon --commit $(git rev-parse HEAD) --test_name clean-exit-block-downloading --outcome $TEST_RESULT --result_file ${{ github.workspace }}/result.json - name: Upload test results - if: always() + if: steps.test_step.outputs.test_executed == 'true' uses: actions/upload-artifact@v4 with: name: test-results diff --git a/.github/workflows/qa-clean-exit-snapshot-downloading.yml b/.github/workflows/qa-clean-exit-snapshot-downloading.yml index 1e08cc6e9f8..79819132cd1 100644 --- a/.github/workflows/qa-clean-exit-snapshot-downloading.yml +++ b/.github/workflows/qa-clean-exit-snapshot-downloading.yml @@ -4,13 +4,14 @@ on: push: branches: - main - - 'release/**' + - 'release/3.*' pull_request: branches: - main - - 'release/**' + - 'release/3.*' types: - ready_for_review + workflow_dispatch: # Run manually jobs: long-running-test: @@ -24,9 +25,10 @@ jobs: - name: Check out repository uses: actions/checkout@v4 - - name: Clean Erigon Build Directory + - name: Clean Erigon Build & Data Directories run: | make clean + rm -rf $ERIGON_DATA_DIR - name: Build Erigon run: | @@ -48,18 +50,16 @@ jobs: # Capture monitoring script exit status test_exit_status=$? 
+ # Record that the test script returned; the "Save test results" and "Upload test results" steps are gated on this output + echo "test_executed=true" >> "$GITHUB_OUTPUT" + # Clean up Erigon process if it's still running if kill -0 $ERIGON_PID 2> /dev/null; then echo "Terminating Erigon" kill $ERIGON_PID wait $ERIGON_PID - else - echo "Erigon has already terminated" fi - # Clean up Erigon build and data directories - rm -rf $ERIGON_DATA_DIR - # Check test runner script exit status if [ $test_exit_status -eq 0 ]; then echo "Tests completed successfully" @@ -69,18 +69,23 @@ jobs: echo "TEST_RESULT=failure" >> "$GITHUB_OUTPUT" fi + - name: Clean up Erigon data directory + if: always() + run: | + rm -rf $ERIGON_DATA_DIR + - name: Resume the Erigon instance dedicated to db maintenance run: | python3 $ERIGON_QA_PATH/test_system/db-producer/resume_production.py || true - name: Save test results - if: always() + if: steps.test_step.outputs.test_executed == 'true' env: TEST_RESULT: ${{ steps.test_step.outputs.TEST_RESULT }} run: python3 $ERIGON_QA_PATH/test_system/qa-tests/uploads/upload_test_results.py --repo erigon --commit $(git rev-parse HEAD) --test_name clean-exit-snapshot-downloading --outcome $TEST_RESULT --result_file ${{ github.workspace }}/result.json - name: Upload test results - if: always() + if: steps.test_step.outputs.test_executed == 'true' uses: actions/upload-artifact@v4 with: name: test-results diff --git a/.github/workflows/qa-snap-download.yml b/.github/workflows/qa-snap-download.yml index 7cdc94d3fb1..146cfa66fbd 100644 --- a/.github/workflows/qa-snap-download.yml +++ b/.github/workflows/qa-snap-download.yml @@ -7,7 +7,8 @@ on: jobs: long-running-test: - runs-on: self-hosted + runs-on: [self-hosted, Erigon3] + timeout-minutes: 600 env: ERIGON_DATA_DIR: ${{ github.workspace }}/erigon_data ERIGON_QA_PATH: /home/qarunner/erigon-qa @@ -18,9 +19,10 @@ jobs: - name: Check out repository uses: actions/checkout@v4 - - name: Clean Erigon Build Directory + - name: Clean Erigon Build & Data Directories run: | make clean + rm -rf $ERIGON_DATA_DIR 
- name: Build Erigon run: | @@ -35,25 +37,23 @@ jobs: id: test_step run: | set +e # Disable exit on error - + # Run Erigon, monitor snapshot downloading and check logs python3 $ERIGON_QA_PATH/test_system/qa-tests/snap-download/run_and_check_snap_download.py ${{ github.workspace }}/build/bin $ERIGON_DATA_DIR $TOTAL_TIME_SECONDS # Capture monitoring script exit status test_exit_status=$? + # Record that the test script returned; the "Save test results" and "Upload test results" steps are gated on this output + echo "test_executed=true" >> "$GITHUB_OUTPUT" + # Clean up Erigon process if it's still running if kill -0 $ERIGON_PID 2> /dev/null; then echo "Terminating Erigon" kill $ERIGON_PID wait $ERIGON_PID - else - echo "Erigon has already terminated" fi - # Clean up Erigon build and data directories - rm -rf $ERIGON_DATA_DIR - # Check test runner script exit status if [ $test_exit_status -eq 0 ]; then echo "Tests completed successfully" @@ -63,18 +63,23 @@ jobs: echo "TEST_RESULT=failure" >> "$GITHUB_OUTPUT" fi + - name: Clean up Erigon data directory + if: always() + run: | + rm -rf $ERIGON_DATA_DIR + - name: Resume the Erigon instance dedicated to db maintenance run: | python3 $ERIGON_QA_PATH/test_system/db-producer/resume_production.py || true - name: Save test results - if: always() + if: steps.test_step.outputs.test_executed == 'true' env: TEST_RESULT: ${{ steps.test_step.outputs.TEST_RESULT }} run: python3 $ERIGON_QA_PATH/test_system/qa-tests/uploads/upload_test_results.py --repo erigon --commit $(git rev-parse HEAD) --test_name snap-download --outcome $TEST_RESULT --result_file ${{ github.workspace }}/result.json - name: Upload test results - if: always() + if: steps.test_step.outputs.test_executed == 'true' uses: actions/upload-artifact@v4 with: name: test-results diff --git a/.github/workflows/qa-tip-tracking.yml b/.github/workflows/qa-tip-tracking.yml index 587242ff6e8..c9a45413276 100644 --- a/.github/workflows/qa-tip-tracking.yml +++ b/.github/workflows/qa-tip-tracking.yml @@ -3,10 +3,12 @@ name: QA - Tip tracking on: schedule: - cron: '0 
0 * * *' # Run every day at 00:00 AM UTC + workflow_dispatch: # Run manually jobs: long-running-test: - runs-on: self-hosted + runs-on: [self-hosted, Erigon3] + timeout-minutes: 600 env: ERIGON_REFERENCE_DATA_DIR: /opt/erigon-versions/reference-version/datadir ERIGON_TESTBED_DATA_DIR: /opt/erigon-testbed/datadir @@ -29,7 +31,7 @@ jobs: - name: Pause the Erigon instance dedicated to db maintenance run: | - curl -X POST -H "Accept: application/json" -d '{"status": "paused"}' http://localhost:8080/production/default/status || true + python3 $ERIGON_QA_PATH/test_system/db-producer/pause_production.py || true - name: Restore Erigon Testbed Data Directory run: | @@ -48,13 +50,14 @@ jobs: # Capture monitoring script exit status test_exit_status=$? + # Record that the test script returned; the "Save test results" and "Upload test results" steps are gated on this output + echo "test_executed=true" >> "$GITHUB_OUTPUT" + # Clean up Erigon process if it's still running if kill -0 $ERIGON_PID 2> /dev/null; then echo "Terminating Erigon" kill $ERIGON_PID wait $ERIGON_PID - else - echo "Erigon has already terminated" fi # Check test runner script exit status @@ -67,6 +70,7 @@ jobs: fi - name: Delete Erigon Testbed Data Directory + if: always() run: | rm -rf $ERIGON_TESTBED_DATA_DIR @@ -75,13 +79,13 @@ jobs: python3 $ERIGON_QA_PATH/test_system/db-producer/resume_production.py || true - name: Save test results - if: always() + if: steps.test_step.outputs.test_executed == 'true' env: TEST_RESULT: ${{ steps.test_step.outputs.TEST_RESULT }} run: python3 $ERIGON_QA_PATH/test_system/qa-tests/uploads/upload_test_results.py --repo erigon --commit $(git rev-parse HEAD) --test_name tip-tracking --outcome $TEST_RESULT --result_file ${{ github.workspace }}/result.json - name: Upload test results - if: always() + if: steps.test_step.outputs.test_executed == 'true' uses: actions/upload-artifact@v4 with: name: test-results