-
Notifications
You must be signed in to change notification settings - Fork 3.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
424 additions
and
68 deletions.
There are no files selected for viewing
43 changes: 43 additions & 0 deletions
43
.github/workflows/provision-replay-verify-archive-disks.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# Provisions the archive disks used by replay-verify by calling the reusable
# storage-provision workflow for testnet and for mainnet.
# To trigger it manually, go to the Actions tab of the repo, select
# "provision-replay-verify-archive-disks" and click "Run workflow".
#
# It also runs on a schedule, and on pull requests that touch this workflow or
# the reusable workflow it calls.

name: 'provision-replay-verify-archive-disks'
on:
  # Manual runs from the GitHub UI or CLI.
  workflow_dispatch:
    inputs:
      NETWORK:
        description: The network to provision storage for.
        type: string
        required: true
      BRANCH:
        description: The branch to provision storage for.
        type: string
  pull_request:
    paths:
      - ".github/workflows/provision-replay-verify-archive-disks.yaml"
      - ".github/workflows/workflow-run-replay-verify-archive-storage-provision.yaml"
  schedule:
    # The main branch cadence: minute 0 of hour 22 on Sunday, Tuesday and Thursday.
    - cron: '0 22 * * 0,2,4'
|
||
permissions:
  # Required for GCP Workload Identity Federation, which we use to log in to
  # Google Artifact Registry.
  id-token: write
  contents: read
  issues: read
  pull-requests: read
|
||
jobs:
  # Each job calls the reusable storage-provision workflow for one network.
  # Manual (workflow_dispatch) runs honor the required NETWORK input and only
  # provision the requested network; scheduled and pull_request runs carry no
  # inputs and provision both networks.
  replay-testnet:
    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.NETWORK == 'testnet' }}
    uses: ./.github/workflows/workflow-run-replay-verify-archive-storage-provision.yaml
    secrets: inherit
    with:
      NETWORK: testnet
      # Empty unless the run was started manually with a BRANCH value.
      BRANCH: ${{ github.event.inputs.BRANCH }}
  replay-mainnet:
    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.NETWORK == 'mainnet' }}
    uses: ./.github/workflows/workflow-run-replay-verify-archive-storage-provision.yaml
    secrets: inherit
    with:
      NETWORK: mainnet
      # Empty unless the run was started manually with a BRANCH value.
      BRANCH: ${{ github.event.inputs.BRANCH }}
80 changes: 80 additions & 0 deletions
80
.github/workflows/workflow-run-replay-verify-archive-storage-provision.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
# Reusable workflow that provisions replay-verify archive storage for one
# network. The display name is specific to this file so it can be told apart
# from the other replay-verify reusable workflows in the Actions UI.
name: "*run replay-verify archive storage provision reusable workflow"

on:
  # This allows the workflow to be triggered from another workflow
  workflow_call:
    inputs:
      NETWORK:
        required: true
        type: string
        description: The network to provision storage for.
      BRANCH:
        type: string
        description: The branch to provision storage for.
  # This allows the workflow to be triggered manually from the Github UI or CLI
  workflow_dispatch:
    inputs:
      NETWORK:
        description: The network to provision storage for.
        type: string
        required: true
      BRANCH:
        description: The branch to provision storage for.
        type: string
jobs:
  # Checks out the requested branch, authenticates to Google Cloud, installs the
  # Python/Poetry toolchain and runs archive_disk_utils.py for the given network.
  provision:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Read BRANCH from the `inputs` context so it is honored both when this
          # workflow is called from another workflow and when it is dispatched
          # manually.
          # NOTE(review): the fallback is a feature-branch name; confirm whether
          # it should be the default branch instead.
          ref: ${{ inputs.BRANCH || 'add_replay_verify_workflow' }}

      - name: Authenticate to Google Cloud
        uses: "google-github-actions/auth@v2"
        with:
          workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
          service_account: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }}

      # Runs once, after authentication (the identical step that used to run
      # before authentication was redundant).
      - name: "Setup GCloud project"
        shell: bash
        run: gcloud config set project aptos-devinfra-0

      - name: "Get GCP project id"
        id: get-gcp-project-id
        shell: bash
        # Print the value to the log and also record it as a step output, so
        # that steps.get-gcp-project-id.outputs.GCP_PROJECT_ID is actually set.
        run: echo "GCP_PROJECT_ID=$(gcloud config get-value project)" | tee -a "$GITHUB_OUTPUT"

      - name: Set up Cloud SDK
        uses: "google-github-actions/setup-gcloud@v2"
        with:
          install_components: "kubectl,gke-gcloud-auth-plugin"

      - name: Setup python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10.12"

      # Install Poetry.
      - name: Install and configure Poetry
        uses: snok/install-poetry@v1
        with:
          version: "1.5.1"
          virtualenvs-create: true
          virtualenvs-in-project: false

      - name: Install poetry project
        run: poetry install --no-root
        shell: bash
        working-directory: ./testsuite/replay-verify

      - name: "Provision storage"
        working-directory: ./testsuite/replay-verify
        env:
          # Passed through the environment instead of being interpolated into
          # the script text, so the input value cannot inject shell syntax.
          NETWORK: ${{ inputs.NETWORK }}
        run: poetry run python archive_disk_utils.py --network "$NETWORK"
|
||
|
||
|
253 changes: 253 additions & 0 deletions
253
.github/workflows/workflow-run-replay-verify-on-archive.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,253 @@ | ||
name: "*run replay-verify reusable workflow"

# NOTE(review): the jobs in this workflow also read inputs.BUCKET,
# inputs.SUB_DIR, inputs.BACKUP_CONFIG_TEMPLATE_PATH, inputs.HISTORY_START,
# inputs.MAX_VERSIONS_PER_RANGE, inputs.TIMEOUT_MINUTES and
# inputs.GIT_CREDENTIALS, none of which are declared below, while
# START_VERSION and END_VERSION are declared but not read by the jobs.
# Confirm the intended input set.
on:
  # This allows the workflow to be triggered from another workflow
  workflow_call:
    inputs:
      GIT_SHA:
        required: true
        type: string
        description: The git SHA1 to test.
      # replay-verify config
      START_VERSION:
        required: false
        type: string
        description: The history start to use for the backup. If not specified, it will use the default history start.
      END_VERSION:
        required: false
        type: string
        description: The end version to use for the backup. If not specified, it will use the latest version.
      RANGES_TO_SKIP:
        required: false
        type: string
        description: The optional list of transaction ranges to skip.
      RUNS_ON:
        description: "The runner to use for the job."
        type: string
        # Optional so that the default below can take effect; callers that
        # already pass RUNS_ON are unaffected.
        required: false
        default: "medium-perf-local-ssd"
  # This allows the workflow to be triggered manually from the Github UI or CLI
  workflow_dispatch:
    inputs:
      GIT_SHA:
        required: true
        type: string
        description: The git SHA1 to test.
      # replay-verify config
      START_VERSION:
        required: false
        type: string
        description: The history start to use for the backup. If not specified, it will use the default history start.
      END_VERSION:
        required: false
        type: string
        description: The end version to use for the backup. If not specified, it will use the latest version.
      RANGES_TO_SKIP:
        required: false
        type: string
        description: The optional list of transaction ranges to skip.
      RUNS_ON:
        description: "The runner to use for the job."
        type: string
        required: true
        default: "high-perf-docker-with-local-ssd"
jobs:
  # Builds (or restores from cache) the aptos-debugger binary, generates the
  # list of replay-verify jobs into jobs.json, and exposes the job indices as
  # the `job_ids` output consumed by the replay-verify matrix.
  prepare:
    runs-on: ${{ inputs.RUNS_ON }}
    outputs:
      job_ids: ${{ steps.gen-jobs.outputs.job_ids }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.GIT_SHA }}

      - name: Load cached aptos-debugger binary
        id: cache-aptos-debugger-binary
        uses: actions/cache@v4
        with:
          # copy the binary to the root of the repo and cache it there, because rust-setup calls a cache-rust action
          # which cleans up the target directory in its post action
          path: |
            aptos-debugger
          key: aptos-debugger-${{ inputs.GIT_SHA || github.sha }}

      - name: Prepare for build if not cached
        if: steps.cache-aptos-debugger-binary.outputs.cache-hit != 'true'
        uses: aptos-labs/aptos-core/.github/actions/rust-setup@main
        with:
          # NOTE(review): GIT_CREDENTIALS is not a declared input of this
          # workflow; confirm where this value is meant to come from.
          GIT_CREDENTIALS: ${{ inputs.GIT_CREDENTIALS }}

      - name: Build and strip aptos-debugger binary if not cached
        if: steps.cache-aptos-debugger-binary.outputs.cache-hit != 'true'
        shell: bash
        run: |
          cargo build --release -p aptos-debugger
          strip -s target/release/aptos-debugger
          cp target/release/aptos-debugger .

      - name: Install GCloud SDK
        uses: "google-github-actions/setup-gcloud@v2"
        with:
          version: ">= 418.0.0"
          install_components: "kubectl,gke-gcloud-auth-plugin"

      - name: get timestamp to use in cache key
        id: get-timestamp
        run: echo "ts=$(date +%s)" >> "$GITHUB_OUTPUT"

      - name: Load cached backup storage metadata cache dir (and save back afterwards)
        uses: actions/cache@v4
        with:
          path: metadata_cache
          # The timestamp suffix makes every run save a fresh entry, while
          # restore-keys picks up a previous entry with the same prefix.
          key: metadata-cache-${{ inputs.BUCKET }}/${{ inputs.SUB_DIR }}-${{ steps.get-timestamp.outputs.ts }}
          restore-keys: metadata-cache-${{ inputs.BUCKET }}/${{ inputs.SUB_DIR }}-

      - name: Generate job ranges
        id: gen-jobs
        env:
          BUCKET: ${{ inputs.BUCKET }}
          SUB_DIR: ${{ inputs.SUB_DIR }}
        # The aptos-debugger command must end at `--output-json-file jobs.json`
        # with no trailing backslash; otherwise the jq command on the next line
        # is passed to aptos-debugger as extra arguments.
        run: |
          ./aptos-debugger aptos-db gen-replay-verify-jobs \
            --metadata-cache-dir ./metadata_cache \
            --command-adapter-config ${{ inputs.BACKUP_CONFIG_TEMPLATE_PATH }} \
            --start-version ${{ inputs.HISTORY_START }} \
            --ranges-to-skip "${{ inputs.RANGES_TO_SKIP }}" \
            --max-versions-per-range ${{ inputs.MAX_VERSIONS_PER_RANGE }} \
            --max-ranges-per-job 16 \
            --output-json-file jobs.json

          # Turn the job list into the index array [0, 1, ..., N-1] for the matrix.
          jq -c 'length as $N | [range(0; $N)]' jobs.json > job_ids.json

          cat job_ids.json
          jq . jobs.json

          echo "job_ids=$(cat job_ids.json)" >> "$GITHUB_OUTPUT"

      - name: Cache backup storage config and job definition
        uses: actions/cache/save@v4
        with:
          path: |
            ${{ inputs.BACKUP_CONFIG_TEMPLATE_PATH }}
            jobs.json
          key: backup-config-${{ inputs.BUCKET }}/${{ inputs.SUB_DIR }}-${{ github.run_id }}

  # Runs one matrix entry per job id produced by `prepare`; each entry replays
  # all partitions of its job in parallel.
  replay-verify:
    needs: prepare
    # NOTE(review): TIMEOUT_MINUTES is not a declared input of this workflow,
    # so as written this presumably always falls back to 180. Confirm.
    timeout-minutes: ${{ inputs.TIMEOUT_MINUTES || 180 }}
    runs-on: ${{ inputs.RUNS_ON }}
    strategy:
      fail-fast: false
      matrix:
        job_id: ${{ fromJson(needs.prepare.outputs.job_ids) }}
    steps:
      - name: Load cached aptos-debugger binary
        uses: actions/cache/restore@v4
        with:
          path: |
            aptos-debugger
          key: aptos-debugger-${{ inputs.GIT_SHA || github.sha }}
          fail-on-cache-miss: true

      - name: Load cached backup storage metadata cache dir
        uses: actions/cache/restore@v4
        with:
          path: metadata_cache
          key: metadata-cache-${{ inputs.BUCKET }}/${{ inputs.SUB_DIR }}-
          fail-on-cache-miss: true

      - name: Load cached backup storage config and job definitions
        uses: actions/cache/restore@v4
        with:
          path: |
            ${{ inputs.BACKUP_CONFIG_TEMPLATE_PATH }}
            jobs.json
          key: backup-config-${{ inputs.BUCKET }}/${{ inputs.SUB_DIR }}-${{ github.run_id }}
          fail-on-cache-miss: true

      - name: Install GCloud SDK
        uses: "google-github-actions/setup-gcloud@v2"
        with:
          version: ">= 418.0.0"
          install_components: "kubectl,gke-gcloud-auth-plugin"

      - name: Run replay-verify in parallel
        env:
          BUCKET: ${{ inputs.BUCKET }}
          SUB_DIR: ${{ inputs.SUB_DIR }}
        shell: bash
        run: |
          set -o nounset -o errexit -o pipefail

          # Replays one partition, retrying with a growing back-off.
          # Args: <partition index> <id> <begin version> <end version> <description>
          replay() {
              idx=$1
              id=$2
              begin=$3
              end=$4
              desc=$5

              echo ---------
              echo Job start. $id: $desc
              echo ---------

              # Each partition works on its own copy of the metadata cache and
              # its own target DB directory.
              MC=metadata_cache_$idx
              cp -r metadata_cache $MC
              DB=db_$idx

              for try in {0..6}
              do
                  if [ $try -gt 0 ]; then
                      SLEEP=$((10 * $try))
                      echo "sleeping for $SLEEP seconds before retry #$try" >&2
                      sleep $SLEEP
                  fi

                  res=0
                  ./aptos-debugger aptos-db replay-verify \
                    --metadata-cache-dir $MC \
                    --command-adapter-config ${{ inputs.BACKUP_CONFIG_TEMPLATE_PATH }} \
                    --start-version $begin \
                    --end-version $end \
                    --lazy-quit \
                    --enable-storage-sharding \
                    --target-db-dir $DB \
                    --concurrent-downloads 8 \
                    --replay-concurrency-level 4 \
                    || res=$?

                  # Exit codes 0 and 2 are final and are not retried.
                  # NOTE(review): the meaning of exit code 2 is defined by
                  # aptos-debugger and is not visible here.
                  if [[ $res == 0 || $res == 2 ]]
                  then
                      return $res
                  fi
              done
              return 1
          }

          # Start one background replay per partition of this matrix job,
          # prefixing every output line with the partition index.
          pids=()
          idx=0
          while read id begin end desc; do
              replay $idx $id $begin $end "$desc" 2>&1 | sed "s/^/[partition $idx]: /" &
              pids[$idx]=$!
              idx=$((idx+1))
          done < <(jq '.[${{ matrix.job_id }}][]' jobs.json)

          # Wait for every partition; the step exits with the status of the
          # last partition that returned non-zero, or 0 if all succeeded.
          res=0
          for idx in `seq 0 $((idx-1))`
          do
              range_res=0
              wait ${pids[$idx]} || range_res=$?
              echo partition $idx returned $range_res
              if [[ $range_res != 0 ]]
              then
                  res=$range_res
              fi
          done

          echo All partitions done, returning $res
          exit $res
Oops, something went wrong.