From fd7f14ee6c1d1bcc9b3e7a151464ed90f76b6de2 Mon Sep 17 00:00:00 2001 From: mrekucci Date: Mon, 8 Jul 2024 15:34:34 +0200 Subject: [PATCH] feat(infra): fail fast on first deployment error --- .github/workflows/infrastructure.yml | 2 +- infrastructure/nomad/playbooks/deploy.yml | 12 +++++-- .../templates/jobs/artifacts.nomad.j2 | 10 ++++++ .../jobs/contracts-deployer.nomad.j2 | 12 +++++++ .../templates/jobs/datadog-agent.nomad.j2 | 12 +++++++ .../templates/jobs/mev-commit-bridge.nomad.j2 | 12 +++++++ .../jobs/mev-commit-emulator.nomad.j2 | 12 +++++++ .../templates/jobs/mev-commit-faucet.nomad.j2 | 12 +++++++ .../templates/jobs/mev-commit-funder.nomad.j2 | 12 +++++++ .../templates/jobs/mev-commit-geth.nomad.j2 | 12 +++++++ .../templates/jobs/mev-commit-oracle.nomad.j2 | 36 +++++++++++++++---- .../templates/jobs/mev-commit.nomad.j2 | 12 +++++++ p2p/examples/provideremulator/main.go | 2 -- p2p/integrationtest/provider/main.go | 2 -- p2p/integrationtest/real-bidder/main.go | 2 +- 15 files changed, 146 insertions(+), 16 deletions(-) diff --git a/.github/workflows/infrastructure.yml b/.github/workflows/infrastructure.yml index 8f616bc91..c89c0bce2 100644 --- a/.github/workflows/infrastructure.yml +++ b/.github/workflows/infrastructure.yml @@ -45,7 +45,7 @@ jobs: cluster: name: Setup and Test Nomad Cluster runs-on: ubuntu-22.04 - timeout-minutes: 180 + timeout-minutes: 60 steps: - name: Print System Information diff --git a/infrastructure/nomad/playbooks/deploy.yml b/infrastructure/nomad/playbooks/deploy.yml index 6770aec74..cf1671e18 100644 --- a/infrastructure/nomad/playbooks/deploy.yml +++ b/infrastructure/nomad/playbooks/deploy.yml @@ -598,6 +598,11 @@ while true; do STATUS=$(echo "${RESULT}" | jq -r '.[0].Allocations[0].ClientStatus') + if [ "${STATUS}" = "failed" ]; then + echo "Deployment failed for {{ job.name }}:" + echo "${RESULT}" + exit 1 + fi case "${JOB_TYPE}" in service) @@ -623,7 +628,7 @@ CURRENT_TIME="$(date +%s)" ELAPSED_TIME="$(( CURRENT_TIME - START_TIME ))" if [ ${ELAPSED_TIME} -ge ${TIMEOUT} ]; then - echo "Deploy timed out for {{ job.name }}, current status: ${STATUS}" + echo "Deployment timed out for {{ job.name }}, current status: ${STATUS}" exit 1 fi @@ -642,8 +647,9 @@ label: "{{ item.name }}" vars: job: "{{ item }}" - register: result - failed_when: result.rc != 0 + register: job_result + failed_when: job_result.rc != 0 + when: job_result is not defined or job_result.rc == 0 - name: Post Deployment Info ansible.builtin.debug: diff --git a/infrastructure/nomad/playbooks/templates/jobs/artifacts.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/artifacts.nomad.j2 index eaf875527..a5a30ec05 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/artifacts.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/artifacts.nomad.j2 @@ -45,6 +45,16 @@ job "{% if env != 'devenv' %}{{ environments[env].version }}{% else %}artifacts- group "artifacts-group" { count = 1 + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + attempts = 0 + unlimited = false + } + network { mode = "bridge" diff --git a/infrastructure/nomad/playbooks/templates/jobs/contracts-deployer.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/contracts-deployer.nomad.j2 index 6f75af951..739db7595 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/contracts-deployer.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/contracts-deployer.nomad.j2 @@ -5,6 +5,18 @@ job "{{ job.name }}" { group "{{ job.name }}-group" { count = {{ job.count }} + {% if env == 'devenv' %} + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + attempts = 0 + unlimited = false + } + {% endif %} + network { dns { servers = {{ (ansible_facts['dns']['nameservers'] + ['1.1.1.1']) | tojson }} diff --git a/infrastructure/nomad/playbooks/templates/jobs/datadog-agent.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/datadog-agent.nomad.j2 index 6d6756db9..0f9cf834f 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/datadog-agent.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/datadog-agent.nomad.j2 @@ -5,6 +5,18 @@ job "{{ job.name }}" { group "{{ job.name }}-group" { count = {{ job.count }} + {% if env == 'devenv' %} + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + attempts = 0 + unlimited = false + } + {% endif %} + network { mode = "bridge" diff --git a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-bridge.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-bridge.nomad.j2 index d6ebbbe4d..d3daeded1 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-bridge.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-bridge.nomad.j2 @@ -5,6 +5,18 @@ job "{{ job.name }}" { group "{{ job.name }}-group" { count = {{ job.count }} + {% if env == 'devenv' %} + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + attempts = 0 + unlimited = false + } + {% endif %} + network { mode = "bridge" diff --git a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-emulator.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-emulator.nomad.j2 index 1a387c401..afa29d354 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-emulator.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-emulator.nomad.j2 @@ -5,6 +5,18 @@ job "{{ job.name }}" { group "{{ job.name }}-group" { count = {{ job.count }} + {% if env == 'devenv' %} + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + attempts = 0 + unlimited = false + } + {% endif %} + network { mode = "bridge" diff --git a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-faucet.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-faucet.nomad.j2 index 674abb253..4d4c58479 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-faucet.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-faucet.nomad.j2 @@ -5,6 +5,18 @@ job "{{ job.name }}" { group "{{ job.name }}-group" { count = {{ job.count }} + {% if env == 'devenv' %} + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + attempts = 0 + unlimited = false + } + {% endif %} + network { mode = "bridge" diff --git a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-funder.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-funder.nomad.j2 index f7f18ec36..6f86b01b5 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-funder.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-funder.nomad.j2 @@ -6,6 +6,18 @@ job "{{ job.name }}" { group "{{ job.name }}-group" { count = {{ job.count }} + {% if env == 'devenv' %} + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + attempts = 0 + unlimited = false + } + {% endif %} + network { dns { servers = {{ (ansible_facts['dns']['nameservers'] + ['1.1.1.1']) | tojson }} diff --git a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-geth.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-geth.nomad.j2 index 2f04b0399..c78d8c3ee 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-geth.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-geth.nomad.j2 @@ -5,6 +5,18 @@ job "{{ job.name }}" { group "{{ job.name }}-group" { count = {{ job.count }} + {% if env == 'devenv' %} + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + attempts = 0 + unlimited = false + } + {% endif %} + network { mode = "bridge" diff --git a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-oracle.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-oracle.nomad.j2 index 45551684b..20f56d485 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-oracle.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-oracle.nomad.j2 @@ -5,6 +5,18 @@ job "{{ job.name }}" { group "{{ job.name }}-group" { count = {{ job.count }} + {% if env == 'devenv' %} + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + attempts = 0 + unlimited = false + } + {% endif %} + network { mode = "bridge" @@ -87,16 +99,15 @@ job "{{ job.name }}" { mkdir -p /var/run/postgresql > /dev/null 2>&1 pg_ctl initdb --silent --pgdata="${POSTGRES_DATA}" if [ $? -ne 0 ]; then - echo "Failed to initialize database." + echo "Failed to initialize PostgreSQL." exit 1 fi cp /alloc/data/postgres.env "${POSTGRES_DATA}/.env" - postgres -D "${POSTGRES_DATA}" & - pid=$! - if ! timeout 5m bash -c 'until pg_ctl status --pgdata="${POSTGRES_DATA}" --silent --no-wait; do sleep 1; done'; then - echo "Waiting for PostgreSQL to start..." - sleep 1 + pg_ctl start --pgdata="${POSTGRES_DATA}" --silent --wait --timeout=300 > /dev/null 2>&1 + if [ $? -ne 0 ]; then + echo "Failed to start PostgreSQL." + exit 1 fi createuser --superuser postgres > /dev/null 2>&1 @@ -109,7 +120,13 @@ job "{{ job.name }}" { GRANT ALL PRIVILEGES ON DATABASE ${POSTGRES_DB} TO ${POSTGRES_USERNAME};" echo "Database initialized and configured successfully." - wait $pid + pg_ctl stop --pgdata="${POSTGRES_DATA}" --silent --wait --timeout=300 > /dev/null 2>&1 + if [ $? -ne 0 ]; then + echo "Failed to stop PostgreSQL." + exit 1 + fi + + postgres -D "${POSTGRES_DATA}" {% endraw %} EOH destination = "local/run.sh" @@ -303,6 +320,11 @@ job "{{ job.name }}" { export MEV_ORACLE_PG_PASSWORD="${POSTGRES_PASSWORD}" export MEV_ORACLE_PG_DBNAME="${POSTGRES_DB}" + if ! timeout 5m bash -c 'until pg_isready -h ${MEV_ORACLE_PG_HOST} -p ${MEV_ORACLE_PG_PORT} -U ${MEV_ORACLE_PG_USER} -d ${MEV_ORACLE_PG_DBNAME}; do sleep 1; done'; then + echo "Waiting for PostgreSQL to start..." + sleep 1 + fi + chmod +x local/mev-commit-oracle local/mev-commit-oracle start {% endraw %} diff --git a/infrastructure/nomad/playbooks/templates/jobs/mev-commit.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/mev-commit.nomad.j2 index c10eeefc9..df0f366ad 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/mev-commit.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/mev-commit.nomad.j2 @@ -5,6 +5,18 @@ job "{{ job.name }}" { group "{{ job.name }}-group" { count = {{ job.count }} + {% if env == 'devenv' %} + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + attempts = 0 + unlimited = false + } + {% endif %} + network { mode = "bridge" diff --git a/p2p/examples/provideremulator/main.go b/p2p/examples/provideremulator/main.go index 329279481..2820064d1 100644 --- a/p2p/examples/provideremulator/main.go +++ b/p2p/examples/provideremulator/main.go @@ -54,8 +54,6 @@ func main() { return } - fmt.Printf("connected to provider %s, receiving bids...\n", *serverAddr) - for { select { case bid, more := <-bidS: diff --git a/p2p/integrationtest/provider/main.go b/p2p/integrationtest/provider/main.go index 2e55ca74c..01a0bba6d 100644 --- a/p2p/integrationtest/provider/main.go +++ b/p2p/integrationtest/provider/main.go @@ -131,8 +131,6 @@ func main() { return } - fmt.Printf("connected to provider %s, receiving bids...\n", *serverAddr) - for bid := range bidS { receivedBids.Inc() buf, err := json.Marshal(bid) diff --git a/p2p/integrationtest/real-bidder/main.go b/p2p/integrationtest/real-bidder/main.go index b02943058..7e903c2c6 100644 --- a/p2p/integrationtest/real-bidder/main.go +++ b/p2p/integrationtest/real-bidder/main.go @@ -225,7 +225,7 @@ func main() { } bundleLen := rand.Intn(10) - bundleStart := rand.Intn(len(currentBlock.txns) - 1) + bundleStart := rand.Intn(len(currentBlock.txns)) bundleEnd := bundleStart + bundleLen if bundleEnd > len(currentBlock.txns) { bundleEnd = len(currentBlock.txns) - 1