From d358bd17e76e6be778d0ddb97e541e08a7776d61 Mon Sep 17 00:00:00 2001 From: mrekucci Date: Mon, 8 Jul 2024 15:34:34 +0200 Subject: [PATCH] feat(infra): fail fast on first deployment error --- infrastructure/nomad/playbooks/deploy.yml | 12 +++++-- .../templates/jobs/artifacts.nomad.j2 | 10 ++++++ .../jobs/contracts-deployer.nomad.j2 | 22 +++++++++++- .../templates/jobs/datadog-agent.nomad.j2 | 12 +++++++ .../templates/jobs/mev-commit-bridge.nomad.j2 | 12 +++++++ .../jobs/mev-commit-emulator.nomad.j2 | 12 +++++++ .../templates/jobs/mev-commit-faucet.nomad.j2 | 12 +++++++ .../templates/jobs/mev-commit-funder.nomad.j2 | 12 +++++++ .../templates/jobs/mev-commit-geth.nomad.j2 | 12 +++++++ .../templates/jobs/mev-commit-oracle.nomad.j2 | 36 +++++++++++++++---- .../templates/jobs/mev-commit.nomad.j2 | 12 +++++++ p2p/examples/provideremulator/main.go | 2 -- p2p/integrationtest/provider/main.go | 2 -- 13 files changed, 153 insertions(+), 15 deletions(-) diff --git a/infrastructure/nomad/playbooks/deploy.yml b/infrastructure/nomad/playbooks/deploy.yml index 6770aec74..cf1671e18 100644 --- a/infrastructure/nomad/playbooks/deploy.yml +++ b/infrastructure/nomad/playbooks/deploy.yml @@ -598,6 +598,11 @@ while true; do STATUS=$(echo "${RESULT}" | jq -r '.[0].Allocations[0].ClientStatus') + if [ "${STATUS}" = "failed" ]; then + echo "Deployment failed for {{ job.name }}:" + echo "${RESULT}" + exit 1 + fi case "${JOB_TYPE}" in service) @@ -623,7 +628,7 @@ CURRENT_TIME="$(date +%s)" ELAPSED_TIME="$(( CURRENT_TIME - START_TIME ))" if [ ${ELAPSED_TIME} -ge ${TIMEOUT} ]; then - echo "Deploy timed out for {{ job.name }}, current status: ${STATUS}" + echo "Deployment timed out for {{ job.name }}, current status: ${STATUS}" exit 1 fi @@ -642,8 +647,9 @@ label: "{{ item.name }}" vars: job: "{{ item }}" - register: result - failed_when: result.rc != 0 + register: job_result + failed_when: job_result.rc != 0 + when: job_result is not defined or job_result.rc == 0 - name: Post Deployment Info 
ansible.builtin.debug: diff --git a/infrastructure/nomad/playbooks/templates/jobs/artifacts.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/artifacts.nomad.j2 index eaf875527..a5a30ec05 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/artifacts.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/artifacts.nomad.j2 @@ -45,6 +45,16 @@ job "{% if env != 'devenv' %}{{ environments[env].version }}{% else %}artifacts- group "artifacts-group" { count = 1 + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + attempts = 0 + unlimited = false + } + network { mode = "bridge" diff --git a/infrastructure/nomad/playbooks/templates/jobs/contracts-deployer.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/contracts-deployer.nomad.j2 index 6f75af951..9e1d33b0a 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/contracts-deployer.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/contracts-deployer.nomad.j2 @@ -5,6 +5,18 @@ job "{{ job.name }}" { group "{{ job.name }}-group" { count = {{ job.count }} + {% if env == 'devenv' %} + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + attempts = 0 + unlimited = false + } + {% endif %} + network { dns { servers = {{ (ansible_facts['dns']['nameservers'] + ['1.1.1.1']) | tojson }} @@ -31,6 +43,14 @@ job "{{ job.name }}" { port = "{{ port_name }}" tags = ["{{ port_name }}"] provider = "nomad" + + check { + type = "http" + path = "/" + port = "{{ port_name }}" + interval = "10s" + timeout = "5s" + } } {% endfor %} @@ -145,7 +165,7 @@ job "{{ job.name }}" { echo "Failed to transfer ownership!" exit 1 fi - echo "Ownership transfered successfully." + echo "Ownership transferred successfully." 
python3 -m http.server {{ job.ports[0]['http']['static'] }} --directory /local/www # endtodo diff --git a/infrastructure/nomad/playbooks/templates/jobs/datadog-agent.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/datadog-agent.nomad.j2 index 6d6756db9..0f9cf834f 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/datadog-agent.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/datadog-agent.nomad.j2 @@ -5,6 +5,18 @@ job "{{ job.name }}" { group "{{ job.name }}-group" { count = {{ job.count }} + {% if env == 'devenv' %} + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + attempts = 0 + unlimited = false + } + {% endif %} + network { mode = "bridge" diff --git a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-bridge.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-bridge.nomad.j2 index d6ebbbe4d..d3daeded1 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-bridge.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-bridge.nomad.j2 @@ -5,6 +5,18 @@ job "{{ job.name }}" { group "{{ job.name }}-group" { count = {{ job.count }} + {% if env == 'devenv' %} + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + attempts = 0 + unlimited = false + } + {% endif %} + network { mode = "bridge" diff --git a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-emulator.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-emulator.nomad.j2 index 1a387c401..afa29d354 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-emulator.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-emulator.nomad.j2 @@ -5,6 +5,18 @@ job "{{ job.name }}" { group "{{ job.name }}-group" { count = {{ job.count }} + {% if env == 'devenv' %} + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + attempts = 0 + unlimited = false + } + {% endif %} + network { mode = "bridge" diff --git 
a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-faucet.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-faucet.nomad.j2 index 674abb253..4d4c58479 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-faucet.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-faucet.nomad.j2 @@ -5,6 +5,18 @@ job "{{ job.name }}" { group "{{ job.name }}-group" { count = {{ job.count }} + {% if env == 'devenv' %} + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + attempts = 0 + unlimited = false + } + {% endif %} + network { mode = "bridge" diff --git a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-funder.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-funder.nomad.j2 index f7f18ec36..6f86b01b5 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-funder.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-funder.nomad.j2 @@ -6,6 +6,18 @@ job "{{ job.name }}" { group "{{ job.name }}-group" { count = {{ job.count }} + {% if env == 'devenv' %} + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + attempts = 0 + unlimited = false + } + {% endif %} + network { dns { servers = {{ (ansible_facts['dns']['nameservers'] + ['1.1.1.1']) | tojson }} diff --git a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-geth.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-geth.nomad.j2 index 2f04b0399..c78d8c3ee 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-geth.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-geth.nomad.j2 @@ -5,6 +5,18 @@ job "{{ job.name }}" { group "{{ job.name }}-group" { count = {{ job.count }} + {% if env == 'devenv' %} + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + attempts = 0 + unlimited = false + } + {% endif %} + network { mode = "bridge" diff --git a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-oracle.nomad.j2 
b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-oracle.nomad.j2 index 45551684b..20f56d485 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-oracle.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-oracle.nomad.j2 @@ -5,6 +5,18 @@ job "{{ job.name }}" { group "{{ job.name }}-group" { count = {{ job.count }} + {% if env == 'devenv' %} + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + attempts = 0 + unlimited = false + } + {% endif %} + network { mode = "bridge" @@ -87,16 +99,15 @@ job "{{ job.name }}" { mkdir -p /var/run/postgresql > /dev/null 2>&1 pg_ctl initdb --silent --pgdata="${POSTGRES_DATA}" if [ $? -ne 0 ]; then - echo "Failed to initialize database." + echo "Failed to initialize PostgreSQL." exit 1 fi cp /alloc/data/postgres.env "${POSTGRES_DATA}/.env" - postgres -D "${POSTGRES_DATA}" & - pid=$! - if ! timeout 5m bash -c 'until pg_ctl status --pgdata="${POSTGRES_DATA}" --silent --no-wait; do sleep 1; done'; then - echo "Waiting for PostgreSQL to start..." - sleep 1 + pg_ctl start --pgdata="${POSTGRES_DATA}" --silent --wait --timeout=300 > /dev/null 2>&1 + if [ $? -ne 0 ]; then + echo "Failed to start PostgreSQL." + exit 1 fi createuser --superuser postgres > /dev/null 2>&1 @@ -109,7 +120,13 @@ job "{{ job.name }}" { GRANT ALL PRIVILEGES ON DATABASE ${POSTGRES_DB} TO ${POSTGRES_USERNAME};" echo "Database initialized and configured successfully." - wait $pid + pg_ctl stop --pgdata="${POSTGRES_DATA}" --silent --wait --timeout=300 > /dev/null 2>&1 + if [ $? -ne 0 ]; then + echo "Failed to stop PostgreSQL." + exit 1 + fi + + postgres -D "${POSTGRES_DATA}" {% endraw %} EOH destination = "local/run.sh" @@ -303,6 +320,11 @@ job "{{ job.name }}" { export MEV_ORACLE_PG_PASSWORD="${POSTGRES_PASSWORD}" export MEV_ORACLE_PG_DBNAME="${POSTGRES_DB}" + if ! 
timeout 5m bash -c 'until pg_isready -h ${MEV_ORACLE_PG_HOST} -p ${MEV_ORACLE_PG_PORT} -U ${MEV_ORACLE_PG_USER} -d ${MEV_ORACLE_PG_DBNAME}; do sleep 1; done'; then + echo "Timed out waiting for PostgreSQL to start." + exit 1 + fi + chmod +x local/mev-commit-oracle local/mev-commit-oracle start {% endraw %} diff --git a/infrastructure/nomad/playbooks/templates/jobs/mev-commit.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/mev-commit.nomad.j2 index c10eeefc9..df0f366ad 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/mev-commit.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/mev-commit.nomad.j2 @@ -5,6 +5,18 @@ job "{{ job.name }}" { group "{{ job.name }}-group" { count = {{ job.count }} + {% if env == 'devenv' %} + restart { + attempts = 0 + mode = "fail" + } + + reschedule { + attempts = 0 + unlimited = false + } + {% endif %} + network { mode = "bridge" diff --git a/p2p/examples/provideremulator/main.go b/p2p/examples/provideremulator/main.go index 329279481..2820064d1 100644 --- a/p2p/examples/provideremulator/main.go +++ b/p2p/examples/provideremulator/main.go @@ -54,8 +54,6 @@ func main() { return } - fmt.Printf("connected to provider %s, receiving bids...\n", *serverAddr) - for { select { case bid, more := <-bidS: diff --git a/p2p/integrationtest/provider/main.go b/p2p/integrationtest/provider/main.go index 2e55ca74c..01a0bba6d 100644 --- a/p2p/integrationtest/provider/main.go +++ b/p2p/integrationtest/provider/main.go @@ -131,8 +131,6 @@ func main() { return } - fmt.Printf("connected to provider %s, receiving bids...\n", *serverAddr) - for bid := range bidS { receivedBids.Inc() buf, err := json.Marshal(bid)