Skip to content

Commit

Permalink
feat(infra): fail fast on first deployment error
Browse files Browse the repository at this point in the history
  • Loading branch information
mrekucci committed Jul 11, 2024
1 parent 413038a commit fd7f14e
Show file tree
Hide file tree
Showing 15 changed files with 146 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/infrastructure.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
cluster:
name: Setup and Test Nomad Cluster
runs-on: ubuntu-22.04
-timeout-minutes: 180
+timeout-minutes: 60

steps:
- name: Print System Information
Expand Down
12 changes: 9 additions & 3 deletions infrastructure/nomad/playbooks/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,11 @@
while true; do
STATUS=$(echo "${RESULT}" | jq -r '.[0].Allocations[0].ClientStatus')
if [ "${STATUS}" = "failed" ]; then
echo "Deployment failed for {{ job.name }}:"
echo "${RESULT}"
exit 1
fi
case "${JOB_TYPE}" in
service)
Expand All @@ -623,7 +628,7 @@
CURRENT_TIME="$(date +%s)"
ELAPSED_TIME="$(( CURRENT_TIME - START_TIME ))"
if [ ${ELAPSED_TIME} -ge ${TIMEOUT} ]; then
-echo "Deploy timed out for {{ job.name }}, current status: ${STATUS}"
+echo "Deployment timed out for {{ job.name }}, current status: ${STATUS}"
exit 1
fi
Expand All @@ -642,8 +647,9 @@
label: "{{ item.name }}"
vars:
job: "{{ item }}"
-register: result
-failed_when: result.rc != 0
+register: job_result
+failed_when: job_result.rc != 0
+when: job_result is not defined or job_result.rc == 0

- name: Post Deployment Info
ansible.builtin.debug:
Expand Down
10 changes: 10 additions & 0 deletions infrastructure/nomad/playbooks/templates/jobs/artifacts.nomad.j2
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,16 @@ job "{% if env != 'devenv' %}{{ environments[env].version }}{% else %}artifacts-
group "artifacts-group" {
count = 1

restart {
attempts = 0
mode = "fail"
}

reschedule {
attempts = 0
unlimited = false
}

network {
mode = "bridge"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@ job "{{ job.name }}" {
group "{{ job.name }}-group" {
count = {{ job.count }}

{% if env == 'devenv' %}
restart {
attempts = 0
mode = "fail"
}

reschedule {
attempts = 0
unlimited = false
}
{% endif %}

network {
dns {
servers = {{ (ansible_facts['dns']['nameservers'] + ['1.1.1.1']) | tojson }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@ job "{{ job.name }}" {
group "{{ job.name }}-group" {
count = {{ job.count }}

{% if env == 'devenv' %}
restart {
attempts = 0
mode = "fail"
}

reschedule {
attempts = 0
unlimited = false
}
{% endif %}

network {
mode = "bridge"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@ job "{{ job.name }}" {
group "{{ job.name }}-group" {
count = {{ job.count }}

{% if env == 'devenv' %}
restart {
attempts = 0
mode = "fail"
}

reschedule {
attempts = 0
unlimited = false
}
{% endif %}

network {
mode = "bridge"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@ job "{{ job.name }}" {
group "{{ job.name }}-group" {
count = {{ job.count }}

{% if env == 'devenv' %}
restart {
attempts = 0
mode = "fail"
}

reschedule {
attempts = 0
unlimited = false
}
{% endif %}

network {
mode = "bridge"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@ job "{{ job.name }}" {
group "{{ job.name }}-group" {
count = {{ job.count }}

{% if env == 'devenv' %}
restart {
attempts = 0
mode = "fail"
}

reschedule {
attempts = 0
unlimited = false
}
{% endif %}

network {
mode = "bridge"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,18 @@ job "{{ job.name }}" {
group "{{ job.name }}-group" {
count = {{ job.count }}

{% if env == 'devenv' %}
restart {
attempts = 0
mode = "fail"
}

reschedule {
attempts = 0
unlimited = false
}
{% endif %}

network {
dns {
servers = {{ (ansible_facts['dns']['nameservers'] + ['1.1.1.1']) | tojson }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@ job "{{ job.name }}" {
group "{{ job.name }}-group" {
count = {{ job.count }}

{% if env == 'devenv' %}
restart {
attempts = 0
mode = "fail"
}

reschedule {
attempts = 0
unlimited = false
}
{% endif %}

network {
mode = "bridge"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@ job "{{ job.name }}" {
group "{{ job.name }}-group" {
count = {{ job.count }}

{% if env == 'devenv' %}
restart {
attempts = 0
mode = "fail"
}

reschedule {
attempts = 0
unlimited = false
}
{% endif %}

network {
mode = "bridge"

Expand Down Expand Up @@ -87,16 +99,15 @@ job "{{ job.name }}" {
mkdir -p /var/run/postgresql > /dev/null 2>&1
pg_ctl initdb --silent --pgdata="${POSTGRES_DATA}"
if [ $? -ne 0 ]; then
-echo "Failed to initialize database."
+echo "Failed to initialize PostgreSQL."
exit 1
fi
cp /alloc/data/postgres.env "${POSTGRES_DATA}/.env"
-postgres -D "${POSTGRES_DATA}" &
-pid=$!

-if ! timeout 5m bash -c 'until pg_ctl status --pgdata="${POSTGRES_DATA}" --silent --no-wait; do sleep 1; done'; then
-echo "Waiting for PostgreSQL to start..."
-sleep 1
+pg_ctl start --pgdata="${POSTGRES_DATA}" --silent --wait --timeout=300 > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+echo "Failed to start PostgreSQL."
+exit 1
fi

createuser --superuser postgres > /dev/null 2>&1
Expand All @@ -109,7 +120,13 @@ job "{{ job.name }}" {
GRANT ALL PRIVILEGES ON DATABASE ${POSTGRES_DB} TO ${POSTGRES_USERNAME};"
echo "Database initialized and configured successfully."

-wait $pid
+pg_ctl stop --pgdata="${POSTGRES_DATA}" --silent --wait --timeout=300 > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+echo "Failed to stop PostgreSQL."
+exit 1
+fi
+
+postgres -D "${POSTGRES_DATA}"
{% endraw %}
EOH
destination = "local/run.sh"
Expand Down Expand Up @@ -303,6 +320,11 @@ job "{{ job.name }}" {
export MEV_ORACLE_PG_PASSWORD="${POSTGRES_PASSWORD}"
export MEV_ORACLE_PG_DBNAME="${POSTGRES_DB}"

if ! timeout 5m bash -c 'until pg_isready -h ${MEV_ORACLE_PG_HOST} -p ${MEV_ORACLE_PG_PORT} -U ${MEV_ORACLE_PG_USER} -d ${MEV_ORACLE_PG_DBNAME}; do sleep 1; done'; then
echo "Waiting for PostgreSQL to start..."
sleep 1
fi

chmod +x local/mev-commit-oracle
local/mev-commit-oracle start
{% endraw %}
Expand Down
12 changes: 12 additions & 0 deletions infrastructure/nomad/playbooks/templates/jobs/mev-commit.nomad.j2
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@ job "{{ job.name }}" {
group "{{ job.name }}-group" {
count = {{ job.count }}

{% if env == 'devenv' %}
restart {
attempts = 0
mode = "fail"
}

reschedule {
attempts = 0
unlimited = false
}
{% endif %}

network {
mode = "bridge"

Expand Down
2 changes: 0 additions & 2 deletions p2p/examples/provideremulator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,6 @@ func main() {
return
}

fmt.Printf("connected to provider %s, receiving bids...\n", *serverAddr)

for {
select {
case bid, more := <-bidS:
Expand Down
2 changes: 0 additions & 2 deletions p2p/integrationtest/provider/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,6 @@ func main() {
return
}

fmt.Printf("connected to provider %s, receiving bids...\n", *serverAddr)

for bid := range bidS {
receivedBids.Inc()
buf, err := json.Marshal(bid)
Expand Down
2 changes: 1 addition & 1 deletion p2p/integrationtest/real-bidder/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ func main() {
}

bundleLen := rand.Intn(10)
-bundleStart := rand.Intn(len(currentBlock.txns) - 1)
+bundleStart := rand.Intn(len(currentBlock.txns))
bundleEnd := bundleStart + bundleLen
if bundleEnd > len(currentBlock.txns) {
bundleEnd = len(currentBlock.txns) - 1
Expand Down

0 comments on commit fd7f14e

Please sign in to comment.