Patch summary (free text ahead of the first "diff --git" header; patch tools
skip it).

infrastructure/nomad/playbooks/destroy.yml
  * "Stop Jobs" and "Force Stop and Purge Jobs" now walk the job list sorted by
    the CreateTime of each job's first listed allocation, in descending order
    (jq: sort_by(.Allocations[0].CreateTime) | reverse).
  * The TIMEOUT=600 countdown is removed from the per-job wait loop in
    "Stop Jobs".
    NOTE(review): the removed guard called `return 1` at script level, which
    bash rejects outside a function or sourced script, so it presumably never
    aborted the task. With the guard gone the `while true` loop ends only
    through case arms that are outside the hunks shown here - confirm an
    unbounded wait is intended.
  * "Purge Stopped Jobs" is commented out instead of deleted.
    NOTE(review): `nomad var purge` and `nomad system gc` are no longer run by
    this task - confirm. The commented jq filter now emits individual job IDs
    rather than an array, so its `= "[]"` check needs rework before the task
    is re-enabled.

infrastructure/nomad/playbooks/templates/jobs/mev-commit-geth.nomad.j2
  * testenv `resources` (two places): `cores = 2` dropped, `memory` changed
    from 16384 to 2048.
  * The cache flag (`--cache=4096` / `--cache 4096`) is removed from the chain
    import and export invocations.
  * Import and export record START_TIME and log the elapsed time as HH:MM:SS.
    The value is formatted with printf and shell arithmetic, so the hour field
    keeps counting past 23 and no `date -d` support is required.
  * `nomad alloc status` now targets http://127.0.0.1:4646 instead of the
    host's default IPv4 address.
    NOTE(review): assumes the local Nomad agent serves its HTTP API on
    loopback - confirm.

diff --git a/infrastructure/nomad/playbooks/destroy.yml b/infrastructure/nomad/playbooks/destroy.yml
index c4a3fc2a7..1294646e5 100644
--- a/infrastructure/nomad/playbooks/destroy.yml
+++ b/infrastructure/nomad/playbooks/destroy.yml
@@ -15,12 +15,11 @@
 
     - name: Stop Jobs
       ansible.builtin.shell: |
-        NOMAD_JOBS=$(nomad job status -json | jq -r '.[].Summary.JobID')
+        NOMAD_JOBS=$(nomad job status -json | jq -r 'sort_by(.Allocations[0].CreateTime) | reverse | .[].Summary.JobID')
         for job in $(echo "${NOMAD_JOBS}" | grep -v artifacts); do
           if [ "${job}" != "null" ]; then
             nomad stop "${job}"
 
-            TIMEOUT=600
             while true; do
               STATUS=$(nomad job status -json "${job}" | jq -r '.[0].Allocations[0].ClientStatus')
               case "${STATUS}" in
@@ -29,11 +28,6 @@
                   ;;
                 *)
                   sleep 1
-                  TIMEOUT=$((TIMEOUT - 1))
-                  if [ "${TIMEOUT}" -eq 0 ]; then
-                    echo "Timeout waiting for ${job} to stop has been exceeded."
-                    return 1
-                  fi
                   ;;
               esac
             done
@@ -47,31 +41,31 @@
         executable: bash
       when: backup is defined and backup
 
-    - name: Purge Stopped Jobs
-      ansible.builtin.shell: |
-        TIMEOUT=30
-        while [ "$(nomad job status)" != "No running jobs" ]; do
-          RUNNING_JOBS=$(nomad job status -json | jq '[.[] | select(.Summary.Summary | to_entries[] | select(.value.Running > 0 or .value.Starting > 0)) | .Summary.JobID]')
-          if [ "${RUNNING_JOBS}" = "[]" ]; then
-            break
-          fi
-          sleep 1
-          TIMEOUT=$((TIMEOUT - 1))
-          if [ "${TIMEOUT}" -eq 0 ]; then
-            echo "Timeout waiting for jobs to stop has been exceeded."
-            return 1
-          fi
-        done
-
-        nomad var purge {{ nomad_vars_path }}
-        nomad system gc
-      args:
-        executable: bash
-      when: backup is defined and backup
+#    - name: Purge Stopped Jobs
+#      ansible.builtin.shell: |
+#        TIMEOUT=30
+#        while [ "$(nomad job status)" != "No running jobs" ]; do
+#          RUNNING_JOBS=$(nomad job status -json | jq '[.[] | select(.Summary.Summary | to_entries[] | select(.value.Running > 0 or .value.Starting > 0))] | sort_by(.Allocations[0].CreateTime) | reverse | .[].Summary.JobID')
+#          if [ "${RUNNING_JOBS}" = "[]" ]; then
+#            break
+#          fi
+#          sleep 1
+#          TIMEOUT=$((TIMEOUT - 1))
+#          if [ "${TIMEOUT}" -eq 0 ]; then
+#            echo "Timeout waiting for jobs to stop has been exceeded."
+#            return 1
+#          fi
+#        done
+#
+#        nomad var purge {{ nomad_vars_path }}
+#        nomad system gc
+#      args:
+#        executable: bash
+#      when: backup is defined and backup
 
     - name: Force Stop and Purge Jobs
       ansible.builtin.shell: |
-        for job in $(nomad job status -json | jq -r '.[].Summary.JobID'); do
+        for job in $(nomad job status -json | jq -r 'sort_by(.Allocations[0].CreateTime) | reverse | .[].Summary.JobID'); do
           if [ "${job}" != "null" ]; then
             nomad stop -purge "${job}"
           fi
diff --git a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-geth.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-geth.nomad.j2
index b9010d71f..8d966f793 100644
--- a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-geth.nomad.j2
+++ b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-geth.nomad.j2
@@ -51,8 +51,7 @@ job "{{ job.name }}" {
 
       {% if env == 'testenv' %}
       resources {
-        cores = 2
-        memory = 16384
+        memory = 2048
       }
       {% endif %}
 
@@ -150,8 +149,8 @@ job "{{ job.name }}" {
           echo "Restoring from backup file ${BACKUP_FILE} to ${GETH_DATA_DIR}"
           echo "Backup file size: $(du -h ${BACKUP_FILE} | cut -f1)"
 
+          START_TIME=$(date +%s)
           local/mev-commit-geth \
-            --cache=4096 \
             --verbosity=5 \
             --log.format="${GETH_LOG_FORMAT}" \
             --log.tags="${GETH_LOG_TAGS}" \
@@ -159,7 +158,8 @@ job "{{ job.name }}" {
             import ${BACKUP_FILE}
 
           if [[ $? -eq 0 ]]; then
-            echo "Restore successful"
+            ELAPSED_TIME=$(($(date +%s) - START_TIME))
+            printf 'Restore finished in: %02d:%02d:%02d\n' $((ELAPSED_TIME / 3600)) $((ELAPSED_TIME % 3600 / 60)) $((ELAPSED_TIME % 60))
           else
             echo "Restore failed"
             exit 1
@@ -325,8 +325,7 @@ job "{{ job.name }}" {
 
       {% if env == 'testenv' %}
       resources {
-        cores = 2
-        memory = 16384
+        memory = 2048
       }
       {% endif %}
 
@@ -386,7 +385,7 @@ job "{{ job.name }}" {
           fi
 
           BACKUP_FILE="local/backups/{{ version }}_{{ job.name }}-{% raw %}{{ env "NOMAD_ALLOC_INDEX" }}{% endraw %}_$(date +%Y%m%d%H%M%S)"
-          STATUS=$(nomad alloc status -address="http://{{ ansible_facts['default_ipv4']['address'] }}:4646" -json "${NOMAD_ALLOC_ID}")
+          STATUS=$(nomad alloc status -address="http://127.0.0.1:4646" -json "${NOMAD_ALLOC_ID}")
           NON_ZERO_EXIT_EVENTS=$(echo "$STATUS" | jq -r '.TaskStates.node.Events[] | select(.ExitCode != 0)')
           if [[ -n "${NON_ZERO_EXIT_EVENTS}" ]]; then
             echo "The main task did not start or finish gracefully"
@@ -395,9 +394,9 @@ job "{{ job.name }}" {
           BACKUP_FILE+=".rlp"
           echo "Exporting chain data from ${GETH_DATA_DIR} to backup file: ${BACKUP_FILE}"
 
+          START_TIME=$(date +%s)
           chmod +x local/mev-commit-geth
           local/mev-commit-geth \
-            --cache 4096 \
             --verbosity=5 \
             --log.format="$GETH_LOG_FORMAT" \
             --log.tags="$GETH_LOG_TAGS" \
@@ -405,7 +404,8 @@ job "{{ job.name }}" {
             export ${BACKUP_FILE}
 
           if [[ "$?" -eq 0 ]] && [[ -f "${BACKUP_FILE}" ]]; then
-            echo "Backup successful"
+            ELAPSED_TIME=$(($(date +%s) - START_TIME))
+            printf 'Backup finished in: %02d:%02d:%02d\n' $((ELAPSED_TIME / 3600)) $((ELAPSED_TIME % 3600 / 60)) $((ELAPSED_TIME % 60))
             echo "Backup file size: $(du -h ${BACKUP_FILE} | cut -f1)"
           else
             echo "Backup failed"