Skip to content

Commit

Permalink
fix: introduce backups for disaster recovery
Browse files Browse the repository at this point in the history
  • Loading branch information
mrekucci committed Nov 26, 2024
1 parent 6242f1d commit a339c63
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 0 deletions.
117 changes: 117 additions & 0 deletions infrastructure/nomad/playbooks/templates/jobs/backup.nomad.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
#jinja2: trim_blocks:True, lstrip_blocks:True
# Periodic batch job that exports the mev-commit-geth chain data into an RLP
# backup file stored on the "backups-volume" host volume.
#
# Rendering happens in three layers: Jinja2 (Ansible) first, then Nomad's
# template engine for everything wrapped in raw/endraw, and finally bash for
# local/run.sh. The heredoc bodies are intentionally kept flush-left so the
# Jinja2 whitespace control and the "<<-" heredoc stripping stay predictable.
job "backup-{{ environments[env].version }}" {
  datacenters = ["{{ datacenter }}"]

  type = "batch"

  # Launch a new instance of the job at minute 0 of every hour.
  periodic {
    cron = "0 * * * *"
  }

  group "backup-group" {
    count = 1

    network {
      mode = "bridge"

      # Resolve names through the host's configured nameservers, with
      # 1.1.1.1 appended as an additional resolver.
      dns {
        servers = {{ (ansible_facts['dns']['nameservers'] + ['1.1.1.1']) | tojson }}
      }
    }

    # Host volume that receives the backup files; it has to be registered as
    # a host_volume named "backups-volume" in the Nomad client configuration.
    volume "backups-volume" {
      type = "host"
      source = "backups-volume"
      read_only = false
    }

    task "backup" {
      driver = "exec"

      # Only the devenv environment overrides the memory limit (Nomad
      # expresses memory in MB); other environments use the default.
      {% if env == 'devenv' %}
      resources {
        memory = 2048
      }
      {% endif %}

      volume_mount {
        volume = "backups-volume"
        destination = "/local/backups"
        read_only = false
      }

      # Fetch the geth release tarball: from the S3 artifacts bucket in every
      # environment except devenv, where it is served from this host on
      # port 1111 instead.
      {% if env != 'devenv' %}
      artifact {
        source = "https://primev-infrastructure-artifacts.s3.us-west-2.amazonaws.com/mev-commit-geth_{{ version }}_Linux_{{ target_system_architecture }}.tar.gz"
      }
      {% else %}
      artifact {
        source = "http://{{ ansible_facts['default_ipv4']['address'] }}:1111/mev-commit-geth_{{ version }}_Linux_{{ target_system_architecture }}.tar.gz"
      }
      {% endif %}

      # Environment file for the task; "env = true" makes Nomad load the
      # rendered KEY=value pairs as environment variables of the task.
      # NOTE(review): GETH_DATA_DIR points below /local/data, but the only
      # volume mounted into this task is /local/backups. Confirm the chain
      # data directory is actually reachable from this task.
      template {
        data = <<-EOH
{%- raw %}
GETH_DATA_DIR="/local/data/{% endraw %}{{ job.name }}{% raw %}/node-{{ env "NOMAD_ALLOC_INDEX" }}"
{% endraw %}
GETH_LOG_FORMAT="{{ job.env.get('log-format', 'json') }}"
GETH_LOG_TAGS="{{ 'service.name:' + job.name + '-{{ env "NOMAD_ALLOC_INDEX" }}' + ',service.version:' + version }}"
EOH
        destination = "secrets/.env"
        env = true
      }

      # Backup script. "change_mode = noop" means a re-render of the template
      # does not restart or signal the task.
      template {
        data = <<-EOH
#!/usr/bin/env bash

{%- raw %}
{{- range nomadService "datadog-agent-logs-collector" }}
{{ if contains "tcp" .Tags }}
exec > >(nc {{ .Address }} {{ .Port }}) 2>&1
{{ end }}
{{- end }}
{% endraw %}

# Backup file name: version, job name, allocation index and a timestamp.
BACKUP_FILE="local/backups/{{ version }}_{{ job.name }}-{% raw %}{{ env "NOMAD_ALLOC_INDEX" }}{% endraw %}_$(date +%Y%m%d%H%M%S)"

# Mark the backup as "-dirty" when the inspected task reported an event with
# a non-zero exit code.
# NOTE(review): this reads the events of a task named "node" in the allocation
# identified by NOMAD_ALLOC_ID, while this job only defines the task "backup".
# Confirm that the intended allocation and task are being inspected.
STATUS=$(nomad alloc status -address="http://127.0.0.1:4646" -json "${NOMAD_ALLOC_ID}")
NON_ZERO_EXIT_EVENTS=$(echo "$STATUS" | jq -r '.TaskStates.node.Events[] | select(.ExitCode != 0)')
if [[ -n "${NON_ZERO_EXIT_EVENTS}" ]]; then
  echo "The main task did not start or finish gracefully"
  BACKUP_FILE+="-dirty"
fi
BACKUP_FILE+=".rlp"

echo "Exporting chain data to backup file: ${BACKUP_FILE}"
START_TIME=$(date +%s)
chmod +x local/mev-commit-geth
# The backup path is quoted so it always reaches geth as a single argument.
# The exit status of this command is checked immediately below, so do not
# insert other commands between the export and the check.
local/mev-commit-geth \
  --verbosity=5 \
  --log.format="${GETH_LOG_FORMAT}" \
  --log.tags="${GETH_LOG_TAGS}" \
  --datadir="${GETH_DATA_DIR}" \
  export "${BACKUP_FILE}"

if [[ "$?" -eq 0 ]] && [[ -f "${BACKUP_FILE}" ]]; then
  ELAPSED_TIME=$(($(date +%s) - START_TIME))
  echo "Backup finished in: $(date -u -d@${ELAPSED_TIME} +%H:%M:%S)"
  echo "Backup file size: $(du -h "${BACKUP_FILE}" | cut -f1)"
else
  echo "Backup failed"
  exit 1
fi
EOH
        destination = "local/run.sh"
        change_mode = "noop"
        perms = "0755"
      }

      # Run the rendered script; exec replaces the wrapping bash process.
      config {
        command = "bash"
        args = ["-c", "exec local/run.sh"]
      }
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ client {
path = "{{ ansible_user_home }}/{{ env }}/artifacts"
}
{% endif %}
{% if env in ["testnet", "mainnet"] %}
# Host directory exposed to jobs as the "backups-volume" host volume.
# It is registered only for the testnet and mainnet environments.
# NOTE(review): jobs that request "backups-volume" cannot be placed on
# clients rendered for any other environment - confirm this is intended.
host_volume "backups-volume" {
  path = "{{ ansible_user_home }}/{{ env }}/backups"
}
{% endif %}
}
{% endif %}

Expand Down
5 changes: 5 additions & 0 deletions infrastructure/nomad/playbooks/variables/profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,10 @@ jobs:
to: 8080
env:

# Disaster-recovery backup job. The template name must match the file that
# exists under templates/jobs/, which is backup.nomad.j2 (singular).
backups: &backups_job
  name: backups
  template: backup.nomad.j2

profiles:
ci:
jobs:
Expand Down Expand Up @@ -734,6 +738,7 @@ profiles:
- *mev_commit_provider_emulator_node1_job
- *mev_commit_faucet_job
- *datadog_agent_metrics_collector_job
- *backups_job

stressnet:
jobs:
Expand Down

0 comments on commit a339c63

Please sign in to comment.