Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: enable backup/restore of Geth db #365

Merged
merged 2 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions infrastructure/nomad/cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ no_logs_collection_flag=false
force_build_templates_flag=false
skip_certificates_setup_flag=false
release_flag=false
backup_flag=false
deploy_version="HEAD"
environment_name="devenv"
profile_name="devnet"
Expand All @@ -21,7 +22,7 @@ help() {
echo "Usage:"
echo "$0 [init [--environment <name=devenv>] [--skip-certificates-setup] [--debug]]"
echo "$0 [deploy [version=HEAD] [--environment <name=devenv>] [--profile <name=devnet>] [--force-build-templates] [--no-logs-collection] [--datadog-key <key>] [--l1-rpc-url <url>] [--otel-collector-endpoint-url <url>] [--release] [--debug]]"
echo "$0 [destroy [--debug]] [--help]"
echo "$0 [destroy [--backup] [--debug]]"
echo "$0 --help"
echo
echo "Parameters:"
Expand All @@ -42,7 +43,8 @@ help() {
echo " --debug Enable debug mode for detailed output."
echo
echo " destroy Destroy the whole cluster."
echo " --debug Enable debug mode for detailed output."
echo " --backup Create a backup before destroying the environment."
echo " --debug Enable debug mode for detailed output."
echo
echo " --help Display this help message."
echo
Expand Down Expand Up @@ -71,16 +73,16 @@ help() {
echo " Deploy with a specific version, environment, profile in debug mode with disabled logs collection, Datadog API key, L1 RPC URL, and OpenTememetry Collector Endpoint URL:"
echo " $0 deploy v0.1.0 --environment devenv --profile testnet --no-logs-collection --datadog-key your_datadog_key --l1-rpc-url your_rpc_url --otel-collector-endpoint-url your_otel_url --debug"
echo
echo " Destroy with specific environment and debug mode:"
echo " $0 destroy --environment devenv --debug"
echo " Destroy all jobs but backup before do so:"
echo " $0 destroy --backup --debug"
exit 1
}

# Print the condensed command synopsis for every sub-command and exit with
# status 1. Each sub-command is listed exactly once; the destroy synopsis
# reflects the --backup option and --help is shown as its own invocation.
usage() {
    echo "Usage:"
    echo "$0 [init [--environment <name=devenv>] [--skip-certificates-setup] [--debug]]"
    echo "$0 [deploy [version=HEAD] [--environment <name=devenv>] [--profile <name=devnet>] [--force-build-templates] [--no-logs-collection] [--datadog-key <key>] [--l1-rpc-url <url>] [--otel-collector-endpoint-url <url>] [--release] [--debug]]"
    echo "$0 [destroy [--backup] [--debug]]"
    echo "$0 --help"
    exit 1
}
Expand Down Expand Up @@ -243,6 +245,10 @@ parse_args() {
destroy)
destroy_flag=true
shift
if [[ $# -gt 0 && $1 == "--backup" ]]; then
backup_flag=true
shift
fi
if [[ $# -gt 0 && $1 == "--debug" ]]; then
debug_flag=true
shift
Expand Down Expand Up @@ -288,6 +294,7 @@ main() {
;;
"${destroy_flag}")
playbook+="destroy.yml"
[[ "${backup_flag}" == true ]] && flags+=("--extra-vars" "backup=true")
;;
*)
usage
Expand Down
21 changes: 16 additions & 5 deletions infrastructure/nomad/playbooks/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@
when: version is not defined or version == '' or release

- name: Set Artifacts Build Version
set_fact:
ansible.builtin.set_fact:
build_artifacts: true
version: "{{ artifacts_build_version.stdout }}"
when: version is not defined or version == '' or release
Expand Down Expand Up @@ -124,11 +124,11 @@
success_msg: "The profile name is set to: {{ profile }}."

- name: Set Jobs Definition
set_fact:
ansible.builtin.set_fact:
jobs: "{{ profiles[profile].jobs }}"

- name: Disable Logs Collection
set_fact:
ansible.builtin.set_fact:
jobs: >-
{{
jobs
Expand All @@ -138,7 +138,7 @@
when: no_logs_collection | default(false) | bool

- name: Disable OpenTelemetry Trace Collection
set_fact:
ansible.builtin.set_fact:
jobs: >-
{{
jobs
Expand Down Expand Up @@ -166,7 +166,7 @@
register: existing_environment

- name: Set Existing Scripts Artifact Version as Stale
set_fact:
ansible.builtin.set_fact:
build_templates: >-
{{
(existing_environment.stdout | from_json) != environments[env]
Expand All @@ -187,6 +187,16 @@
Build Templates: {{ 'yes' if build_templates | default(false) else 'no' }}

tasks:
# Guard task: fails (exit status 1) unless `nomad job status -json` prints
# exactly "No running jobs".
- name: Determine Cluster Status
  ansible.builtin.shell: |
    job_listing="$(nomad job status -json)"
    if [ "${job_listing}" = "No running jobs" ]; then
      exit 0
    fi
    echo "Cluster has running jobs."
    exit 1
  args:
    executable: bash

- name: Build keystore-generator
ansible.builtin.shell: |
BINARY_PATH="{{ dist_dir }}/keystore-generator-{{ environments[env].version }}"
Expand Down Expand Up @@ -654,6 +664,7 @@
ansible.builtin.shell: |
nomad var purge "nomad/jobs"
nomad system gc
nomad var put -namespace=default "nomad/jobs" MEV_COMMIT_GETH_CHAIN_BACKUP="false"
args:
executable: bash

Expand Down
57 changes: 52 additions & 5 deletions infrastructure/nomad/playbooks/destroy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,67 @@
hosts: nomad_clients
gather_facts: no

vars:
nomad_vars_path: "nomad/jobs"

tasks:
# Writes MEV_COMMIT_GETH_CHAIN_BACKUP="true" into the Nomad variable at
# `nomad_vars_path` (namespace "default"), overwriting any existing value
# (-force). Runs only when the `backup` variable is truthy.
# NOTE(review): presumably the Geth job reads this variable to decide whether
# to back up its chain data on shutdown -- confirm against the job template.
- name: Set Backup Var
  ansible.builtin.shell: |
    nomad var put -force -namespace=default "{{ nomad_vars_path }}" MEV_COMMIT_GETH_CHAIN_BACKUP="true"
  args:
    executable: bash
  # `backup` arrives from the CLI as `--extra-vars backup=true`, i.e. as a
  # string; cast it explicitly so that "false"/"no" are not treated as truthy
  # and an undefined variable simply skips the task.
  when: backup | default(false) | bool

# Stops every Nomad job except "artifacts" one at a time, waiting after each
# stop until the ClientStatus of the job's first listed allocation is "failed"
# or "complete" (at most 600 one-second polls per job). The "artifacts" job,
# if present, is stopped last.
# NOTE(review): a job with no allocations yields the status "null" and will
# wait for the full timeout -- confirm whether that case can occur.
- name: Stop Jobs
  ansible.builtin.shell: |
    NOMAD_JOBS=$(nomad job status -json | jq -r '.[].Summary.JobID')
    for job in $(echo "${NOMAD_JOBS}" | grep -v artifacts); do
      if [ "${job}" != "null" ]; then
        nomad stop "${job}"

        TIMEOUT=600
        while true; do
          STATUS=$(nomad job status -json "${job}" | jq -r '.[0].Allocations[0].ClientStatus')
          case "${STATUS}" in
            "failed" | "complete")
              break
              ;;
            *)
              sleep 1
              TIMEOUT=$((TIMEOUT - 1))
              if [ "${TIMEOUT}" -eq 0 ]; then
                echo "Timeout waiting for ${job} to stop has been exceeded."
                # `exit`, not `return`: this script is not a function, so
                # `return` would only print an error and keep looping.
                exit 1
              fi
              ;;
          esac
        done
      fi
    done

    if echo "${NOMAD_JOBS}" | grep -q artifacts; then
      nomad stop -yes "artifacts"
    fi
  args:
    executable: bash

# Waits (at most 30 one-second polls) until `nomad job status` reports
# "No running jobs" or no job summary shows Running/Starting task groups,
# then purges the Nomad variable at `nomad_vars_path` and runs a system
# garbage collection.
- name: Purge Stopped Jobs
  ansible.builtin.shell: |
    TIMEOUT=30
    while [ "$(nomad job status)" != "No running jobs" ]; do
      RUNNING_JOBS=$(nomad job status -json | jq '[.[] | select(.Summary.Summary | to_entries[] | select(.value.Running > 0 or .value.Starting > 0)) | .Summary.JobID]')
      if [ "${RUNNING_JOBS}" = "[]" ]; then
        break
      fi
      sleep 1
      TIMEOUT=$((TIMEOUT - 1))
      if [ "${TIMEOUT}" -eq 0 ]; then
        echo "Timeout waiting for jobs to stop has been exceeded."
        # `exit`, not `return`: outside a function `return` does not end the
        # script, so the timeout would never actually abort the task.
        exit 1
      fi
    done

    # Purge the variable only after the jobs have stopped.
    nomad var purge "{{ nomad_vars_path }}"
    nomad system gc
  args:
    executable: bash
9 changes: 9 additions & 0 deletions infrastructure/nomad/playbooks/init.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,15 @@
become: true
become_user: "{{ ansible_user }}"

# Creates the per-environment scratch directory under /tmp as the Ansible
# connection user, applying mode 0744 recursively.
# NOTE(review): 0744 gives group/others read but not traverse permission on
# the directory -- confirm that this is intended rather than 0755.
- name: Ensure "/tmp/{{ env }}" Directory Exists
  become: true
  become_user: "{{ ansible_user }}"
  ansible.builtin.file:
    state: directory
    path: "/tmp/{{ env }}"
    recurse: true
    mode: "0744"

tasks:
- name: Add DataDog Repository Key
ansible.builtin.apt_key:
Expand Down
Loading
Loading