feat: introduce nomad cluster GH workflow (#81)

mrekucci authored May 22, 2024
1 parent 16e93c1 commit a7f56be
Showing 9 changed files with 128 additions and 91 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/artifacts.yml
@@ -1,4 +1,4 @@
-name: mev-commit-artifacts
+name: artifacts

on:
workflow_dispatch
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -1,4 +1,4 @@
-name: mev-commit-ci
+name: ci

on:
push:
96 changes: 51 additions & 45 deletions .github/workflows/infrstructure.yml
@@ -1,69 +1,75 @@
-name: mev-commit-infrastructure
+name: infrastructure

on:
  workflow_run:
-    workflows: ["mev-commit-ci"]
+    workflows:
+      - ci
    types:
      - completed

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}

jobs:
-  deploy_and_test:
-    name: Deploy and Test mev-commit Minimal Infrastructure
-    runs-on: ubuntu-latest
-    if: ${{ github.event.workflow_run.conclusion == 'success' }}
+  cluster:
+    name: Setup and Test Nomad devnet Cluster
+    runs-on: ubuntu-22.04
+    timeout-minutes: 120
+    # if: ${{ github.event.workflow_run.conclusion == 'success' }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          submodules: recursive
          ref: ${{ github.event.workflow_run.head_branch }}

-      - name: Install jq
+      - name: Install Dependencies
        run: |
+          echo 'deb [trusted=yes] https://repo.goreleaser.com/apt/ /' | sudo tee /etc/apt/sources.list.d/goreleaser.list
          sudo apt-get update
-          sudo apt-get install -y jq
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+          sudo apt-get install --yes goreleaser
+          pip install boto3 botocore
+          pipx inject ansible-core botocore boto3

-      - name: Setup Docker Compose
+      - name: Configure Machine
        run: |
-          sudo rm $(which docker-compose)
-          curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
-          sudo chmod +x /usr/local/bin/docker-compose
+          aws configure set aws_access_key_id ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws configure set aws_secret_access_key ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws configure set default.region us-west-2
-      - name: Build and Start Services
-        run: ./mev-commit-cli.sh start minimal
+          cat <<-EOH > infrastructure/nomad/hosts.ini
+          [nomad_servers]
+          127.0.0.1 ansible_connection=local ansible_user=${USER}

-      - name: Check Service Endpoints
-        run: |
-          ips=("172.29.18.2" "172.29.18.3" "172.29.18.4")
+          [nomad_clients]
+          127.0.0.1 ansible_connection=local ansible_user=${USER}
+          EOH
+          ansible-inventory --inventory infrastructure/nomad/hosts.ini --list --yaml
+          ansible all --inventory infrastructure/nomad/hosts.ini --module-name ping
-          for ip in "${ips[@]}"; do
-            echo "Checking service at $ip"

+      - name: Initialize and Configure Cluster
+        run: |
+          ./cluster.sh init --profile ci --debug
+        working-directory: infrastructure/nomad

-            attempts=0
-            success=false
-            while [ $attempts -lt 6 ]; do
-              response=$(curl -k -s "https://$ip:13523/topology")
-              bidders=$(echo "$response" | jq '.connected_peers.bidders | length')
-              providers=$(echo "$response" | jq '.connected_peers.providers | length')
-              if [[ -n "$response" && "$bidders" -gt 0 || "$providers" -gt 0 ]]; then
-                echo "Service at $ip is OK"
-                success=true
-                break
-              else
-                echo "Service check failed for $ip. Retrying in 30 seconds..."
-                attempts=$(( $attempts + 1 ))
-                sleep 30
-              fi
-            done
+      - name: Build Artifacts and Deploy Jobs
+        run: |
+          ./cluster.sh deploy --profile ci --debug
+        working-directory: infrastructure/nomad

-            if [ "$success" == "false" ]; then
-              echo "Service check failed for $ip after 3 attempts, failing"
-              exit 1
-            fi
-          done
+      - name: Destroy Cluster
+        run: |
+          ./cluster.sh destroy --profile ci --debug
+        working-directory: infrastructure/nomad

-      - name: Stop Services
-        run: ./mev-commit-cli.sh stop minimal
+      - name: Initialize Debug Shell
+        if: failure()
+        run: |
+          KEYS=$(curl -sSf -X POST https://eu.relay.tunshell.com/api/sessions)
+          echo "::add-mask::${KEYS}"
+          echo "Debug Shell:"
+          echo "sh <(curl -sSf https://lets.tunshell.com/init.sh) L $(echo ${KEYS} | jq -r .peer2_key) \${TUNSHELL_SECRET} eu.relay.tunshell.com"
+          curl -sSf https://lets.tunshell.com/init.sh | sh -s -- T $(echo ${KEYS} | jq -r .peer1_key) ${{ secrets.TUNSHELL_SECRET }} eu.relay.tunshell.com
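For reference, the job above drives the same entry points an operator would run by hand. A rough local equivalent of its cluster steps, assuming AWS credentials are already configured and commands are run from the repository root, might look like:

```sh
# Point both Nomad roles at the local machine, mirroring the inventory the workflow writes.
cat > infrastructure/nomad/hosts.ini <<EOF
[nomad_servers]
127.0.0.1 ansible_connection=local ansible_user=${USER}

[nomad_clients]
127.0.0.1 ansible_connection=local ansible_user=${USER}
EOF

# Confirm Ansible can reach the inventory, then run the same lifecycle as CI.
ansible all --inventory infrastructure/nomad/hosts.ini --module-name ping
cd infrastructure/nomad
./cluster.sh init --profile ci --debug
./cluster.sh deploy --profile ci --debug
./cluster.sh destroy --profile ci --debug
```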
2 changes: 1 addition & 1 deletion .github/workflows/releaser.yml
@@ -1,4 +1,4 @@
-name: mev-commit-releaser
+name: releaser

on:
push:
13 changes: 7 additions & 6 deletions infrastructure/nomad/README.md
@@ -64,12 +64,7 @@ Prepare `hosts.ini` File: This file contains the IP addresses of your Nomad serv
```
> Replace the 192.0.2.X and 198.51.100.X with the IP addresses of your Nomad server and client machines, respectively.
> Ensure the ansible_user matches the username on your target machines that has SSH access.
-If your host machine is the same as your control machine add the following to your `hosts.ini` file:
-```ini
-[local]
-127.0.0.1 ansible_connection=local
-```
+> If your host machine is the same as your control machine replace the IP addresses with `127.0.0.1 ansible_connection=local`.
If you do not want to use the SSH agent, another option is to add the following configuration to every `nomad_server` or
`nomad_client` record in the `host.ini` file: `ansible_ssh_private_key_file=/path/to/your/private_key`. For example:
@@ -82,6 +77,12 @@ If you do not want to use the SSH agent, another option is to add the following
198.51.100.2 ansible_user=ubuntu ansible_ssh_private_key_file=/path/to/your/private_key
```

Or if the private key is the same for all machines, you can set the `ansible_ssh_private_key_file` in the `[all:vars]` section:
```ini
[all:vars]
ansible_ssh_private_key_file=/path/to/your/private_key
```

## Cluster Management

To manage the Nomad cluster, use the `cluster.sh` script. This script allows you to initialize, deploy, and destroy the Nomad cluster.
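For illustration, a minimal sketch of that lifecycle, based on the usage text in `cluster.sh` (the `devnet` profile is the default and `deploy`'s optional version argument defaults to `HEAD`):

```sh
# Initialize the target machines (packages, Vault, Nomad) for the default devnet profile.
./cluster.sh init --profile devnet --debug

# Build artifacts from HEAD and deploy the profile's jobs to the cluster.
./cluster.sh deploy --profile devnet --debug

# Tear everything down again.
./cluster.sh destroy --debug
```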
14 changes: 11 additions & 3 deletions infrastructure/nomad/cluster.sh
@@ -13,7 +13,11 @@ deploy_version="HEAD"
profile_name="devnet"

help() {
echo "Usage: $0 [init [--profile <name=devnet>] [--skip-certificates-setup] [--debug]] [deploy [version=HEAD] [--profile <name=devnet>] [--force-build-templates] [--no-logs-collection] [--debug]] [destroy [--debug]] [--help]"
echo "Usage:"
echo "$0 [init [--profile <name=devnet>] [--skip-certificates-setup] [--debug]]"
echo "$0 [deploy [version=HEAD] [--profile <name=devnet>] [--force-build-templates] [--no-logs-collection] [--debug]]"
echo "$0 [destroy [--debug]] [--help]"
echo "$0 --help"
echo
echo "Parameters:"
echo " init Initialize the environment."
@@ -66,7 +70,11 @@ help() {
}

usage() {
echo "Usage: $0 [init [--profile <name=devnet>] [--skip-certificates-setup] [--debug]] [deploy [version=HEAD] [--profile <name=devnet>] [--force-build-templates] [--no-logs-collection] [--debug]] [destroy [--debug]] [--help]"
echo "Usage:"
echo "$0 [init [--profile <name=devnet>] [--skip-certificates-setup] [--debug]]"
echo "$0 [deploy [version=HEAD] [--profile <name=devnet>] [--force-build-templates] [--no-logs-collection] [--debug]]"
echo "$0 [destroy [--debug]] [--help]"
echo "$0 --help"
exit 1
}

@@ -236,7 +244,7 @@ main() {
;;
esac

-ansible-playbook -i hosts.ini "${playbook}" "${flags[@]}"
+ansible-playbook --inventory hosts.ini "${playbook}" "${flags[@]}"
}

main "$@"
8 changes: 4 additions & 4 deletions infrastructure/nomad/playbooks/deploy.yml
@@ -13,17 +13,17 @@
ansible.builtin.set_fact:
system_arch: "{{ 'arm64' if ansible_architecture == 'aarch64' else 'amd64' }}"

- name: Include Variables
include_vars:
file: vars.yml

- name: Check Operating System of the Target System
assert:
that:
- ansible_facts['os_family'] == "Debian"
fail_msg: "This playbook only supports Debian systems."
success_msg: "Operating system is supported."

- name: Include Variables
include_vars:
file: vars.yml

- name: Load AWS Caller Information
amazon.aws.aws_caller_info:
register: aws_caller_info
68 changes: 38 additions & 30 deletions infrastructure/nomad/playbooks/init.yml
@@ -18,42 +18,13 @@
      ansible.builtin.set_fact:
        system_arch: "{{ 'arm64' if ansible_architecture == 'aarch64' else 'amd64' }}"

-    - name: Include Variables
-      include_vars:
-        file: vars.yml
-
    - name: Check Operating System of the Target System
      assert:
        that:
          - ansible_facts['os_family'] == "Debian"
        fail_msg: "This playbook only supports Debian systems."
        success_msg: "Operating system is supported."

-    - name: Load AWS Caller Information
-      amazon.aws.aws_caller_info:
-      register: aws_caller_info
-      delegate_to: localhost
-      become: true
-      become_user: "{{ lookup('env', 'USER') }}"
-
-    - name: Check AWS Caller Information
-      ansible.builtin.assert:
-        that:
-          - aws_caller_info is defined
-          - aws_caller_info.user_id is defined
-          - aws_caller_info.user_id | length > 0
-        fail_msg: "AWS caller information is invalid or empty."
-        success_msg: "AWS caller information is valid."
-
-    - name: Check Profile
-      ansible.builtin.assert:
-        that:
-          - profile is defined
-          - profile != ''
-          - profile in profiles
-        fail_msg: "The profile variable is not set correctly."
-        success_msg: "The profile variable is set to: {{ profile }}."

    - name: Install Common Utility Packages and Dependencies
      ansible.builtin.apt:
        name:
@@ -93,7 +64,35 @@
          - cpuid
          - msr-tools
        state: present
+      ignore_errors: yes

+    - name: Include Variables
+      include_vars:
+        file: vars.yml
+
+    - name: Load AWS Caller Information
+      amazon.aws.aws_caller_info:
+      register: aws_caller_info
+      delegate_to: localhost
+      become: true
+      become_user: "{{ lookup('env', 'USER') }}"
+
+    - name: Check AWS Caller Information
+      ansible.builtin.assert:
+        that:
+          - aws_caller_info is defined
+          - aws_caller_info.user_id is defined
+          - aws_caller_info.user_id | length > 0
+        fail_msg: "AWS caller information is invalid or empty."
+        success_msg: "AWS caller information is valid."
+
+    - name: Check Profile
+      ansible.builtin.assert:
+        that:
+          - profile is defined
+          - profile != ''
+          - profile in profiles
+        fail_msg: "The profile variable is not set correctly."
+        success_msg: "The profile variable is set to: {{ profile }}."

  tasks:
    - name: Add DataDog Repository Key
@@ -204,6 +203,7 @@
validate_certs: no
register: vault_init
when: vault_status.json.initialized == false
no_log: true

- name: Save Vault Unseal Key and Root Token
ansible.builtin.copy:
@@ -213,6 +213,7 @@
when: vault_status.json.initialized == false
become: true
become_user: "{{ hostvars[inventory_hostname].ansible_user }}"
no_log: true

- name: Determine Vault Seal Status
ansible.builtin.uri:
@@ -231,6 +232,7 @@
- vault_seal_status.json.sealed == true
become: true
become_user: "{{ hostvars[inventory_hostname].ansible_user }}"
no_log: true

- name: Parse Vault Initialization File
ansible.builtin.set_fact:
@@ -239,6 +241,7 @@
- vault_init_content is defined
- vault_status.json.initialized == true
- vault_seal_status.json.sealed == true
no_log: true

- name: Unseal Vault
ansible.builtin.uri:
@@ -260,12 +263,14 @@
retries: 5
delay: 10
when: vault_seal_status.json.sealed == true
no_log: true

- name: Fetch Secrets
ansible.builtin.set_fact:
mev_commit_secrets: "{{ lookup('amazon.aws.aws_secret', profile ~ '/' ~ vault_secret_path) }}"
when: unseal_result.json.sealed == false
delegate_to: localhost
no_log: true

- name: Enable KV Secrets Engine
ansible.builtin.uri:
@@ -286,6 +291,7 @@
when:
- vault_status.json.initialized == false
- unseal_result.json.sealed == false
no_log: true

- name: Write Retrieved Secret into Vault
ansible.builtin.uri:
@@ -299,6 +305,7 @@
status_code: [200, 204]
validate_certs: no
when: unseal_result.json.sealed == false
no_log: true

- name: Install Nomad
ansible.builtin.apt:
@@ -350,6 +357,7 @@
mode: "0644"
vars:
vault_token: "{{ vault_init.json.root_token }}"
no_log: true

- name: Restart and Enable Nomad Service
ansible.builtin.systemd:
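The Vault tasks above (initialize, persist the unseal key, unseal, then write secrets) track Vault's HTTP API. A hedged sketch of the unseal call the playbook automates, assuming Vault listens on the default https://127.0.0.1:8200 with a self-signed certificate and the saved unseal key is exported as VAULT_UNSEAL_KEY:

```sh
# Report whether Vault is currently sealed.
curl --insecure --silent https://127.0.0.1:8200/v1/sys/seal-status | jq .sealed

# Submit the unseal key saved during initialization; "sealed": false indicates success.
curl --insecure --silent --request PUT \
  --data "{\"key\": \"${VAULT_UNSEAL_KEY}\"}" \
  https://127.0.0.1:8200/v1/sys/unseal | jq .sealed
```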
14 changes: 14 additions & 0 deletions infrastructure/nomad/playbooks/vars.yml
@@ -8,6 +8,20 @@ tls_key_file: "{{ private_keys_dir }}/{{ server_common_name }}.key"
tls_ca_crt_file: "{{ ca_certificates_dir }}/ca-{{ server_common_name }}.crt"

profiles:
ci:
job_names:
- "mev-commit-geth-bootnode1"
- "mev-commit-geth-signer-node1"
- "mev-commit-geth-member-node"
- "deploy-contracts"
- "mev-commit-bootnode1"
- "mev-commit-provider-node1"
- "mev-commit-provider-node1-funder"
- "mev-commit-oracle"
- "mev-commit-bidder-node1"
- "mev-commit-bidder-node1-funder"
- "mev-commit-provider-emulator-node1"
- "mev-commit-bidder-emulator-node1"
devnet:
job_names:
- "datadog-agent-logs-collector"
