# infrastructure #42
# NOTE: the hosting UI flagged this file as containing bidirectional Unicode text, which may be
# interpreted or compiled differently than it appears. Review it in an editor that reveals hidden
# Unicode characters.
# Workflow header: triggers, manual-dispatch inputs, token permissions and
# concurrency policy for the infrastructure workflow.
name: infrastructure

on:
  # Callable from other workflows; no inputs are declared for this trigger.
  workflow_call:
  # Manual trigger; all inputs below are only populated for manual runs.
  workflow_dispatch:
    inputs:
      profile:
        description: 'Profile'
        type: choice
        options:
          - devnet
          - testnet
          - stressnet
        default: 'devnet'
      all_targets:
        description: 'All Arch & Os Targets'
        type: boolean
        default: false
      debug:
        description: 'Debug Deployment'
        type: boolean
        default: false
      logs:
        description: 'Collect Logs'
        type: boolean
        default: false
      target_machine:
        description: 'Target Machine'
        type: choice
        options:
          - lax1
          - chi1
          - mia2
          - mia3
        # The default must be one of the options above; the previous value
        # 'lax' matched none of them.
        default: 'lax1'

# The workflow token only needs to read repository contents.
permissions:
  contents: read

# One run per workflow/ref/target machine; a newer run cancels the one in
# progress. `target_machine` is empty for non-manual runs.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event.inputs.target_machine }}
  cancel-in-progress: true
jobs:
  # The only job in this workflow; every step listed under `steps` runs on the
  # same self-hosted runner.
  cluster:
    name: Setup and Test Nomad Cluster
    # Hosted-runner alternative, currently disabled:
    # runs-on: ubuntu-22.04
    runs-on: self-hosted
    # Upper bound for the whole job, in minutes.
    timeout-minutes: 180
    steps:
# Derives the flags and the target address used by the later steps and
# exports them through GITHUB_ENV.
- name: Setup Environment
  # Inputs and secrets are handed to the script as environment variables
  # instead of being spliced into the script text, so their content can never
  # be interpreted as shell syntax.
  env:
    EVENT_NAME: ${{ github.event_name }}
    PROFILE_INPUT: ${{ github.event.inputs.profile }}
    LOGS_INPUT: ${{ github.event.inputs.logs }}
    DEBUG_INPUT: ${{ github.event.inputs.debug }}
    TARGET_MACHINE_INPUT: ${{ github.event.inputs.target_machine }}
    INFRASTRUCTURE_DNS_RECORDS: ${{ secrets.INFRASTRUCTURE_DNS_RECORDS }}
    DATADOG_API_KEY: ${{ secrets.DATADOG_API_KEY }}
  run: |
    RUNNER_START_TIME="$(date +%s)"

    # Make the infrastructure host names resolvable. tee's stdout is discarded
    # so the records are not echoed into the job log.
    # NOTE(review): `tee -a` appends on every run; on a persistent runner
    # /etc/hosts will accumulate duplicate entries.
    echo "${INFRASTRUCTURE_DNS_RECORDS}" | sudo tee -a /etc/hosts > /dev/null
    sudo systemctl restart systemd-resolved

    # A manual deployment is a run started through workflow_dispatch.
    IS_MANUAL_DEPLOYMENT=false
    if [ "${EVENT_NAME}" == "workflow_dispatch" ]; then
      IS_MANUAL_DEPLOYMENT=true
    fi

    CLUSTER_ENVIRONMENT_FLAG="--environment devenv"
    CLUSTER_PROFILE_FLAG="--profile ci"
    CLUSTER_DATADOG_KEY_FLAG=""
    if [ "${IS_MANUAL_DEPLOYMENT}" == "true" ]; then
      CLUSTER_PROFILE_FLAG="--profile ${PROFILE_INPUT}"
      CLUSTER_DATADOG_KEY_FLAG="--datadog-key ${DATADOG_API_KEY}"
    fi

    # Log collection is only switched off when the input is literally "false";
    # an empty input (non-manual run) leaves the flag unset.
    CLUSTER_LOGS_FLAG=""
    if [ "${LOGS_INPUT}" == "false" ]; then
      CLUSTER_LOGS_FLAG="--no-logs-collection"
    fi

    CLUSTER_DEBUG_FLAG=""
    if [ "${DEBUG_INPUT}" == "true" ]; then
      CLUSTER_DEBUG_FLAG="--debug"
    fi

    # Export everything that does not depend on name resolution first, so the
    # failure notification (gated on IS_MANUAL_DEPLOYMENT) still fires if the
    # lookup below fails.
    {
      echo "RUNNER_START_TIME=${RUNNER_START_TIME}"
      echo "IS_MANUAL_DEPLOYMENT=${IS_MANUAL_DEPLOYMENT}"
      echo "CLUSTER_ENVIRONMENT_FLAG=${CLUSTER_ENVIRONMENT_FLAG}"
      echo "CLUSTER_PROFILE_FLAG=${CLUSTER_PROFILE_FLAG}"
      echo "CLUSTER_LOGS_FLAG=${CLUSTER_LOGS_FLAG}"
      echo "CLUSTER_DATADOG_KEY_FLAG=${CLUSTER_DATADOG_KEY_FLAG}"
      echo "CLUSTER_DEBUG_FLAG=${CLUSTER_DEBUG_FLAG}"
    } >> "${GITHUB_ENV}"

    # Manual runs target the selected machine, everything else the runner
    # itself. Only the first IPv4 line of the answer is kept: `dig +short` may
    # print several lines (CNAMEs, multiple A records), and a multi-line value
    # would corrupt GITHUB_ENV.
    TARGET_MACHINE_IP="127.0.0.1"
    if [ "${IS_MANUAL_DEPLOYMENT}" == "true" ]; then
      TARGET_MACHINE_IP="$(dig +short "${TARGET_MACHINE_INPUT}" | grep -E '^[0-9]+(\.[0-9]+){3}$' | head -n 1)"
      if [ -z "${TARGET_MACHINE_IP}" ]; then
        echo "Unable to resolve target machine '${TARGET_MACHINE_INPUT}'" >&2
        exit 1
      fi
    fi
    echo "TARGET_MACHINE_IP=${TARGET_MACHINE_IP}" >> "${GITHUB_ENV}"
# Posts a "deployment initialized" message to the CI Slack webhook.
# Only runs for manual deployments.
- name: Notify - Deployment Initialized
  if: ${{ env.IS_MANUAL_DEPLOYMENT == 'true' }}
  run: |
    WORKFLOW_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
    # The payload must be strict JSON: no trailing comma after the last
    # element of "fields". The heredoc terminator has to start at column 0 of
    # the script, because `<<-` strips tabs only.
    PAYLOAD=$(cat <<-EOH
    {
      "text": "<@${{ github.actor }}> - deployment to <http://${TARGET_MACHINE_IP}:4646/ui|*${{ github.event.inputs.target_machine }}*> has been initialized",
      "attachments": [
        {
          "color": "#0000FF",
          "fields": [
            {
              "title": "Workflow",
              "value": "<${WORKFLOW_URL}|View Workflow Run>",
              "short": false
            }
          ]
        }
      ]
    }
    EOH
    )
    curl -X POST -H 'Content-type: application/json' --data "${PAYLOAD}" "${{ secrets.SLACK_CI_CHANNEL_WEBHOOK_URL }}"
# Checks out the repository together with all of its submodules.
- name: Checkout Code
  uses: actions/checkout@v4
  with:
    # First non-empty value wins: the branch of a triggering workflow_run
    # event, then a `branch` dispatch input, then the ref of this run.
    # NOTE(review): no `branch` input is declared in the visible part of this
    # workflow - confirm whether that fallback is still needed.
    ref: ${{ github.event.workflow_run.head_branch || github.event.inputs.branch || github.ref }}
    submodules: recursive
# Restores/saves the Go module and build caches, the pipx directory and the
# goreleaser binary, keyed on the hash of every go.work.sum in the repository.
- name: Setup Cache
  # The id is required: a later step reads
  # `steps.cache.outputs.cache-hit`, which is always empty without it.
  id: cache
  uses: actions/cache@v4
  with:
    path: |
      ~/go/pkg/mod
      ~/.cache/go-build
      ~/.local/pipx
      /usr/local/bin/goreleaser
    key: ${{ runner.os }}-go-${{ hashFiles('**/go.work.sum') }}
    # Fall back to the most recent cache for this OS when the exact key misses.
    restore-keys: ${{ runner.os }}-go-
# Installs the Go toolchain used by the remaining steps.
- name: Setup Go
  uses: actions/setup-go@v5
  with:
    # Quoted so YAML keeps it a string; an unquoted version is parsed as a
    # float and a value such as 1.20 would silently become 1.2.
    go-version: '1.22'
    check-latest: true
    cache-dependency-path: go.work.sum
# Installs goreleaser and the ethereum package through apt, and the AWS Python
# libraries through pip/pipx.
- name: Install Required Dependencies
  # NOTE(review): this reads the output of a step whose id is `cache`; verify
  # that such an id exists, otherwise the expression is always true.
  if: steps.cache.outputs.cache-hit != 'true'
  run: |
    # Register the two package sources before refreshing the apt index.
    echo 'deb [trusted=yes] https://repo.goreleaser.com/apt/ /' | sudo tee /etc/apt/sources.list.d/goreleaser.list
    sudo add-apt-repository --yes ppa:ethereum/ethereum
    sudo apt-get update
    sudo apt-get install --yes goreleaser ethereum
    # boto3/botocore are installed for the default Python and injected into
    # the pipx environment of ansible-core.
    pip install boto3 botocore
    pipx inject ansible-core botocore boto3
# Prepares Ansible and AWS credentials on the runner, writes the inventory for
# the target machine, checks connectivity and (unless all targets were
# requested) records the target's OS/arch as ARTIFACTS_GOOS/ARTIFACTS_GOARCH.
- name: Configure Control Machine
  working-directory: infrastructure/nomad
  run: |
    # Non-manual runs manage the runner itself over a local connection;
    # manual runs connect to the target machine as `ubuntu` with the
    # deployment key.
    ANSIBLE_USER="${USER}"
    ANSIBLE_CONNECTION="ansible_connection=local"
    if [ "${IS_MANUAL_DEPLOYMENT}" == "true" ]; then
      ANSIBLE_USER="ubuntu"
      ANSIBLE_CONNECTION=""
      # NOTE(review): this export only affects the current step; later steps
      # do not inherit it - confirm ansible.cfg covers them if needed.
      export ANSIBLE_HOST_KEY_CHECKING=false
      mkdir -p ~/.ssh && \
        chmod 700 ~/.ssh && \
        echo "${{ secrets.INFRASTRUCTURE_DEPLOYMENT_KEY }}" > ~/.ssh/id_ed25519 && \
        chmod 600 ~/.ssh/id_ed25519
    fi

    # Values are quoted so that no character of a credential is subject to
    # word splitting or globbing.
    aws configure set aws_access_key_id "${{ secrets.AWS_ACCESS_KEY_ID }}"
    aws configure set aws_secret_access_key "${{ secrets.AWS_SECRET_ACCESS_KEY }}"
    aws configure set default.region us-west-2

    cp ansible.cfg.example ansible.cfg
    # The same machine acts as Nomad server and Nomad client.
    cat <<-EOH > hosts.ini
    [nomad_servers]
    ${TARGET_MACHINE_IP} ${ANSIBLE_CONNECTION} ansible_user=${ANSIBLE_USER}
    [nomad_clients]
    ${TARGET_MACHINE_IP} ${ANSIBLE_CONNECTION} ansible_user=${ANSIBLE_USER}
    EOH

    # The command is tested directly in the `if`: the step shell runs with
    # `-e`, so a failing plain assignment would abort the script before a
    # separate `$?` check could print the error.
    if ! STDOUT="$(ansible all -o -m command -a 'uname -sm' 2>&1)"; then
      echo "Unable to connect to target machine: ${STDOUT}"
      exit 1
    fi

    if [ "${IS_MANUAL_DEPLOYMENT}" == "false" ] || [ "${{ github.event.inputs.all_targets }}" == "false" ]; then
      # Keep only what follows "stdout) " in Ansible's one-line output,
      # i.e. the `uname -sm` result: "<kernel name> <machine>".
      STDOUT="$(echo "${STDOUT}" | awk -F 'stdout\\) ' '{print $2}')"
      OS="$(echo "${STDOUT}" | awk '{print $1}' | tr '[:upper:]' '[:lower:]')"
      ARCH="$(echo "${STDOUT}" | awk '{print $2}' | tr '[:upper:]' '[:lower:]')"
      # Translate uname machine names into Go architecture names.
      case "${ARCH}" in
        x86_64) ARCH="amd64" ;;
        aarch64) ARCH="arm64" ;;
      esac
      echo "ARTIFACTS_GOOS=${OS}" >> "${GITHUB_ENV}"
      echo "ARTIFACTS_GOARCH=${ARCH}" >> "${GITHUB_ENV}"
    fi
# Manual deployments tear down whatever cluster is currently on the target
# before deploying again.
- name: Destroy Existing Cluster
  if: ${{ env.IS_MANUAL_DEPLOYMENT == 'true' }}
  working-directory: infrastructure/nomad
  # CLUSTER_DEBUG_FLAG is intentionally unquoted: it is either empty or a
  # single flag.
  run: ./cluster.sh destroy ${CLUSTER_DEBUG_FLAG}
# Non-manual runs initialize the cluster first; the elapsed time is exported
# as INIT_DURATION (HH:MM:SS).
- name: Initialize Cluster
  if: ${{ env.IS_MANUAL_DEPLOYMENT == 'false' }}
  working-directory: infrastructure/nomad
  run: |
    STARTED_AT="$(date +%s)"
    # Flag variables are left unquoted on purpose so empty ones disappear and
    # "--name value" pairs split into two arguments.
    ./cluster.sh init ${CLUSTER_ENVIRONMENT_FLAG} ${CLUSTER_PROFILE_FLAG} ${CLUSTER_DEBUG_FLAG}
    ELAPSED="$(( $(date +%s) - STARTED_AT ))"
    echo "INIT_DURATION=$(date -ud "@${ELAPSED}" +'%H:%M:%S')" >> ${GITHUB_ENV}
# Deploys the cluster and exports the elapsed time as DEPLOY_DURATION
# (HH:MM:SS), which the success notification reports.
- name: Deploy Cluster
  working-directory: infrastructure/nomad
  run: |
    STARTED_AT="$(date +%s)"
    # Flag variables are left unquoted on purpose so empty ones disappear and
    # "--name value" pairs split into two arguments.
    ./cluster.sh deploy ${CLUSTER_ENVIRONMENT_FLAG} ${CLUSTER_PROFILE_FLAG} ${CLUSTER_LOGS_FLAG} ${CLUSTER_DATADOG_KEY_FLAG} ${CLUSTER_DEBUG_FLAG}
    ELAPSED="$(( $(date +%s) - STARTED_AT ))"
    echo "DEPLOY_DURATION=$(date -ud "@${ELAPSED}" +'%H:%M:%S')" >> ${GITHUB_ENV}
# Posts a "deployment successful" message, including the deploy and total
# runner durations, to the CI Slack webhook. Manual deployments only.
- name: Notify - Deployment Successful
  if: ${{ env.IS_MANUAL_DEPLOYMENT == 'true' && success() }}
  run: |
    # Fall back to "N/A" when no deploy duration was exported.
    DEPLOY_DURATION=${DEPLOY_DURATION:-N/A}
    # Time since RUNNER_START_TIME, rendered as HH:MM:SS.
    ELAPSED_SECONDS=$(( $(date +%s) - ${RUNNER_START_TIME} ))
    RUNNER_DURATION=$(date -ud "@${ELAPSED_SECONDS}" +'%H:%M:%S')
    WORKFLOW_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
    # The heredoc terminator has to start at column 0 of the script, because
    # `<<-` strips tabs only.
    PAYLOAD=$(cat <<-EOH
    {
      "text": "<@${{ github.actor }}> - deployment to <http://${TARGET_MACHINE_IP}:4646/ui|*${{ github.event.inputs.target_machine }}*> was successful",
      "attachments": [
        {
          "color": "#00FF00",
          "fields": [
            {
              "title": "Workflow",
              "value": "<${WORKFLOW_URL}|View Workflow Run>",
              "short": false
            },
            {
              "title": "Deployment Duration",
              "value": "Deploy: ${DEPLOY_DURATION}\nRunner: ${RUNNER_DURATION}",
              "short": false
            }
          ]
        }
      ]
    }
    EOH
    )
    curl -X POST -H 'Content-type: application/json' --data "${PAYLOAD}" "${{ secrets.SLACK_CI_CHANNEL_WEBHOOK_URL }}"
# Posts a "deployment failed" message to the CI Slack webhook when any earlier
# step of a manual deployment failed.
- name: Notify - Deployment Failed
  if: ${{ env.IS_MANUAL_DEPLOYMENT == 'true' && failure() }}
  run: |
    WORKFLOW_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
    # The payload must be strict JSON: no trailing comma after the last
    # element of "fields". The heredoc terminator has to start at column 0 of
    # the script, because `<<-` strips tabs only.
    PAYLOAD=$(cat <<-EOH
    {
      "text": "<@${{ github.actor }}> - deployment to <http://${TARGET_MACHINE_IP}:4646/ui|*${{ github.event.inputs.target_machine }}*> has failed",
      "attachments": [
        {
          "color": "#FF0000",
          "fields": [
            {
              "title": "Workflow",
              "value": "<${WORKFLOW_URL}|View Workflow Run>",
              "short": false
            }
          ]
        }
      ]
    }
    EOH
    )
    curl -X POST -H 'Content-type: application/json' --data "${PAYLOAD}" "${{ secrets.SLACK_CI_CHANNEL_WEBHOOK_URL }}"
# After a failed non-manual run: dumps the nomad unit journal, the job list
# and the stdout/stderr log of every task of every allocation into files in
# the working directory.
- name: Collect Cluster Logs
  if: ${{ env.IS_MANUAL_DEPLOYMENT == 'false' && failure() }}
  run: |
    NOMAD_SERVER="http://${TARGET_MACHINE_IP}:4646"

    # Fetches one log stream ($1: stdout|stderr) of task ${TASK} in allocation
    # ${ALLOC_ID} and writes the decoded content to
    # "<alloc>_<job>_<task>_<stream>.log".
    fetch_task_log() {
      local stream="$1" response data
      response=$(curl -s "${NOMAD_SERVER}/v1/client/fs/logs/${ALLOC_ID}?task=${TASK}&type=${stream}")
      # Only decode when the response is JSON with a non-null .Data; anything
      # else (error text, missing field) is reported instead of being turned
      # into an empty log file.
      if data="$(jq -er '.Data // empty' <<< "${response}" 2> /dev/null)"; then
        base64 -d <<< "${data}" > "${ALLOC_ID}_${JOB}_${TASK}_${stream}.log"
      else
        echo "Failed to fetch ${stream} log for ${ALLOC_ID}_${JOB}_${TASK}:"
        echo "${response}"
      fi
    }

    journalctl -u nomad > nomad.log
    curl -s "${NOMAD_SERVER}/v1/jobs" > nomad_jobs.json

    ALLOC_IDS=$(curl -s "${NOMAD_SERVER}/v1/allocations" | jq -r '.[].ID')
    for ALLOC_ID in ${ALLOC_IDS}; do
      # Fetch the allocation once and read both fields from the same document.
      ALLOCATION=$(curl -s "${NOMAD_SERVER}/v1/allocation/${ALLOC_ID}")
      JOB=$(jq -r '.JobID' <<< "${ALLOCATION}")
      # An allocation without task states yields no tasks instead of an error.
      TASKS=$(jq -r '.TaskStates // {} | keys[]' <<< "${ALLOCATION}")
      for TASK in ${TASKS}; do
        fetch_task_log stdout
        fetch_task_log stderr
      done
    done
# Publishes the collected log files as the `cluster-logs` artifact after a
# failed non-manual run.
- name: Upload Cluster Logs
  if: ${{ env.IS_MANUAL_DEPLOYMENT == 'false' && failure() }}
  uses: actions/upload-artifact@v4
  with:
    name: cluster-logs
    # Job list, nomad journal and the per-task stdout/stderr logs.
    path: |
      nomad_jobs.json
      nomad.log
      *_stdout.log
      *_stderr.log
# After a failed non-manual run: opens a tunshell session on the runner and
# posts the command needed to join it (without the secret) to the CI Slack
# webhook.
- name: Initialize Debug Shell
  if: ${{ env.IS_MANUAL_DEPLOYMENT == 'false' && failure() }}
  # The event payload contains attacker-influenced text (commit messages), so
  # it is passed as an environment variable rather than pasted into the
  # script, where a single quote would end the string and run as shell code.
  env:
    WORKFLOW_RUN_JSON: ${{ toJson(github.event.workflow_run) }}
  run: |
    TUNSHELL_KEYS=$(curl -sSf -X POST https://eu.relay.tunshell.com/api/sessions)
    # `\${TUNSHELL_SECRET}` stays a literal placeholder in the posted command.
    DEBUG_SHELL="sh <(curl -sSf https://lets.tunshell.com/init.sh) L $(echo ${TUNSHELL_KEYS} | jq -r .peer2_key) \${TUNSHELL_SECRET} eu.relay.tunshell.com"
    WORKFLOW_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"

    # First line of the head commit message, JSON-escaped (outer quotes
    # removed) so quotes or backslashes cannot break the payload below.
    PR_TITLE="$(jq -r '(.head_commit.message // "") | split("\n")[0] // "" | tojson | .[1:-1]' <<< "${WORKFLOW_RUN_JSON:-null}")"
    PR_URL="$(jq -r '.head_commit.url // ""' <<< "${WORKFLOW_RUN_JSON:-null}")"

    # Optional "Pull Request" field; only present when a commit URL is known.
    # It carries its own trailing comma because another field always follows.
    PR_FIELD=""
    if [ -n "${PR_URL}" ]; then
      PR_FIELD="{ \"title\": \"Pull Request\", \"value\": \"<${PR_URL}|${PR_TITLE}>\", \"short\": false },"
    fi

    # The heredoc terminator has to start at column 0 of the script, because
    # `<<-` strips tabs only.
    PAYLOAD=$(cat <<-EOH
    {
      "text": "<@${{ github.actor }}> infrastructure workflow has failed:",
      "attachments": [
        {
          "color": "#FF0000",
          "fields": [
            {
              "title": "Workflow",
              "value": "<${WORKFLOW_URL}|View Workflow Run>",
              "short": false
            },
            ${PR_FIELD}
            {
              "title": "Debug Shell",
              "value": "\`\`\`${DEBUG_SHELL}\`\`\`",
              "short": false
            }
          ]
        }
      ]
    }
    EOH
    )
    echo "Debug Shell: ${DEBUG_SHELL}"
    curl -X POST -H 'Content-type: application/json' --data "${PAYLOAD}" "${{ secrets.SLACK_CI_CHANNEL_WEBHOOK_URL }}"
    # NOTE(review): this pipes a remotely hosted script into `sh` on the
    # runner; consider pinning or vendoring it.
    curl -sSf https://lets.tunshell.com/init.sh | sh -s -- T $(echo ${TUNSHELL_KEYS} | jq -r .peer1_key) ${{ secrets.TUNSHELL_SECRET }} eu.relay.tunshell.com