From 4fd8b39aa251dbe0dca6cd07eaab976df0761ecd Mon Sep 17 00:00:00 2001 From: mrekucci Date: Mon, 19 Aug 2024 08:25:53 +0200 Subject: [PATCH] feat: add option to enable tracing to GH infrastructure action --- .github/workflows/infrastructure.yml | 19 +++++-- infrastructure/nomad/cluster.sh | 52 ++++++++++++------- infrastructure/nomad/playbooks/deploy.yml | 22 ++++++-- .../jobs/mev-commit-emulator.nomad.j2 | 2 +- .../templates/jobs/mev-commit.nomad.j2 | 2 +- .../templates/jobs/otel-collector.nomad.j2 | 2 +- .../nomad/playbooks/variables/profiles.yml | 11 ++-- 7 files changed, 74 insertions(+), 36 deletions(-) diff --git a/.github/workflows/infrastructure.yml b/.github/workflows/infrastructure.yml index 9d4aa479f..f6d1bcc10 100644 --- a/.github/workflows/infrastructure.yml +++ b/.github/workflows/infrastructure.yml @@ -21,6 +21,10 @@ on: description: 'Debug Deployment' type: boolean default: false + tracing: + description: 'Enable Tracing' + type: boolean + default: false logs: description: 'Collect Logs' type: boolean @@ -64,23 +68,25 @@ jobs: sudo systemctl restart systemd-resolved IS_MANUAL_DEPLOYMENT=$([ "${{ github.event_name }}" == "workflow_dispatch" ] && echo true || echo false) + TARGET_MACHINE_IP=$([ "${IS_MANUAL_DEPLOYMENT}" == "true" ] && echo "$(dig +short ${{ github.event.inputs.target_machine }})" || echo "127.0.0.1") CLUSTER_ENVIRONMENT_FLAG="--environment devenv" CLUSTER_PROFILE_FLAG=$([ "${IS_MANUAL_DEPLOYMENT}" == "true" ] && echo "--profile ${{ github.event.inputs.profile }}" || echo "--profile ci") CLUSTER_LOGS_FLAG=$([ "${{ github.event.inputs.logs }}" == "false" ] && echo "--no-logs-collection" || echo "") CLUSTER_DATADOG_KEY_FLAG=$([ "${IS_MANUAL_DEPLOYMENT}" == "true" ] && echo "--datadog-key ${{ secrets.DATADOG_API_KEY }}" || echo "") CLUSTER_L1_RPC_URL_FLAG="--l1-rpc-url ${{ secrets.L1_RPC_URL }}" + CLUSTER_OTEL_COLLECTOR_ENDPOINT_URL_FLAG=$([ "${{ github.event.inputs.tracing }}" == "true" ] && echo "--otel-collector-endpoint-url 
grpc://${TARGET_MACHINE_IP}:4317" || echo "") CLUSTER_DEBUG_FLAG=$([ "${{ github.event.inputs.debug }}" == "true" ] && echo "--debug" || echo "") - TARGET_MACHINE_IP=$([ "${IS_MANUAL_DEPLOYMENT}" == "true" ] && echo "$(dig +short ${{ github.event.inputs.target_machine }})" || echo "127.0.0.1") echo "RUNNER_START_TIME=${RUNNER_START_TIME}" >> ${GITHUB_ENV} echo "IS_MANUAL_DEPLOYMENT=${IS_MANUAL_DEPLOYMENT}" >> ${GITHUB_ENV} + echo "TARGET_MACHINE_IP=${TARGET_MACHINE_IP}" >> ${GITHUB_ENV} echo "CLUSTER_ENVIRONMENT_FLAG=${CLUSTER_ENVIRONMENT_FLAG}" >> ${GITHUB_ENV} echo "CLUSTER_PROFILE_FLAG=${CLUSTER_PROFILE_FLAG}" >> ${GITHUB_ENV} echo "CLUSTER_LOGS_FLAG=${CLUSTER_LOGS_FLAG}" >> ${GITHUB_ENV} echo "CLUSTER_DATADOG_KEY_FLAG=${CLUSTER_DATADOG_KEY_FLAG}" >> ${GITHUB_ENV} echo "CLUSTER_L1_RPC_URL_FLAG=${CLUSTER_L1_RPC_URL_FLAG}" >> ${GITHUB_ENV} + echo "CLUSTER_OTEL_COLLECTOR_ENDPOINT_URL_FLAG=${CLUSTER_OTEL_COLLECTOR_ENDPOINT_URL_FLAG}" >> ${GITHUB_ENV} echo "CLUSTER_DEBUG_FLAG=${CLUSTER_DEBUG_FLAG}" >> ${GITHUB_ENV} - echo "TARGET_MACHINE_IP=${TARGET_MACHINE_IP}" >> ${GITHUB_ENV} - name: Notify - Deployment Initialized if: ${{ env.IS_MANUAL_DEPLOYMENT == 'true' }} @@ -203,7 +209,14 @@ jobs: - name: Deploy Cluster run: | START_TIME="$(date +%s)" - ./cluster.sh deploy ${CLUSTER_ENVIRONMENT_FLAG} ${CLUSTER_PROFILE_FLAG} ${CLUSTER_LOGS_FLAG} ${CLUSTER_DATADOG_KEY_FLAG} ${CLUSTER_L1_RPC_URL_FLAG} ${CLUSTER_DEBUG_FLAG} + ./cluster.sh deploy \ + ${CLUSTER_ENVIRONMENT_FLAG} \ + ${CLUSTER_PROFILE_FLAG} \ + ${CLUSTER_LOGS_FLAG} \ + ${CLUSTER_DATADOG_KEY_FLAG} \ + ${CLUSTER_L1_RPC_URL_FLAG} \ + ${CLUSTER_OTEL_COLLECTOR_ENDPOINT_URL_FLAG} \ + ${CLUSTER_DEBUG_FLAG} END_TIME="$(date +%s)" echo "DEPLOY_DURATION=$(date -ud "@$((END_TIME - START_TIME))" +'%H:%M:%S')" >> ${GITHUB_ENV} working-directory: infrastructure/nomad diff --git a/infrastructure/nomad/cluster.sh b/infrastructure/nomad/cluster.sh index b7874d667..605520fc7 100755 --- a/infrastructure/nomad/cluster.sh +++ 
b/infrastructure/nomad/cluster.sh @@ -15,34 +15,36 @@ environment_name="devenv" profile_name="devnet" datadog_key="" l1_rpc_url="" +otel_collector_endpoint_url="" help() { echo "Usage:" echo "$0 [init [--environment ] [--skip-certificates-setup] [--debug]]" - echo "$0 [deploy [version=HEAD] [--environment ] [--profile ] [--force-build-templates] [--no-logs-collection] [--datadog-key ] [--l1-rpc-url ] [--release] [--debug]]" + echo "$0 [deploy [version=HEAD] [--environment ] [--profile ] [--force-build-templates] [--no-logs-collection] [--datadog-key ] [--l1-rpc-url ] [--otel-collector-endpoint-url ] [--release] [--debug]]" echo "$0 [destroy [--debug]] [--help]" echo "$0 --help" echo echo "Parameters:" - echo " init Initialize the environment." - echo " --environment Specify the environment to use (default is devenv)." - echo " --skip-certificates-setup Skip the certificates installation and setup." - echo " --debug Enable debug mode for detailed output." + echo " init Initialize the environment." + echo " --environment Specify the environment to use (default is devenv)." + echo " --skip-certificates-setup Skip the certificates installation and setup." + echo " --debug Enable debug mode for detailed output." echo - echo " deploy [version=HEAD] Deploy the specified artifact version (a git commit hash or an existing AWS S3 tag). If not specified or set to HEAD, a local build is triggered." - echo " --environment Specify the environment to use (default is devenv)." - echo " --profile Specify the profile to use (default is devnet)." - echo " --force-build-templates Force the build of all job templates before deployment." - echo " --no-logs-collection Disable the collection of logs from deployed jobs." - echo " --datadog-key Datadog API key, cannot be empty." - echo " --l1-rpc-url L1 RPC URL, cannot be empty." - echo " --release It will ignore the specified deployment version and use the current HEAD tag as the build version." 
- echo " --debug Enable debug mode for detailed output." + echo " deploy [version=HEAD] Deploy the specified artifact version (a git commit hash or an existing AWS S3 tag). If not specified or set to HEAD, a local build is triggered." + echo " --environment Specify the environment to use (default is devenv)." + echo " --profile Specify the profile to use (default is devnet)." + echo " --force-build-templates Force the build of all job templates before deployment." + echo " --no-logs-collection Disable the collection of logs from deployed jobs." + echo " --datadog-key Datadog API key, cannot be empty." + echo " --l1-rpc-url L1 RPC URL, cannot be empty." + echo " --otel-collector-endpoint-url OpenTelemetry Collector Endpoint URL, cannot be empty." + echo " --release It will ignore the specified deployment version and use the current HEAD tag as the build version." + echo " --debug Enable debug mode for detailed output." echo - echo " destroy Destroy the whole cluster." - echo " --debug Enable debug mode for detailed output." + echo " destroy Destroy the whole cluster." + echo " --debug Enable debug mode for detailed output." echo - echo " --help Display this help message." + echo " --help Display this help message." 
echo echo "Examples:" echo " Initialize with default environment and profile:" @@ -66,8 +68,8 @@ help() { echo " Deploy with a specific version, environment, profile and force to build all job templates:" echo " $0 deploy v0.1.0 --environment devenv --profile testnet --force-build-templates" echo - echo " Deploy with a specific version, environment, profile in debug mode with disabled logs collection, Datadog API key and L1 RPC URL:" - echo " $0 deploy v0.1.0 --environment devenv --profile testnet --no-logs-collection --datadog-key your_datadog_key --l1-rpc-url your_rpc_url --debug" + echo " Deploy with a specific version, environment, profile in debug mode with disabled logs collection, Datadog API key, L1 RPC URL, and OpenTelemetry Collector Endpoint URL:" + echo " $0 deploy v0.1.0 --environment devenv --profile testnet --no-logs-collection --datadog-key your_datadog_key --l1-rpc-url your_rpc_url --otel-collector-endpoint-url your_otel_url --debug" echo echo " Destroy with specific environment and debug mode:" echo " $0 destroy --environment devenv --debug" @@ -77,7 +79,7 @@ help() { usage() { echo "Usage:" echo "$0 [init [--environment ] [--skip-certificates-setup] [--debug]]" - echo "$0 [deploy [version=HEAD] [--environment ] [--profile ] [--force-build-templates] [--no-logs-collection] [--datadog-key ] [--l1-rpc-url ] [--release] [--debug]]" + echo "$0 [deploy [version=HEAD] [--environment ] [--profile ] [--force-build-templates] [--no-logs-collection] [--datadog-key ] [--l1-rpc-url ] [--otel-collector-endpoint-url ] [--release] [--debug]]" echo "$0 [destroy [--debug]] [--help]" echo "$0 --help" exit 1 @@ -216,6 +218,15 @@ parse_args() { usage fi fi + if [[ $# -gt 0 && $1 == "--otel-collector-endpoint-url" ]]; then + if [[ $# -gt 1 && ! $2 =~ ^-- ]]; then + otel_collector_endpoint_url="$2" + shift 2 + else + echo "Error: --otel-collector-endpoint-url requires a value." 
+ usage + fi + fi if [[ $# -gt 0 && $1 == "--release" ]]; then release_flag=true shift @@ -272,6 +283,7 @@ main() { [[ "${force_build_templates_flag}" == true ]] && flags+=("--extra-vars" "build_templates=true") [[ -n "${datadog_key}" ]] && flags+=("--extra-vars" "datadog_key=${datadog_key}") [[ -n "${l1_rpc_url}" ]] && flags+=("--extra-vars" "l1_rpc_url=${l1_rpc_url}") + [[ -n "${otel_collector_endpoint_url}" ]] && flags+=("--extra-vars" "otel_collector_endpoint_url=${otel_collector_endpoint_url}") [[ "${release_flag}" == true ]] && flags+=("--extra-vars" "release=true") ;; "${destroy_flag}") diff --git a/infrastructure/nomad/playbooks/deploy.yml b/infrastructure/nomad/playbooks/deploy.yml index e7c50da30..9c4f1531a 100644 --- a/infrastructure/nomad/playbooks/deploy.yml +++ b/infrastructure/nomad/playbooks/deploy.yml @@ -8,6 +8,7 @@ release: false build_artifacts: false build_templates: false + otel_collector_endpoint_url: "" aws_s3_bucket: "primev-infrastructure-artifacts" pre_tasks: @@ -122,16 +123,29 @@ fail_msg: "The profile name is not set correctly." success_msg: "The profile name is set to: {{ profile }}." 
- - name: Determine Logs Collection + - name: Set Jobs Definition + set_fact: + jobs: "{{ profiles[profile].jobs }}" + + - name: Disable Logs Collection set_fact: jobs: >- {{ - profiles[profile].jobs + jobs | selectattr('name', 'ne', 'datadog-agent-logs-collector') | list - if no_logs_collection | default(false) | bool - else profiles[profile].jobs }} + when: no_logs_collection | default(false) | bool + + - name: Disable OpenTelemetry Trace Collection + set_fact: + jobs: >- + {{ + jobs + | selectattr('name', 'ne', 'otel-collector') + | list + }} + when: otel_collector_endpoint_url | trim | length == 0 - name: Determine "{{ ansible_env.HOME }}/{{ env }}" Status ansible.builtin.stat: diff --git a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-emulator.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-emulator.nomad.j2 index 0ad1f422e..7c48d11a2 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/mev-commit-emulator.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/mev-commit-emulator.nomad.j2 @@ -67,7 +67,7 @@ job "{{ job.name }}" { if job.get('env') and job.env.get('log-tags') else 'service:' + job.name + '-{{ env "NOMAD_ALLOC_INDEX" }}' }}" - EMULATOR_OTEL_COLLECTOR_ENDPOINT_URL="{{ job.env.get('otel-collector-endpoint-url', '') }}" + EMULATOR_OTEL_COLLECTOR_ENDPOINT_URL="{{ job.env.get('otel_collector_endpoint_url', '') }}" {%- raw %} {{- $idx := add (env "NOMAD_ALLOC_INDEX" | parseInt) 1 }} {{- range nomadService (printf "%s%d" "{% endraw %}{{ job.target_name }}{% raw %}" $idx) }} diff --git a/infrastructure/nomad/playbooks/templates/jobs/mev-commit.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/mev-commit.nomad.j2 index 33918f059..2af5a2d3a 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/mev-commit.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/mev-commit.nomad.j2 @@ -93,7 +93,7 @@ job "{{ job.name }}" { if job.env['log-tags'] is defined and job.env['log-tags'] else 'service:' + 
job.name + '-{{ env "NOMAD_ALLOC_INDEX" }}' }}" - MEV_COMMIT_OTEL_COLLECTOR_ENDPOINT_URL="{{ job.env['otel-collector-endpoint-url'] }}" + MEV_COMMIT_OTEL_COLLECTOR_ENDPOINT_URL="{{ job.env.get('otel_collector_endpoint_url', '') }}" {%- raw %} MEV_COMMIT_KEYSTORE_PATH="/local/data-{{ env "NOMAD_ALLOC_INDEX" }}/keystore" MEV_COMMIT_KEYSTORE_FILENAME="{{ with secret "secret/data/mev-commit" }}{{ .Data.data.{% endraw %}{{ job.artifacts | selectattr('keystore', 'defined') | map(attribute='keystore.name') | first }}{% raw %}_filename }}{{ end }}" diff --git a/infrastructure/nomad/playbooks/templates/jobs/otel-collector.nomad.j2 b/infrastructure/nomad/playbooks/templates/jobs/otel-collector.nomad.j2 index 3deca6623..c990bed4b 100644 --- a/infrastructure/nomad/playbooks/templates/jobs/otel-collector.nomad.j2 +++ b/infrastructure/nomad/playbooks/templates/jobs/otel-collector.nomad.j2 @@ -61,7 +61,7 @@ job "{{ job.name }}" { {% endif %} artifact { - source = "https://github.com/open-telemetry/opentelemetry-collector-releases/releases/download/v0.107.0/otelcol-contrib_0.107.0_linux_{{ 'x64' if target_system_architecture == 'x86_64' else 'arm64' }}.tar.gz" + source = "https://github.com/open-telemetry/opentelemetry-collector-releases/releases/download/v0.107.0/otelcol-contrib_0.107.0_linux_{{ 'amd64' if target_system_architecture == 'x86_64' else 'arm64' }}.tar.gz" } template { diff --git a/infrastructure/nomad/playbooks/variables/profiles.yml b/infrastructure/nomad/playbooks/variables/profiles.yml index bdb2d1530..daea091cb 100644 --- a/infrastructure/nomad/playbooks/variables/profiles.yml +++ b/infrastructure/nomad/playbooks/variables/profiles.yml @@ -1,5 +1,4 @@ datacenter: "dc1" -otel_collector_endpoint_url: "grpc://{{ ansible_facts['default_ipv4']['address'] }}:4317" artifacts: bidder_emulator: &bidder_emulator_artifact @@ -146,7 +145,7 @@ jobs: type: bootnode tls_crt_file: "{{ tls_crt_file }}" tls_key_file: "{{ tls_key_file }}" - otel-collector-endpoint-url: "{{ 
otel_collector_endpoint_url }}" + otel_collector_endpoint_url: "{{ otel_collector_endpoint_url }}" mev_commit_provider_node1: &mev_commit_provider_node1_job name: mev-commit-provider-node1 @@ -173,7 +172,7 @@ jobs: nat_address: "{{ ansible_facts['default_ipv4']['address'] }}" tls_crt_file: "{{ tls_crt_file }}" tls_key_file: "{{ tls_key_file }}" - otel-collector-endpoint-url: "{{ otel_collector_endpoint_url }}" + otel_collector_endpoint_url: "{{ otel_collector_endpoint_url }}" mev_commit_provider_node2: &mev_commit_provider_node2_job name: mev-commit-provider-node2 @@ -257,7 +256,7 @@ jobs: - metrics: to: 8080 env: - otel-collector-endpoint-url: "{{ otel_collector_endpoint_url }}" + otel_collector_endpoint_url: "{{ otel_collector_endpoint_url }}" mev-commit-provider-emulator-nodes: &mev_commit_provider_emulator_nodes_job name: mev-commit-provider-emulator-nodes @@ -295,7 +294,7 @@ jobs: type: bidder tls_crt_file: "{{ tls_crt_file }}" tls_key_file: "{{ tls_key_file }}" - otel-collector-endpoint-url: "{{ otel_collector_endpoint_url }}" + otel_collector_endpoint_url: "{{ otel_collector_endpoint_url }}" mev_commit_bidder_node2: &mev_commit_bidder_node2_job name: mev-commit-bidder-node2 @@ -440,7 +439,7 @@ jobs: to: 8080 env: l1_rpc_url: "{{ l1_rpc_url }}" - otel-collector-endpoint-url: "{{ otel_collector_endpoint_url }}" + otel_collector_endpoint_url: "{{ otel_collector_endpoint_url }}" mev_commit_bidder_emulator_nodes: &mev_commit_bidder_emulator_nodes_job name: mev-commit-bidder-emulator-nodes