diff --git a/.github/workflows/chatbot.yaml b/.github/workflows/chatbot.yaml
index 4fec87e0..597b4360 100644
--- a/.github/workflows/chatbot.yaml
+++ b/.github/workflows/chatbot.yaml
@@ -46,7 +46,7 @@ jobs:
 
       - name: Install opentelemetry dependencies
         run: |
-          pip install --no-cache-dir opentelemetry-sdk opentelemetry-exporter-otlp opentelemetry-instrumentation
+          pip install --no-cache-dir opentelemetry-sdk opentelemetry-exporter-otlp
 
       - name: Download OpenTelemetry Collector Contrib
         run: |
@@ -79,7 +79,7 @@ jobs:
               verbosity: detailed
           service:
             pipelines:
-              traces:
+              metrics:
                 receivers: [otlp]
                 exporters: [debug, otlphttp]
           ' > otel-collector-config.yaml
@@ -93,12 +93,12 @@ jobs:
           sudo apt-get update
           sudo apt-get install -y qemu-user-static
 
-      - name: Start job trace
+      - name: Start Metric
         run: |
           export WORKFLOW_NAME="chatbot"
           export JOB_NAME="chatbot-build-and-push"
-          export TRACE_ACTION="start"
-          python ci/trace-steps.py
+          export METRIC_ACTION="start"
+          python ci/metrics-generate.py
 
       - name: Build Image
         id: build_image
@@ -140,10 +140,10 @@ jobs:
           tags: ${{ steps.build_image.outputs.tags }}
           registry: ${{ env.REGISTRY }}
 
-      - name: End job trace
+      - name: End Metric
         run: |
           export WORKFLOW_NAME="chatbot"
           export JOB_NAME="chatbot-build-and-push"
-          export TRACE_ACTION="end"
-          python ci/trace-steps.py
+          export METRIC_ACTION="end"
+          python ci/metrics-generate.py
 
diff --git a/.github/workflows/test-trace-steps.yaml b/.github/workflows/test-trace-steps.yaml
deleted file mode 100644
index 673fe1d0..00000000
--- a/.github/workflows/test-trace-steps.yaml
+++ /dev/null
@@ -1,94 +0,0 @@
-# To run locally
-# act -W .github/workflows/test-trace-steps.yaml --container-architecture linux/amd64 -b ci/logs:/logs
-
-name: Test Workflow
-
-on:
-  pull_request:
-    branches:
-      - main
-    paths:
-      - .github/workflows/test-trace-steps.yaml
-  workflow_dispatch:
-
-jobs:
-  test-build:
-    if: "!contains(github.event.pull_request.labels.*.name, 'hold-tests')"
-    runs-on: ubuntu-24.04
-    steps:
-      - uses: actions/checkout@v4.1.7
-      - name: Set up Python
-        uses: actions/setup-python@v5.1.0
-        with:
-          python-version: '3.11'
-
-      - name: Install Python dependencies
-        run: |
-          pip install --no-cache-dir opentelemetry-sdk opentelemetry-exporter-otlp opentelemetry-instrumentation
-
-      - name: Download OpenTelemetry Collector Contrib
-        run: |
-          wget https://github.com/open-telemetry/opentelemetry-collector-releases/releases/download/v0.103.0/otelcol-contrib_0.103.0_linux_amd64.tar.gz
-          tar -xvf otelcol-contrib_0.103.0_linux_amd64.tar.gz
-
-      - name: Write secrets to files
-        run: |
-          echo "${{ secrets.ROSA_OTEL_CACERT }}" > /tmp/ca.crt
-          echo "${{ secrets.ROSA_OTEL_SERVER_CRT }}" > /tmp/server.crt
-          echo "${{ secrets.ROSA_OTEL_SERVER_KEY }}" > /tmp/server.key
-
-      - name: Configure OpenTelemetry Collector
-        run: |
-          echo '
-          receivers:
-            otlp:
-              protocols:
-                grpc:
-                http:
-          exporters:
-            otlphttp:
-              endpoint: "${{ secrets.ROSA_OTEL_ENDPOINT }}"
-              tls:
-                insecure: false
-                cert_file: /tmp/server.crt
-                key_file: /tmp/server.key
-                ca_file: /tmp/ca.crt
-            debug:
-              verbosity: detailed
-          service:
-            pipelines:
-              traces:
-                receivers: [otlp]
-                exporters: [debug, otlphttp]
-          ' > otel-collector-config.yaml
-
-      - name: Run OpenTelemetry Collector
-        run: |
-          ./otelcol-contrib --config otel-collector-config.yaml > otel-collector.log 2>&1 &
-
-      - name: Start job trace
-        run: |
-          export WORKFLOW_NAME="test-trace"
-          export JOB_NAME="test-build"
-          export TRACE_ACTION="start"
-          python ci/trace-steps.py
-
-      - name: Build
-        run: |
-          echo "Simulating build step..."
-          sleep 2
-
-      - name: Test
-        run: |
-          echo "Simulating test step..."
-          sleep 2
-
-      - name: End job trace
-        run: |
-          export WORKFLOW_NAME="test-trace"
-          export JOB_NAME="test-build"
-          export TRACE_ACTION="end"
-          python ci/trace-steps.py
-
-      - name: Display OpenTelemetry Collector Logs
-        run: cat otel-collector.log
diff --git a/.github/workflows/testing_framework.yaml b/.github/workflows/testing_framework.yaml
index efcb8a2e..6544ea21 100644
--- a/.github/workflows/testing_framework.yaml
+++ b/.github/workflows/testing_framework.yaml
@@ -53,10 +53,21 @@ jobs:
         with:
           python-version: '3.11'
 
-      - name: Install Python dependencies for otel trace generation
+      - name: Install opentelemetry dependencies
         run: |
           pip install --no-cache-dir opentelemetry-sdk opentelemetry-exporter-otlp opentelemetry-instrumentation
 
+      - name: Download OpenTelemetry Collector Contrib
+        run: |
+          wget https://github.com/open-telemetry/opentelemetry-collector-releases/releases/download/v0.103.0/otelcol-contrib_0.103.0_linux_amd64.tar.gz
+          tar -xvf otelcol-contrib_0.103.0_linux_amd64.tar.gz
+
+      - name: Write secrets to files
+        run: |
+          echo "${{ secrets.ROSA_OTEL_CACERT }}" > /tmp/ca.crt
+          echo "${{ secrets.ROSA_OTEL_SERVER_CRT }}" > /tmp/server.crt
+          echo "${{ secrets.ROSA_OTEL_SERVER_KEY }}" > /tmp/server.key
+
       - name: Configure OpenTelemetry Collector
         run: |
           echo '
@@ -64,39 +75,34 @@ jobs:
             otlp:
               protocols:
                 grpc:
-                  endpoint: "0.0.0.0:4317"
                 http:
-                  endpoint: "0.0.0.0:4318"
           exporters:
             otlphttp:
-              endpoint: https://apps.platform-sts.pcbk.p1.openshiftapps.com
+              endpoint: "${{ secrets.ROSA_OTEL_ENDPOINT }}"
               tls:
                 insecure: false
-                cert_pem: ${{ secrets.CLIENT_CERT_ROSA_OTEL }}
-                key_pem: ${{ secrets.CLIENT_KEY_ROSA_OTEL }}
-                ca_pem: ${{ secrets.SERVER_CERT_ROSA_OTEL }}
-            logging:
+                cert_file: /tmp/server.crt
+                key_file: /tmp/server.key
+                ca_file: /tmp/ca.crt
+            debug:
               verbosity: detailed
           service:
             pipelines:
-              traces:
+              metrics:
                 receivers: [otlp]
-                exporters: [logging,otlphttp]
+                exporters: [debug, otlphttp]
           ' > otel-collector-config.yaml
 
       - name: Run OpenTelemetry Collector
         run: |
-          curl --proto '=https' --tlsv1.2 -fOL https://github.com/open-telemetry/opentelemetry-collector-releases/releases/download/v0.102.1/otelcol_0.102.1_linux_amd64.tar.gz
-          tar -xvf otelcol_0.102.1_linux_amd64.tar.gz
-          chmod +x otelcol
-          ./otelcol --config otel-collector-config.yaml &
+          ./otelcol-contrib --config otel-collector-config.yaml > otel-collector.log 2>&1 &
 
-      - name: Start integration-tests trace
+      - name: Start integration-tests job metric
         run: |
-          export WORKFLOW_NAME="testing_framework"
-          export STEP_NAME="integration-tests"
+          export WORKFLOW_NAME="testing-framework"
+          export JOB_NAME="integration-tests"
-          export TRACE_ACTION="start"
-          python ci/trace-steps.py
+          export METRIC_ACTION="start"
+          python ci/metrics-generate.py
 
       - name: Checkout terraform module
         id: checkout-module
@@ -116,13 +122,6 @@ jobs:
         run: terraform init
         working-directory: terraform-test-environment-module
 
-      - name: Start bootstrap trace
-        run: |
-          export WORKFLOW_NAME="testing_framework"
-          export STEP_NAME="bootstrap"
-          export TRACE_ACTION="start"
-          python ci/trace-steps.py
-
       - name: Bootstrap
         id: up
         run: terraform apply -auto-approve -lock=false
@@ -131,13 +130,6 @@ jobs:
           TF_VAR_aws_instance_type: ${{ matrix.aws_image_type }}
           TF_VAR_aws_ami_architecture: ${{ matrix.aws_ami_architecture }}
 
-      - name: End bootstrap trace
-        run: |
-          export WORKFLOW_NAME="testing_framework"
-          export STEP_NAME="bootstrap"
-          export TRACE_ACTION="end"
-          python ci/trace-steps.py
-
       - name: Terraform Output
         id: terraform-output
         run: |
@@ -150,13 +142,6 @@ jobs:
         run: ansible-galaxy install -r ./provision/requirements.yml
         working-directory: ./main/recipes/natural_language_processing/chatbot
 
-      - name: Start ansible provision trace
-        run: |
-          export WORKFLOW_NAME="testing_framework"
-          export STEP_NAME="ansible-provision"
-          export TRACE_ACTION="start"
-          python ci/trace-steps.py
-
       - name: Provision
         run: |
           ansible-playbook ./main/recipes/natural_language_processing/chatbot/provision/playbook.yml \
@@ -165,37 +150,16 @@ jobs:
         env:
           ANSIBLE_HOST_KEY_CHECKING: false
 
-      - name: End ansible provision trace
-        run: |
-          export WORKFLOW_NAME="testing_framework"
-          export STEP_NAME="ansible-provision"
-          export TRACE_ACTION="end"
-          python ci/trace-steps.py
-
       - name: Install Dependencies
         working-directory: ./main/recipes/natural_language_processing/chatbot
         run: make install
 
-      - name: Start integration test trace
-        run: |
-          export WORKFLOW_NAME="testing_framework"
-          export STEP_NAME="run-integration-tests"
-          export TRACE_ACTION="start"
-          python ci/trace-steps.py
-
       - name: Run Integration Tests
         working-directory: ./main/recipes/natural_language_processing/chatbot
         run: make integration-tests
         env:
           URL: ${{ steps.terraform-output.outputs.url }}
 
-      - name: End integration test trace
-        run: |
-          export WORKFLOW_NAME="testing_framework"
-          export STEP_NAME="run-integration-tests"
-          export TRACE_ACTION="end"
-          python ci/trace-steps.py
-
       - name: Destroy Test Environment
         id: down
         if: always()
@@ -217,12 +181,12 @@ jobs:
         env:
           SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
 
-      - name: Stop testing_framework trace
+      - name: End integration-tests job metric
         run: |
-          export WORKFLOW_NAME="testing_framework"
-          export STEP_NAME="integration-tests"
+          export WORKFLOW_NAME="testing-framework"
+          export JOB_NAME="integration-tests"
-          export TRACE_ACTION="end"
-          python ci/trace-steps.py
+          export METRIC_ACTION="end"
+          python ci/metrics-generate.py
 
   release-images:
     runs-on: ubuntu-24.04
@@ -272,53 +236,10 @@ jobs:
         with:
           python-version: '3.11'
 
-      - name: Install Python dependencies for otel trace generation
-        run: |
-          pip install --no-cache-dir opentelemetry-sdk opentelemetry-exporter-otlp opentelemetry-instrumentation
-
-      - name: Configure OpenTelemetry Collector
-        run: |
-          echo '
-          receivers:
-            otlp:
-              protocols:
-                grpc:
-                  endpoint: "0.0.0.0:4317"
-          exporters:
-            logging:
-              loglevel: debug
-          service:
-            pipelines:
-              traces:
-                receivers: [otlp]
-                exporters: [logging]
-          ' > otel-collector-config.yaml
-
-      - name: Run OpenTelemetry Collector
-        run: |
-          curl --proto '=https' --tlsv1.2 -fOL https://github.com/open-telemetry/opentelemetry-collector-releases/releases/download/v0.102.1/otelcol_0.102.1_linux_amd64.tar.gz
-          tar -xvf otelcol_0.102.1_linux_amd64.tar.gz
-          chmod +x otelcol
-          ./otelcol --config otel-collector-config.yaml &
-
-      - name: Start chatbot make bootc trace
-        run: |
-          export WORKFLOW_NAME="testing_framework"
-          export STEP_NAME="test-make-targets"
-          export TRACE_ACTION="start"
-          python ci/trace-steps.py
-
       - name: chatbot
         working-directory: ./recipes/natural_language_processing/chatbot
         run: make bootc
 
-      - name: End chatbot make bootc trace
-        run: |
-          export WORKFLOW_NAME="testing_framework"
-          export STEP_NAME="test-make-targets"
-          export TRACE_ACTION="end"
-          python ci/trace-steps.py
-
       - name: Publish Job Results to Slack
         id: slack
         if: always()
diff --git a/ci/metrics-generate.py b/ci/metrics-generate.py
new file mode 100644
index 00000000..973f790e
--- /dev/null
+++ b/ci/metrics-generate.py
@@ -0,0 +1,51 @@
+import os
+import time
+from datetime import datetime
+from opentelemetry import metrics
+from opentelemetry.sdk.metrics import MeterProvider
+from opentelemetry.sdk.resources import Resource
+from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
+from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
+
+# Initialize MeterProvider and Exporter; WORKFLOW_NAME becomes the service.name resource attribute
+service_name = os.getenv("WORKFLOW_NAME", "default_service")
+resource = Resource.create({"service.name": service_name})
+exporter = OTLPMetricExporter(endpoint="localhost:4317", insecure=True)
+metric_reader = PeriodicExportingMetricReader(exporter)
+provider = MeterProvider(metric_readers=[metric_reader], resource=resource)
+metrics.set_meter_provider(provider)
+meter = metrics.get_meter(__name__)
+
+# Create a histogram for job duration
+job_duration_histogram = meter.create_histogram(
+    name="job_duration_seconds",
+    description="Duration of the job in seconds",
+    unit="s"
+)
+
+def set_start_time():
+    """Write the current timestamp to /tmp/start_time.txt for the later "end" invocation."""
+    start_time = datetime.now().timestamp()
+    with open("/tmp/start_time.txt", "w") as file:
+        file.write(str(start_time))
+    print("Start time recorded")
+
+def calculate_duration():
+    """Read the stored start timestamp and record the elapsed seconds, labelled with JOB_NAME."""
+    with open("/tmp/start_time.txt", "r") as file:
+        start_time = float(file.read())
+    end_time = datetime.now().timestamp()
+    duration = end_time - start_time
+    print(f"Total Duration: {duration}s")
+
+    # Record the duration in the histogram
+    job_duration_histogram.record(duration, {"job_name": os.getenv("JOB_NAME", "default_job")})
+
+if __name__ == "__main__":
+    action = os.getenv("METRIC_ACTION", "start")
+
+    if action == "start":
+        set_start_time()
+    elif action == "end":
+        calculate_duration()
+
diff --git a/ci/trace-steps.py b/ci/trace-steps.py
deleted file mode 100644
index 07db0606..00000000
--- a/ci/trace-steps.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import os
-import time
-from datetime import datetime
-from opentelemetry import trace
-from opentelemetry.sdk.resources import Resource
-from opentelemetry.sdk.trace import TracerProvider
-from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
-from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
-
-service_name = os.getenv("WORKFLOW_NAME", "default_service")
-job_name = os.getenv("JOB_NAME", "default_job")
-
-resource = Resource.create({"service.name": service_name})
-trace.set_tracer_provider(TracerProvider(resource=resource))
-tracer = trace.get_tracer(__name__)
-console_span_processor = BatchSpanProcessor(ConsoleSpanExporter())
-trace.get_tracer_provider().add_span_processor(console_span_processor)
-
-# Adding OTLP Span Exporter for actual data export
-otlp_exporter = OTLPSpanExporter(endpoint="localhost:4317", insecure=True)
-otlp_span_processor = BatchSpanProcessor(otlp_exporter)
-trace.get_tracer_provider().add_span_processor(otlp_span_processor)
-
-print("Tracer initialized with service name:", service_name)
-
-def set_start_time():
-    start_time = datetime.now().timestamp()
-    with open("/tmp/start_time.txt", "w") as file:
-        file.write(str(start_time))
-    print("Start time recorded")
-
-def calculate_duration():
-    with open("/tmp/start_time.txt", "r") as file:
-        start_time = float(file.read())
-    end_time = datetime.now().timestamp()
-    duration = end_time - start_time
-    print(f"Total Duration: {duration}s")
-    with tracer.start_as_current_span(job_name) as span:
-        span.set_attribute("total_duration_s", duration)
-
-if __name__ == "__main__":
-    action = os.getenv("TRACE_ACTION", "start")
-
-    if action == "start":
-        set_start_time()
-    elif action == "end":
-        calculate_duration()