Skip to content

Commit

Permalink
Add tracing using OpenTelemetry (#34)
Browse files Browse the repository at this point in the history
* Add tracing using OpenTelemetry
  • Loading branch information
andmat900 authored May 7, 2024
1 parent 683e655 commit 8a63838
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 29 deletions.
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
FROM python:3.9.0-buster AS build
FROM python:3.9-buster AS build

COPY . /src
WORKDIR /src
RUN python3 setup.py bdist_wheel

FROM python:3.9.0-slim-buster
FROM python:3.9-slim-buster

COPY --from=build /src/dist/*.whl /tmp
# hadolint ignore=DL3013
Expand Down
6 changes: 6 additions & 0 deletions manifests/base/suite-runner-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,19 @@ data:
name: {etos_configmap}
- secretRef:
name: {etos_rabbitmq_secret}
- configMapRef:
name: {etos_observability_configmap}
env:
- name: TERCC
value: '{EiffelTestExecutionRecipeCollectionCreatedEvent}'
- name: KUBEXIT_NAME
value: esr
- name: KUBEXIT_GRAVEYARD
value: /graveyard
- name: OTEL_CONTEXT
value: {otel_context}
- name: OTEL_COLLECTOR_HOST
value: {otel_collector_host}
volumeMounts:
- name: graveyard
mountPath: /graveyard
Expand Down
5 changes: 4 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,7 @@
# scipy==1.0
#
pyscaffold==3.2.3
etos_lib==3.2.1
etos_lib==4.2.0
opentelemetry-api~=1.21
opentelemetry-exporter-otlp~=1.21
opentelemetry-sdk~=1.21
5 changes: 4 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ setup_requires = pyscaffold>=3.2a0,<3.3a0
# Add here dependencies of your project (semicolon/line-separated), e.g.
install_requires =
pyscaffold==3.2.3
etos_lib==3.2.1
etos_lib==4.2.0
opentelemetry-api~=1.21
opentelemetry-exporter-otlp~=1.21
opentelemetry-sdk~=1.21

# Require a specific Python version, e.g. Python 2.7 or >= 3.4
python_requires = >=3.4
Expand Down
22 changes: 22 additions & 0 deletions src/suite_starter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@
"""ETOS suite starter module."""
import os
from importlib.metadata import version, PackageNotFoundError

from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.resources import SERVICE_NAME, SERVICE_NAMESPACE, SERVICE_VERSION, Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

from etos_lib.logging.logger import setup_logging

# The suite starter shall not send logs to RabbitMQ as it
Expand All @@ -31,3 +38,18 @@
DEV = os.getenv("DEV", "false").lower() == "true"
ENVIRONMENT = "development" if DEV else "production"
setup_logging("ETOS Suite Starter", VERSION, ENVIRONMENT)

if os.getenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"):
    # Tracing is opt-in: nothing below runs unless an OTLP traces endpoint
    # is present in the environment.
    EXPORTER = OTLPSpanExporter()
    PROCESSOR = BatchSpanProcessor(EXPORTER)

    # Resource attributes attached to the tracer provider created here.
    PROVIDER = TracerProvider(
        resource=Resource.create(
            {
                SERVICE_NAME: "etos-suite-starter",
                SERVICE_VERSION: VERSION,
                SERVICE_NAMESPACE: ENVIRONMENT,
            }
        )
    )
    PROVIDER.add_span_processor(PROCESSOR)

    # Install the provider globally so trace.get_tracer() picks it up.
    trace.set_tracer_provider(PROVIDER)
76 changes: 51 additions & 25 deletions src/suite_starter/suite_starter.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,14 @@
import os
from pathlib import Path

from opentelemetry import trace, context
from opentelemetry.propagate import inject

from etos_lib import ETOS
from etos_lib.kubernetes.jobs import Job
from etos_lib.logging.logger import FORMAT_CONFIG
from etos_lib.opentelemetry.semconv import Attributes as SemConvAttributes


LOGGER = logging.getLogger(__name__)
# Remove spam from pika.
Expand Down Expand Up @@ -57,6 +62,7 @@ def __init__(self, suite_runner_template_path: str = "/app/suite_runner_template
self.suite_runner_callback,
can_nack=True,
)
self.tracer = trace.get_tracer(__name__)

def _load_template(self, suite_runner_template_path: str) -> str:
"""Load the suite runner template file."""
Expand All @@ -71,6 +77,7 @@ def _validate_template(self, suite_runner_template: str):
"EiffelTestExecutionRecipeCollectionCreatedEvent": "FakeEvent",
"suite_id": "FakeID",
"job_name": "FakeName",
"otel_context": "",
}
formatted = suite_runner_template.format(**data, **self.etos.config.get("configuration"))
job = Job(in_cluster=bool(os.getenv("DOCKER_CONTEXT")))
Expand All @@ -82,13 +89,26 @@ def _configure(self):
"docker_image": os.getenv("SUITE_RUNNER"),
"log_listener": os.getenv("LOG_LISTENER"),
"etos_configmap": os.getenv("ETOS_CONFIGMAP"),
"etos_observability_configmap": os.getenv("ETOS_OBSERVABILITY_CONFIGMAP"),
"etos_rabbitmq_secret": os.getenv("ETOS_RABBITMQ_SECRET"),
"ttl": os.getenv("ETOS_ESR_TTL", "3600"),
"termination_grace_period": os.getenv("ETOS_TERMINATION_GRACE_PERIOD", "300"),
"sidecar_image": os.getenv("ETOS_SIDECAR_IMAGE"),
"otel_collector_host": os.getenv("OTEL_COLLECTOR_HOST") or "null",
}
self.etos.config.set("configuration", configuration)

def _get_current_context(self):
    """Serialize the current OpenTelemetry context into a single string.

    The current context is logged, after which ``inject()`` fills an empty
    carrier dict with the propagation entries, for example
    ``{'traceparent': '00-0be6c260d9cbe9772298eaf19cb90a5b-371353ee8fbd3ced-01'}``.

    :return: Carrier entries rendered as comma-separated ``key=value`` pairs.
    :rtype: str
    """
    LOGGER.info("Current OpenTelemetry context: %s", context.get_current())
    # inject() writes into the dict it is given; it does not return it.
    propagation_entries = {}
    inject(propagation_entries)
    pairs = [f"{key}={value}" for key, value in propagation_entries.items()]
    return ",".join(pairs)

def suite_runner_callback(self, event, _):
"""Start a suite runner on a TERCC event.
Expand All @@ -97,31 +117,37 @@ def suite_runner_callback(self, event, _):
:return: Whether event was ACK:ed or not.
:rtype: bool
"""
suite_id = event.meta.event_id
FORMAT_CONFIG.identifier = suite_id
LOGGER.info("Received a TERCC event. Build data for ESR.")
data = {"EiffelTestExecutionRecipeCollectionCreatedEvent": json.dumps(event.json)}
data["suite_id"] = suite_id

job = Job(in_cluster=bool(os.getenv("DOCKER_CONTEXT")))
job_name = job.uniqueify(f"suite-runner-{suite_id}").lower()
data["job_name"] = job_name

LOGGER.info("Dynamic data: %r", data)
LOGGER.info("Static data: %r", self.etos.config.get("configuration"))
try:
assert data["EiffelTestExecutionRecipeCollectionCreatedEvent"]
except AssertionError as exception:
LOGGER.critical("Incomplete data for ESR. %r", exception)
raise

body = job.load_yaml(
self.suite_runner_template.format(**data, **self.etos.config.get("configuration"))
)
LOGGER.info("Starting new executor: %r", job_name)
job.create_job(body)
LOGGER.info("ESR successfully launched.")
return True
with self.tracer.start_as_current_span("suite", context=context.get_current()) as span:
suite_id = event.meta.event_id
FORMAT_CONFIG.identifier = suite_id
LOGGER.info("Received a TERCC event. Build data for ESR.")
data = {"EiffelTestExecutionRecipeCollectionCreatedEvent": json.dumps(event.json)}
data["suite_id"] = suite_id
data["otel_context"] = self._get_current_context()
span.set_attribute(SemConvAttributes.SUITE_ID, suite_id)

job = Job(in_cluster=bool(os.getenv("DOCKER_CONTEXT")))
job_name = job.uniqueify(f"suite-runner-{suite_id}").lower()
span.set_attribute(SemConvAttributes.SUITE_RUNNER_JOB_ID, job_name)
data["job_name"] = job_name

LOGGER.info("Dynamic data: %r", data)
LOGGER.info("Static data: %r", self.etos.config.get("configuration"))
try:
assert data["EiffelTestExecutionRecipeCollectionCreatedEvent"]
except AssertionError as exception:
LOGGER.critical("Incomplete data for ESR. %r", exception)
span.record_exception(exception)
span.set_status(trace.Status(trace.StatusCode.ERROR))
raise

body = job.load_yaml(
self.suite_runner_template.format(**data, **self.etos.config.get("configuration"))
)
LOGGER.info("Starting new executor: %r", job_name)
job.create_job(body)
LOGGER.info("ESR successfully launched.")
return True

def run(self):
"""Run the SuiteStarter main loop.
Expand Down

0 comments on commit 8a63838

Please sign in to comment.