Skip to content

Commit

Permalink
Workload tracing (#645)
Browse files Browse the repository at this point in the history
* [WIP] Workload tracing for prometheus

* fmt

* more fmt

* Add juju topology to workload's traces context

* Add charm name to topology

* pin websockets to <14.0

* Add OTEL_RESOURCE_ATTRIBUTES conditionally if workload tracing is enabled

* Naming in metadata.yaml

* Use self.workload_tracing_endpoint
  • Loading branch information
mmkay authored Nov 13, 2024
1 parent a832528 commit 3d0ac1a
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 9 deletions.
9 changes: 8 additions & 1 deletion metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,16 @@ requires:
- (client) scraping targets for self-monitoring
- (client) posting alerts to alertmanager server
- (server) serving data to grafana
tracing:
charm-tracing:
interface: tracing
limit: 1
description: |
Enables sending charm traces to a distributed tracing backend such as Tempo.
workload-tracing:
interface: tracing
limit: 1
description: |
Enables sending workload traces (internal Prometheus traces) to a distributed tracing backend such as Tempo.
peers:
prometheus-peers:
Expand Down
51 changes: 43 additions & 8 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
PrometheusRemoteWriteProvider,
)
from charms.tempo_coordinator_k8s.v0.charm_tracing import trace_charm
from charms.tempo_coordinator_k8s.v0.tracing import TracingEndpointRequirer
from charms.tempo_coordinator_k8s.v0.tracing import TracingEndpointRequirer, charm_tracing_config
from charms.traefik_k8s.v1.ingress_per_unit import (
IngressPerUnitReadyForUnitEvent,
IngressPerUnitRequirer,
Expand Down Expand Up @@ -120,8 +120,8 @@ def to_status(tpl: Tuple[str, str]) -> StatusBase:


@trace_charm(
tracing_endpoint="tracing_endpoint",
server_cert="server_ca_cert_path",
tracing_endpoint="charm_tracing_endpoint",
server_cert="server_cert",
extra_types=[
KubernetesComputeResourcesPatch,
CertHandler,
Expand Down Expand Up @@ -222,7 +222,16 @@ def __init__(self, *args):
)

self.catalogue = CatalogueConsumer(charm=self, item=self._catalogue_item)
self.tracing = TracingEndpointRequirer(self, protocols=["otlp_http"])
self.charm_tracing = TracingEndpointRequirer(
self, relation_name="charm-tracing", protocols=["otlp_http"]
)
self.workload_tracing = TracingEndpointRequirer(
self, relation_name="workload-tracing", protocols=["otlp_grpc"]
)

self.charm_tracing_endpoint, self.server_cert = charm_tracing_config(
self.charm_tracing, self._ca_cert_path
)

self.framework.observe(self.on.prometheus_pebble_ready, self._on_pebble_ready)
self.framework.observe(self.on.config_changed, self._configure)
Expand Down Expand Up @@ -404,6 +413,12 @@ def _prometheus_layer(self) -> Layer:
a Pebble layer specification for the Prometheus workload container.
"""
logger.debug("Building pebble layer")
environment = {}
if self.workload_tracing_endpoint:
# tracing is ready to serve traffic, so we can add the topology.
environment["OTEL_RESOURCE_ATTRIBUTES"] = (
f"juju_application={self._topology.application},juju_model={self._topology.model},juju_model_uuid={self._topology.model_uuid},juju_unit={self._topology.unit},juju_charm={self._topology.charm_name}"
)
layer_config = {
"summary": "Prometheus layer",
"description": "Pebble layer configuration for Prometheus",
Expand All @@ -413,6 +428,7 @@ def _prometheus_layer(self) -> Layer:
"summary": "prometheus daemon",
"command": self._generate_command(),
"startup": "enabled",
"environment": environment,
}
},
}
Expand Down Expand Up @@ -947,6 +963,22 @@ def _alerting_config(self) -> dict:
)
return alerting_config

def _tracing_config(self) -> dict:
    """Assemble the tracing settings for the Prometheus workload.

    The endpoint is the ``otlp_grpc`` endpoint reported by the
    workload-tracing requirer, and every trace is sampled
    (``sampling_fraction`` of 1).

    Returns:
        A dict with ``endpoint``, ``sampling_fraction`` and ``insecure``.
        When ``self.server_cert`` is set, ``insecure`` is False and a
        ``tls_config`` entry is included; otherwise ``insecure`` is True.
    """
    tracing_section = {
        "endpoint": self.workload_tracing.get_endpoint("otlp_grpc"),
        "sampling_fraction": 1,
    }

    ca_file = self.server_cert
    if not ca_file:
        # No CA certificate available: send traces without TLS.
        tracing_section["insecure"] = True
        return tracing_section

    tracing_section["insecure"] = False
    # CERT_PATH / KEY_PATH are module-level constants; presumably the
    # workload's own certificate and private key -- confirm at definition.
    tracing_section["tls_config"] = {
        "ca_file": ca_file,
        "cert_file": CERT_PATH,
        "key_file": KEY_PATH,
    }
    return tracing_section

def _generate_prometheus_config(self) -> bool:
"""Construct Prometheus configuration and write to filesystem.
Expand Down Expand Up @@ -974,6 +1006,9 @@ def _generate_prometheus_config(self) -> bool:

web_config = self._web_config()

if self.workload_tracing_endpoint:
prometheus_config["tracing"] = self._tracing_config()

# Check if config changed, using its hash
config_hash = sha256(
yaml.safe_dump(
Expand Down Expand Up @@ -1072,10 +1107,10 @@ def _push(self, path, contents):
self.container.push(path, contents, make_dirs=True, encoding="utf-8")

@property
def tracing_endpoint(self) -> Optional[str]:
"""Tempo endpoint for charm tracing."""
if self.tracing.is_ready():
return self.tracing.get_endpoint("otlp_http")
def workload_tracing_endpoint(self) -> Optional[str]:
    """Tempo endpoint for workload tracing.

    Returns:
        The ``otlp_grpc`` endpoint from the workload-tracing requirer when
        it reports ready, otherwise None.
    """
    requirer = self.workload_tracing
    return requirer.get_endpoint("otlp_grpc") if requirer.is_ready() else None

@property
Expand Down
2 changes: 2 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ deps =
pytest-operator
prometheus-api-client
tenacity
websockets < 14.0
# https://github.com/juju/python-libjuju/issues/1184
commands =
pytest -vv --tb native --log-cli-level=INFO --color=yes -s {posargs} {toxinidir}/tests/integration
allowlist_externals =
Expand Down

0 comments on commit 3d0ac1a

Please sign in to comment.