Merge pull request #7 from canonical/OPENG-2298
Add a way to configure extra receivers for uncharmed applications
michaeldmitry authored Jul 1, 2024
2 parents e8abede + 3ac7474 commit 7d1d957
Showing 14 changed files with 713 additions and 587 deletions.
14 changes: 14 additions & 0 deletions charmcraft.yaml
@@ -106,3 +106,17 @@ parts:
- "jsonschema"
- "opentelemetry-exporter-otlp-proto-http==1.21.0"

+config:
+  options:
+    always_enable_zipkin:
+      description: Force-enable the receiver for the 'zipkin' protocol in Tempo, even if there is no integration currently requesting it.
+      type: boolean
+      default: false
+    always_enable_otlp_grpc:
+      description: Force-enable the receiver for the 'otlp_grpc' protocol in Tempo, even if there is no integration currently requesting it.
+      type: boolean
+      default: false
+    always_enable_otlp_http:
+      description: Force-enable the receiver for the 'otlp_http' protocol in Tempo, even if there is no integration currently requesting it.
+      type: boolean
+      default: false
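These options let an operator force-enable a receiver for applications that are not charmed and therefore cannot request a protocol over a tracing relation, e.g. `juju config tempo always_enable_zipkin=true` (assuming the charm is deployed under the application name `tempo`).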
28 changes: 22 additions & 6 deletions src/charm.py
@@ -7,7 +7,7 @@
import logging
import socket
from pathlib import Path
-from typing import Dict, List, Optional, Set, Tuple
+from typing import Dict, List, Optional, Set, Tuple, get_args

import ops
from charms.data_platform_libs.v0.s3 import S3Requirer
@@ -61,9 +61,6 @@ def __init__(self, *args):

        self.tempo = tempo = Tempo(
            external_host=self.hostname,
-            # we need otlp_http receiver for charm_tracing
-            # TODO add any extra receivers enabled manually via config
-            enable_receivers=["otlp_http"],
            use_tls=self.tls_available,
        )

@@ -250,6 +247,21 @@ def _local_ip(self) -> Optional[str]:
            )
            return None

+    @property
+    def enabled_receivers(self) -> Set[str]:
+        """Extra receivers enabled through config."""
+        enabled_receivers = set()
+        # otlp_http is needed by charm_tracing
+        enabled_receivers.add("otlp_http")
+        enabled_receivers.update(
+            [
+                receiver
+                for receiver in get_args(ReceiverProtocol)
+                if self.config.get(f"always_enable_{receiver}") is True
+            ]
+        )
+        return enabled_receivers
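For context: ReceiverProtocol here must be a typing.Literal of protocol names (otherwise get_args would return nothing useful), so get_args recovers every known receiver at runtime and checks each against an always_enable_<receiver> config key. A minimal standalone sketch of the same pattern (the Literal below is an illustrative stand-in, not the library's full list):

    from typing import Literal, get_args

    # Illustrative stand-in for the charm library's ReceiverProtocol.
    ReceiverProtocol = Literal["zipkin", "otlp_grpc", "otlp_http"]

    # Pretend charm config, e.g. after `juju config tempo always_enable_zipkin=true`.
    config = {"always_enable_zipkin": True, "always_enable_otlp_grpc": False}

    enabled = {"otlp_http"}  # always on: charm tracing ships its own traces over otlp_http
    enabled.update(
        receiver
        for receiver in get_args(ReceiverProtocol)
        if config.get(f"always_enable_{receiver}") is True
    )
    print(enabled)  # {'zipkin', 'otlp_http'} (set order may vary)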

    ##################
    # EVENT HANDLERS #
    ##################
@@ -392,13 +404,14 @@ def _update_tracing_relations(self):
        self._update_tempo_cluster()

    def _requested_receivers(self) -> Tuple[ReceiverProtocol, ...]:
-        """List what receivers we should activate, based on the active tracing relations."""
+        """List what receivers we should activate, based on the active tracing relations and config-enabled extra receivers."""
        # we start with the sum of the requested endpoints from the requirers
        requested_protocols = set(self.tracing.requested_protocols())

+        # update with enabled extra receivers
+        requested_protocols.update(self.enabled_receivers)
        # and publish only those we support
        requested_receivers = requested_protocols.intersection(set(self.tempo.receiver_ports))
-        requested_receivers.update(self.tempo.enabled_receivers)
        return tuple(requested_receivers)

    def server_cert(self):
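Note on the intersection in _requested_receivers above: it keeps the published receivers limited to protocols this Tempo workload actually exposes, even if a requirer (or a stray config key) asks for something else. A toy illustration, with a hypothetical receiver_ports mapping:

    requested = {"zipkin", "otlp_http", "jaeger_thrift_http"}
    receiver_ports = {"otlp_http": 4318, "otlp_grpc": 4317, "zipkin": 9411}  # hypothetical subset
    assert requested.intersection(receiver_ports) == {"zipkin", "otlp_http"}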
@@ -466,6 +479,9 @@ def _update_tempo_cluster(self):
            logger.error("skipped tempo cluster update: inconsistent state")
            return

+        if not self.unit.is_leader():
+            return
+
        kwargs = {}

        if self.tls_available:
4 changes: 2 additions & 2 deletions src/prometheus_alert_rules/tempo_workers/alerts.yaml
@@ -14,7 +14,7 @@ groups:
    for: 15m
    labels:
      severity: critical
-  - alert: TempoRequestErrors
+  - alert: TempoWorkerRequestErrors
    annotations:
      message: |
        The route {{ $labels.route }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors.
@@ -27,7 +27,7 @@
    for: 15m
    labels:
      severity: critical
-  - alert: TempoRequestLatency
+  - alert: TempoWorkerRequestLatency
    annotations:
      message: |
        {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
6 changes: 2 additions & 4 deletions src/tempo.py
@@ -68,14 +68,12 @@ class Tempo:
    def __init__(
        self,
        external_host: Optional[str] = None,
-        enable_receivers: Optional[Sequence[ReceiverProtocol]] = None,
        use_tls: bool = False,
    ):
        # ports source: https://github.com/grafana/tempo/blob/main/example/docker-compose/local/docker-compose.yaml

        # fqdn, if an ingress is not available, else the ingress address.
        self._external_hostname = external_host or socket.getfqdn()
-        self.enabled_receivers = enable_receivers or []
        self.use_tls = use_tls

    @property
@@ -267,8 +265,8 @@ def is_ready(self):

    def _build_receivers_config(self, receivers: Sequence[ReceiverProtocol]): # noqa: C901
        # receivers: the receivers we have to enable because the requirers we're related to
-        # intend to use them
-        # it already includes self.enabled_receivers: receivers we have to enable because *this charm* will use them.
+        # intend to use them. It already includes receivers that are always enabled
+        # through config or because *this charm* will use them.
        receivers_set = set(receivers)

        if not receivers_set:
30 changes: 30 additions & 0 deletions tests/integration/helpers.py
@@ -1,12 +1,16 @@
+import logging
+import subprocess
from dataclasses import dataclass
from typing import Dict

import yaml
from pytest_operator.plugin import OpsTest

_JUJU_DATA_CACHE = {}
_JUJU_KEYS = ("egress-subnets", "ingress-address", "private-address")

+logger = logging.getLogger(__name__)
+

def purge(data: dict):
    for key in _JUJU_KEYS:
@@ -157,3 +161,29 @@ def get_relation_data(
        requirer_endpoint, provider_endpoint, include_default_juju_keys, model
    )
    return RelationData(provider=provider_data, requirer=requirer_data)


+async def deploy_literal_bundle(ops_test: OpsTest, bundle: str):
+    run_args = [
+        "juju",
+        "deploy",
+        "--trust",
+        "-m",
+        ops_test.model_name,
+        str(ops_test.render_bundle(bundle)),
+    ]
+
+    retcode, stdout, stderr = await ops_test.run(*run_args)
+    assert retcode == 0, f"Deploy failed: {(stderr or stdout).strip()}"
+    logger.info(stdout)
+
+
+async def run_command(model_name: str, app_name: str, unit_num: int, command: list) -> bytes:
+    cmd = ["juju", "ssh", "--model", model_name, f"{app_name}/{unit_num}", *command]
+    try:
+        res = subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+        logger.info(res)
+    except subprocess.CalledProcessError as e:
+        logger.error(e.stdout.decode())
+        raise e
+    return res.stdout
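A usage sketch for the two new helpers (model name, app name, and command below are made up for illustration; note that run_command is declared async, so it must be awaited even though it shells out synchronously):

    # Inside an async test, after rendering a bundle string as in test_charm.py:
    #   await deploy_literal_bundle(ops_test, bundle)
    out = await run_command("test-model", "mc", 0, ["ls", "/etc"])
    print(out.decode())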
11 changes: 4 additions & 7 deletions tests/integration/test_charm.py
@@ -14,7 +14,7 @@

logger = logging.getLogger(__name__)

-METADATA = yaml.safe_load(Path("./metadata.yaml").read_text())
+METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text())
mc = SimpleNamespace(name="mc")


@@ -33,9 +33,6 @@ async def test_build_and_deploy(ops_test: OpsTest):
          {mc.name}:
            charm: {charm}
            trust: true
-            resources:
-              nginx-image: {METADATA["resources"]["nginx-image"]["upstream-source"]}
-              nginx-prometheus-exporter-image: {METADATA["resources"]["nginx-prometheus-exporter-image"]["upstream-source"]}
            scale: 1
          loki:
            charm: loki-k8s
@@ -54,9 +51,9 @@
            scale: 1
        relations:
-        - [mc:logging-consumer, loki:logging]
-        - [mc:self-metrics-endpoint, prometheus:metrics-endpoint]
-        - [mc:grafana-dashboards-provider, grafana:grafana-dashboard]
+        - [mc:logging, loki:logging]
+        - [mc:metrics-endpoint, prometheus:metrics-endpoint]
+        - [mc:grafana-dashboard, grafana:grafana-dashboard]
        """
    )
