Merge pull request #7 from canonical/OPENG-2298
Add a way to configure extra receivers for uncharmed applications
michaeldmitry authored Jul 1, 2024
2 parents e8abede + 3ac7474 commit 7d1d957
Showing 14 changed files with 713 additions and 587 deletions.
14 changes: 14 additions & 0 deletions charmcraft.yaml
@@ -106,3 +106,17 @@ parts:
- "jsonschema"
- "opentelemetry-exporter-otlp-proto-http==1.21.0"

+config:
+  options:
+    always_enable_zipkin:
+      description: Force-enable the receiver for the 'zipkin' protocol in Tempo, even if there is no integration currently requesting it.
+      type: boolean
+      default: false
+    always_enable_otlp_grpc:
+      description: Force-enable the receiver for the 'otlp_grpc' protocol in Tempo, even if there is no integration currently requesting it.
+      type: boolean
+      default: false
+    always_enable_otlp_http:
+      description: Force-enable the receiver for the 'otlp_http' protocol in Tempo, even if there is no integration currently requesting it.
+      type: boolean
+      default: false
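These options let an operator force-enable a receiver for applications that are not charmed and therefore cannot request a protocol over a tracing relation, e.g. `juju config tempo always_enable_zipkin=true` (assuming the charm is deployed under the application name `tempo`).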
28 changes: 22 additions & 6 deletions src/charm.py
@@ -7,7 +7,7 @@
import logging
import socket
from pathlib import Path
-from typing import Dict, List, Optional, Set, Tuple
+from typing import Dict, List, Optional, Set, Tuple, get_args

import ops
from charms.data_platform_libs.v0.s3 import S3Requirer
@@ -61,9 +61,6 @@ def __init__(self, *args):

        self.tempo = tempo = Tempo(
            external_host=self.hostname,
-            # we need otlp_http receiver for charm_tracing
-            # TODO add any extra receivers enabled manually via config
-            enable_receivers=["otlp_http"],
            use_tls=self.tls_available,
        )

@@ -250,6 +247,21 @@ def _local_ip(self) -> Optional[str]:
            )
            return None

+    @property
+    def enabled_receivers(self) -> Set[str]:
+        """Extra receivers enabled through config."""
+        enabled_receivers = set()
+        # otlp_http is needed by charm_tracing
+        enabled_receivers.add("otlp_http")
+        enabled_receivers.update(
+            [
+                receiver
+                for receiver in get_args(ReceiverProtocol)
+                if self.config.get(f"always_enable_{receiver}") is True
+            ]
+        )
+        return enabled_receivers
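For context: ReceiverProtocol here must be a typing.Literal of protocol names (otherwise get_args would return nothing useful), so get_args recovers every known receiver at runtime and checks each against an always_enable_<receiver> config key. A minimal standalone sketch of the same pattern (the Literal below is an illustrative stand-in, not the library's full list):

    from typing import Literal, get_args

    # Illustrative stand-in for the charm library's ReceiverProtocol.
    ReceiverProtocol = Literal["zipkin", "otlp_grpc", "otlp_http"]

    # Pretend charm config, e.g. after `juju config tempo always_enable_zipkin=true`.
    config = {"always_enable_zipkin": True, "always_enable_otlp_grpc": False}

    enabled = {"otlp_http"}  # always on: charm tracing ships its own traces over otlp_http
    enabled.update(
        receiver
        for receiver in get_args(ReceiverProtocol)
        if config.get(f"always_enable_{receiver}") is True
    )
    print(enabled)  # {'zipkin', 'otlp_http'} (set order may vary)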

    ##################
    # EVENT HANDLERS #
    ##################
@@ -392,13 +404,14 @@ def _update_tracing_relations(self):
        self._update_tempo_cluster()

    def _requested_receivers(self) -> Tuple[ReceiverProtocol, ...]:
-        """List what receivers we should activate, based on the active tracing relations."""
+        """List what receivers we should activate, based on the active tracing relations and config-enabled extra receivers."""
        # we start with the sum of the requested endpoints from the requirers
        requested_protocols = set(self.tracing.requested_protocols())

+        # update with enabled extra receivers
+        requested_protocols.update(self.enabled_receivers)
        # and publish only those we support
        requested_receivers = requested_protocols.intersection(set(self.tempo.receiver_ports))
-        requested_receivers.update(self.tempo.enabled_receivers)
        return tuple(requested_receivers)

    def server_cert(self):
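Note on the intersection in _requested_receivers above: it keeps the published receivers limited to protocols this Tempo workload actually exposes, even if a requirer (or a stray config key) asks for something else. A toy illustration, with a hypothetical receiver_ports mapping:

    requested = {"zipkin", "otlp_http", "jaeger_thrift_http"}
    receiver_ports = {"otlp_http": 4318, "otlp_grpc": 4317, "zipkin": 9411}  # hypothetical subset
    assert requested.intersection(receiver_ports) == {"zipkin", "otlp_http"}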
@@ -466,6 +479,9 @@ def _update_tempo_cluster(self):
            logger.error("skipped tempo cluster update: inconsistent state")
            return

+        if not self.unit.is_leader():
+            return
+
        kwargs = {}

        if self.tls_available:
4 changes: 2 additions & 2 deletions src/prometheus_alert_rules/tempo_workers/alerts.yaml
@@ -14,7 +14,7 @@ groups:
    for: 15m
    labels:
      severity: critical
-  - alert: TempoRequestErrors
+  - alert: TempoWorkerRequestErrors
    annotations:
      message: |
        The route {{ $labels.route }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors.
@@ -27,7 +27,7 @@
    for: 15m
    labels:
      severity: critical
-  - alert: TempoRequestLatency
+  - alert: TempoWorkerRequestLatency
    annotations:
      message: |
        {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
6 changes: 2 additions & 4 deletions src/tempo.py
@@ -68,14 +68,12 @@ class Tempo:
    def __init__(
        self,
        external_host: Optional[str] = None,
-        enable_receivers: Optional[Sequence[ReceiverProtocol]] = None,
        use_tls: bool = False,
    ):
        # ports source: https://github.com/grafana/tempo/blob/main/example/docker-compose/local/docker-compose.yaml

        # fqdn, if an ingress is not available, else the ingress address.
        self._external_hostname = external_host or socket.getfqdn()
-        self.enabled_receivers = enable_receivers or []
        self.use_tls = use_tls

    @property
@@ -267,8 +265,8 @@ def is_ready(self):

    def _build_receivers_config(self, receivers: Sequence[ReceiverProtocol]): # noqa: C901
        # receivers: the receivers we have to enable because the requirers we're related to
-        # intend to use them
-        # it already includes self.enabled_receivers: receivers we have to enable because *this charm* will use them.
+        # intend to use them. It already includes receivers that are always enabled
+        # through config or because *this charm* will use them.
        receivers_set = set(receivers)

        if not receivers_set:
30 changes: 30 additions & 0 deletions tests/integration/helpers.py
@@ -1,12 +1,16 @@
+import logging
+import subprocess
from dataclasses import dataclass
from typing import Dict

import yaml
from pytest_operator.plugin import OpsTest

_JUJU_DATA_CACHE = {}
_JUJU_KEYS = ("egress-subnets", "ingress-address", "private-address")

+logger = logging.getLogger(__name__)
+

def purge(data: dict):
    for key in _JUJU_KEYS:
@@ -157,3 +161,29 @@ def get_relation_data(
        requirer_endpoint, provider_endpoint, include_default_juju_keys, model
    )
    return RelationData(provider=provider_data, requirer=requirer_data)


+async def deploy_literal_bundle(ops_test: OpsTest, bundle: str):
+    run_args = [
+        "juju",
+        "deploy",
+        "--trust",
+        "-m",
+        ops_test.model_name,
+        str(ops_test.render_bundle(bundle)),
+    ]
+
+    retcode, stdout, stderr = await ops_test.run(*run_args)
+    assert retcode == 0, f"Deploy failed: {(stderr or stdout).strip()}"
+    logger.info(stdout)
+
+
+async def run_command(model_name: str, app_name: str, unit_num: int, command: list) -> bytes:
+    cmd = ["juju", "ssh", "--model", model_name, f"{app_name}/{unit_num}", *command]
+    try:
+        res = subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+        logger.info(res)
+    except subprocess.CalledProcessError as e:
+        logger.error(e.stdout.decode())
+        raise e
+    return res.stdout
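A usage sketch for the two new helpers (model name, app name, and command below are made up for illustration; note that run_command is declared async, so it must be awaited even though it shells out synchronously):

    # Inside an async test, after rendering a bundle string as in test_charm.py:
    #   await deploy_literal_bundle(ops_test, bundle)
    out = await run_command("test-model", "mc", 0, ["ls", "/etc"])
    print(out.decode())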
11 changes: 4 additions & 7 deletions tests/integration/test_charm.py
@@ -14,7 +14,7 @@

logger = logging.getLogger(__name__)

-METADATA = yaml.safe_load(Path("./metadata.yaml").read_text())
+METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text())
mc = SimpleNamespace(name="mc")


@@ -33,9 +33,6 @@ async def test_build_and_deploy(ops_test: OpsTest):
          {mc.name}:
            charm: {charm}
            trust: true
-            resources:
-              nginx-image: {METADATA["resources"]["nginx-image"]["upstream-source"]}
-              nginx-prometheus-exporter-image: {METADATA["resources"]["nginx-prometheus-exporter-image"]["upstream-source"]}
            scale: 1
          loki:
            charm: loki-k8s
@@ -54,9 +51,9 @@
            scale: 1
        relations:
-        - [mc:logging-consumer, loki:logging]
-        - [mc:self-metrics-endpoint, prometheus:metrics-endpoint]
-        - [mc:grafana-dashboards-provider, grafana:grafana-dashboard]
+        - [mc:logging, loki:logging]
+        - [mc:metrics-endpoint, prometheus:metrics-endpoint]
+        - [mc:grafana-dashboard, grafana:grafana-dashboard]
        """
    )
