From bd6d33c7cf56bbac66d997ead094645e65cb0c5a Mon Sep 17 00:00:00 2001 From: michael Date: Wed, 17 Jul 2024 19:26:32 +0300 Subject: [PATCH 01/22] implement generic coord --- src/charm.py | 452 +++++++------------------ src/coordinator.py | 85 ----- src/{nginx.py => nginx_config.py} | 152 ++------- src/nginx_prometheus_exporter.py | 44 --- src/tempo.py | 176 +++------- src/tempo_cluster.py | 391 --------------------- src/tempo_config.py | 84 ++++- tests/scenario/conftest.py | 2 +- tests/scenario/helpers.py | 1 - tests/scenario/test_config.py | 2 +- tests/scenario/test_nginx.py | 4 +- tests/scenario/test_tempo_clustered.py | 2 +- tests/unit/test_coherence.py | 1 - 13 files changed, 282 insertions(+), 1114 deletions(-) delete mode 100644 src/coordinator.py rename src/{nginx.py => nginx_config.py} (61%) delete mode 100644 src/nginx_prometheus_exporter.py delete mode 100644 src/tempo_cluster.py diff --git a/src/charm.py b/src/charm.py index 276f4f1..5947939 100755 --- a/src/charm.py +++ b/src/charm.py @@ -3,45 +3,36 @@ # See LICENSE file for licensing details. """Charmed Operator for Tempo; a lightweight object storage based tracing backend.""" -import json import logging import socket from pathlib import Path -from typing import Dict, List, Optional, Set, Tuple, get_args +from subprocess import CalledProcessError, getoutput +from typing import Dict, Optional, Set, Tuple, get_args import ops -from charms.data_platform_libs.v0.s3 import S3Requirer -from charms.grafana_k8s.v0.grafana_dashboard import GrafanaDashboardProvider from charms.grafana_k8s.v0.grafana_source import GrafanaSourceProvider -from charms.observability_libs.v1.cert_handler import VAULT_SECRET_LABEL, CertHandler -from charms.prometheus_k8s.v0.prometheus_scrape import MetricsEndpointProvider from charms.tempo_k8s.v1.charm_tracing import trace_charm from charms.tempo_k8s.v2.tracing import ( ReceiverProtocol, RequestEvent, TracingEndpointProvider, + receiver_protocol_to_transport_protocol, ) from charms.traefik_route_k8s.v0.traefik_route import TraefikRouteRequirer -from ops.charm import CharmBase, CollectStatusEvent, RelationEvent +from cosl.coordinated_workers.coordinator import Coordinator +from ops.charm import CharmBase, RelationEvent from ops.main import main -from ops.model import ActiveStatus, BlockedStatus, Relation, WaitingStatus -from coordinator import TempoCoordinator -from nginx import Nginx -from nginx_prometheus_exporter import NginxPrometheusExporter +from nginx_config import NginxConfig from tempo import Tempo -from tempo_cluster import TempoClusterProvider +from tempo_config import TempoRolesConfig logger = logging.getLogger(__name__) -class S3NotFoundError(Exception): - """Raised when the s3 integration is not present or not ready.""" - - @trace_charm( tracing_endpoint="tempo_otlp_http_endpoint", - server_cert="server_cert", + server_cert="server_ca_cert", extra_types=(Tempo, TracingEndpointProvider), ) class TempoCoordinatorCharm(CharmBase): @@ -51,29 +42,27 @@ def __init__(self, *args): super().__init__(*args) self.ingress = TraefikRouteRequirer(self, self.model.get_relation("ingress"), "ingress") # type: ignore - self.tempo_cluster = TempoClusterProvider(self) - self.coordinator = TempoCoordinator(self.tempo_cluster) - - # keep this above Tempo instantiation, as we need it in self.tls_enabled - self.cert_handler = CertHandler( - self, - key="tempo-server-cert", - sans=[self.hostname], - ) - - self.tempo = tempo = Tempo( - external_host=self.hostname, - use_tls=self.tls_available, - ) - - self.s3_requirer = 
S3Requirer(self, Tempo.s3_relation_name, Tempo.s3_bucket_name) - - self.nginx = Nginx( - self, - cluster_provider=self.tempo_cluster, - server_name=self.hostname, + self.tempo = Tempo(requested_receivers=self._requested_receivers) + # set the open ports for this unit + self.unit.set_ports(*self.tempo.all_ports.values()) + self.coordinator = Coordinator( + charm=self, + roles_config=TempoRolesConfig(), + s3_bucket_name=Tempo.s3_bucket_name, + external_url=self._external_url, + worker_metrics_port="8080", + endpoints={ + "certificates": "certificates", + "cluster": "tempo-cluster", + "grafana-dashboards": "grafana-dashboard", + "logging": "logging", + "metrics": "metrics-endpoint", + "s3": "s3", + }, + nginx_config=NginxConfig(server_name=self.hostname).config, + workers_config=self.tempo.config, + tracing_receivers=self.requested_receivers_urls, ) - self.nginx_prometheus_exporter = NginxPrometheusExporter(self) # configure this tempo as a datasource in grafana self.grafana_source_provider = GrafanaSourceProvider( @@ -82,91 +71,47 @@ def __init__(self, *args): source_url=self._external_http_server_url, refresh_event=[ # refresh the source url when TLS config might be changing - self.on[self.cert_handler.certificates_relation_name].relation_changed, + self.on[self.coordinator.cert_handler.certificates_relation_name].relation_changed, # or when ingress changes self.ingress.on.ready, ], ) - # # Patch the juju-created Kubernetes service to contain the right ports - self.unit.set_ports(*self.tempo.all_ports.values()) - - # Provide ability for Tempo to be scraped by Prometheus using prometheus_scrape - self._scraping = MetricsEndpointProvider( - self, - relation_name="metrics-endpoint", - jobs=[{"static_configs": [{"targets": [f"*:{tempo.tempo_http_server_port}"]}]}], - ) - self._grafana_dashboards = GrafanaDashboardProvider( - self, relation_name="grafana-dashboard" - ) self.tracing = TracingEndpointProvider(self, external_url=self._external_url) - self._inconsistencies = self.coordinator.get_deployment_inconsistencies( - has_s3=self.s3_ready - ) - self._is_consistent = not self._inconsistencies # We always listen to collect-status self.framework.observe(self.on.collect_unit_status, self._on_collect_unit_status) - if not self._is_consistent: - logger.error( - f"Inconsistent deployment. {self.unit.name} will be shutting down. " - "This likely means you need to add an s3 integration. " - "This charm will be unresponsive and refuse to handle any event until " - "the situation is resolved by the cloud admin, to avoid data loss." - ) - return # refuse to handle any other event as we can't possibly know what to do. + # refuse to handle any other event as we can't possibly know what to do. 
+ if ( + not self.coordinator.cluster.has_workers + or not self.coordinator.is_coherent + or not self.coordinator.s3_ready + ): + return # lifecycle self.framework.observe(self.on.leader_elected, self._on_leader_elected) - self.framework.observe(self.on.update_status, self._on_update_status) - self.framework.observe(self.on.config_changed, self._on_config_changed) self.framework.observe(self.on.list_receivers_action, self._on_list_receivers_action) - # nginx - self.framework.observe(self.on.nginx_pebble_ready, self._on_nginx_pebble_ready) - self.framework.observe( - self.on.nginx_prometheus_exporter_pebble_ready, - self._on_nginx_prometheus_exporter_pebble_ready, - ) - # ingress ingress = self.on["ingress"] self.framework.observe(ingress.relation_created, self._on_ingress_relation_created) self.framework.observe(ingress.relation_joined, self._on_ingress_relation_joined) self.framework.observe(self.ingress.on.ready, self._on_ingress_ready) - # s3 - self.framework.observe( - self.s3_requirer.on.credentials_changed, self._on_s3_credentials_changed - ) - self.framework.observe(self.s3_requirer.on.credentials_gone, self._on_s3_credentials_gone) - # tracing self.framework.observe(self.tracing.on.request, self._on_tracing_request) self.framework.observe(self.tracing.on.broken, self._on_tracing_broken) - self.framework.observe(self.on.peers_relation_created, self._on_peers_relation_created) - self.framework.observe(self.on.peers_relation_changed, self._on_peers_relation_changed) # tls - self.framework.observe(self.cert_handler.on.cert_changed, self._on_cert_handler_changed) - - # cluster - self.framework.observe(self.tempo_cluster.on.changed, self._on_tempo_cluster_changed) - - for evt in self.on.events().values(): - self.framework.observe(evt, self._on_event) # type: ignore + self.framework.observe( + self.coordinator.cert_handler.on.cert_changed, self._on_cert_handler_changed + ) ###################### # UTILITY PROPERTIES # ###################### - - @property - def is_clustered(self) -> bool: - """Check whether this Tempo is a coordinator and has worker nodes connected to it.""" - return self.tempo_cluster.has_workers - @property def hostname(self) -> str: """Unit's hostname.""" @@ -195,77 +140,12 @@ def _external_url(self) -> str: @property def _internal_url(self) -> str: - scheme = "https" if self.tls_available else "http" + """Returns workload's FQDN.""" + scheme = "http" + if hasattr(self, "coordinator") and self.coordinator.nginx.are_certificates_on_disk: + scheme = "https" return f"{scheme}://{self.hostname}" - @property - def tls_available(self) -> bool: - """Return True if tls is enabled and the necessary certs are found.""" - return ( - self.cert_handler.enabled - and (self.cert_handler.server_cert is not None) - and (self.cert_handler.private_key is not None) - and (self.cert_handler.ca_cert is not None) - ) - - @property - def _s3_config(self) -> dict: - s3_config = self.s3_requirer.get_s3_connection_info() - if ( - s3_config - and "bucket" in s3_config - and "endpoint" in s3_config - and "access-key" in s3_config - and "secret-key" in s3_config - ): - return s3_config - raise S3NotFoundError("s3 integration inactive") - - @property - def s3_ready(self) -> bool: - """Check whether s3 is configured.""" - try: - return bool(self._s3_config) - except S3NotFoundError: - return False - - @property - def peer_addresses(self) -> List[str]: - peers = self._peers - relation = self.model.get_relation("peers") - # get unit addresses for all the other units from a databag - if peers and 
relation: - addresses = [relation.data[unit].get("local-ip") for unit in peers] - addresses = list(filter(None, addresses)) - else: - addresses = [] - - # add own address - if self._local_ip: - addresses.append(self._local_ip) - - return addresses - - @property - def _local_ip(self) -> Optional[str]: - try: - binding = self.model.get_binding("peers") - if not binding: - logger.error( - "unable to get local IP at this time: " - "peers binding not active yet. It could be that the charm " - "is still being set up..." - ) - return None - return str(binding.network.bind_address) - except (ops.ModelError, KeyError) as e: - logger.debug("failed to obtain local ip from peers binding", exc_info=True) - logger.error( - f"unable to get local IP at this time: failed with {type(e)}; " - f"see debug log for more info" - ) - return None - @property def enabled_receivers(self) -> Set[str]: """Extra receivers enabled through config""" @@ -288,39 +168,26 @@ def _on_tracing_broken(self, _): """Update tracing relations' databags once one relation is removed.""" self._update_tracing_relations() - def _on_cert_handler_changed(self, _): - if self.tls_available: - logger.debug("enabling TLS") - self.nginx.configure_tls( - server_cert=self.cert_handler.server_cert, # type: ignore - ca_cert=self.cert_handler.ca_cert, # type: ignore - private_key=self.cert_handler.private_key, # type: ignore - ) - else: - logger.debug("disabling TLS") - self.nginx.delete_certificates() + def _on_cert_handler_changed(self, e: ops.RelationChangedEvent): + + self.coordinator._on_cert_handler_changed(e) # tls readiness change means config change. # sync scheme change with traefik and related consumers self._configure_ingress() - # sync the server cert with the charm container. + # sync the server CA cert with the charm container. # technically, because of charm tracing, this will be called first thing on each event - self._update_server_cert() + self._update_server_ca_cert() # update relations to reflect the new certificate self._update_tracing_relations() - # notify the cluster - self._update_tempo_cluster() - def _on_tracing_request(self, e: RequestEvent): """Handle a remote requesting a tracing endpoint.""" logger.debug(f"received tracing request from {e.relation.app}: {e.requested_receivers}") self._update_tracing_relations() - def _on_tempo_cluster_changed(self, _: RelationEvent): - self._update_tempo_cluster() def _on_ingress_relation_created(self, _: RelationEvent): self._configure_ingress() @@ -332,29 +199,6 @@ def _on_leader_elected(self, _: ops.LeaderElectedEvent): # as traefik_route goes through app data, we need to take lead of traefik_route if our leader dies. 
self._configure_ingress() - def _on_s3_credentials_changed(self, _): - self._on_s3_changed() - - def _on_s3_credentials_gone(self, _): - self._on_s3_changed() - - def _on_s3_changed(self): - self._update_tempo_cluster() - - def _on_peers_relation_created(self, event: ops.RelationCreatedEvent): - if self._local_ip: - event.relation.data[self.unit]["local-ip"] = self._local_ip - - def _on_peers_relation_changed(self, _): - self._update_tempo_cluster() - - def _on_config_changed(self, _): - # check if certificate files haven't disappeared and recreate them if needed - self._update_tempo_cluster() - - def _on_update_status(self, _): - """Update the status of the application.""" - def _on_ingress_ready(self, _event): # whenever there's a change in ingress, we need to update all tracing relations self._update_tracing_relations() @@ -366,52 +210,16 @@ def _on_ingress_revoked(self, _event): def _on_list_receivers_action(self, event: ops.ActionEvent): res = {} for receiver in self._requested_receivers(): - res[receiver.replace("_", "-")] = ( - f"{self.ingress.external_host or self.tempo.url}:{self.tempo.receiver_ports[receiver]}" - ) + res[receiver.replace("_", "-")] = self.get_receiver_url(receiver) event.set_results(res) # keep this event handler at the bottom - def _on_collect_unit_status(self, e: CollectStatusEvent): - # todo add [nginx.workload] statuses - - if not self.tempo.is_ready: - e.add_status(WaitingStatus("[workload.tempo] Tempo API not ready just yet...")) - - # TODO: should we set these statuses on the leader only, or on all units? - if issues := self._inconsistencies: - for issue in issues: - e.add_status(BlockedStatus("[consistency.issues]" + issue)) - e.add_status(BlockedStatus("[consistency] Unit *disabled*.")) - else: - if self.is_clustered: - # no issues: tempo is consistent - if not self.coordinator.is_recommended: - e.add_status(ActiveStatus("[coordinator] degraded")) - else: - e.add_status(ActiveStatus()) - else: - e.add_status(ActiveStatus()) - - def _on_nginx_pebble_ready(self, _) -> None: - self.nginx.configure_pebble_layer() - - def _on_nginx_prometheus_exporter_pebble_ready(self, _) -> None: - self.nginx_prometheus_exporter.configure_pebble_layer() - - def _on_event(self, event) -> None: - """A set of common configuration actions that should happen on every event.""" - if isinstance(event, CollectStatusEvent): - return - # plan layers - self.nginx.configure_pebble_layer() - self.nginx_prometheus_exporter.configure_pebble_layer() - # configure ingress - self._configure_ingress() - # update cluster relations - self._update_tempo_cluster() - # update tracing relations - self._update_tracing_relations() + def _on_collect_unit_status(self, e: ops.CollectStatusEvent): + self.coordinator._on_collect_unit_status(e) + # add Tempo charm custom blocking conditions + # TODO: avoid waiting for update-status event + # if not self.is_workload_ready(): + # e.add_status(ops.WaitingStatus("[workload.tempo] Tempo API not ready just yet...")) ################### # UTILITY METHODS # @@ -429,7 +237,7 @@ def _configure_ingress(self) -> None: self._update_tracing_relations() # notify the cluster - self._update_tempo_cluster() + self.coordinator.update_cluster() def _update_tracing_relations(self) -> None: tracing_relations = self.model.relations["tracing"] @@ -443,10 +251,10 @@ def _update_tracing_relations(self) -> None: # publish requested protocols to all relations if self.unit.is_leader(): self.tracing.publish_receivers( - [(p, self.tempo.get_receiver_url(p, self.ingress)) for p in 
requested_receivers] + [(p, self.get_receiver_url(p)) for p in requested_receivers] ) - self._update_tempo_cluster() + self.coordinator.update_cluster() def _requested_receivers(self) -> Tuple[ReceiverProtocol, ...]: """List what receivers we should activate, based on the active tracing relations and config-enabled extra receivers.""" @@ -459,95 +267,34 @@ def _requested_receivers(self) -> Tuple[ReceiverProtocol, ...]: requested_receivers = requested_protocols.intersection(set(self.tempo.receiver_ports)) return tuple(requested_receivers) - def server_cert(self) -> str: + def server_ca_cert(self) -> str: """For charm tracing.""" - self._update_server_cert() - return self.tempo.server_cert_path - - def _update_server_cert(self) -> None: - """Server certificate for charm tracing tls, if tls is enabled.""" - server_cert = Path(self.tempo.server_cert_path) - if self.tls_available: - if not server_cert.exists(): - server_cert.parent.mkdir(parents=True, exist_ok=True) - if self.cert_handler.server_cert: - server_cert.write_text(self.cert_handler.server_cert) + self._update_server_ca_cert() + return self.tempo.tls_ca_path + + def _update_server_ca_cert(self) -> None: + """Server CA certificate for charm tracing tls, if tls is enabled.""" + server_ca_cert = Path(self.tempo.tls_ca_path) + if self.coordinator.tls_available: + if not server_ca_cert.exists(): + server_ca_cert.parent.mkdir(parents=True, exist_ok=True) + if self.coordinator.cert_handler.ca_cert: + server_ca_cert.write_text(self.coordinator.cert_handler.ca_cert) else: # tls unavailable: delete local cert - server_cert.unlink(missing_ok=True) + server_ca_cert.unlink(missing_ok=True) def tempo_otlp_http_endpoint(self) -> Optional[str]: """Endpoint at which the charm tracing information will be forwarded.""" # the charm container and the tempo workload container have apparently the same # IP, so we can talk to tempo at localhost. - if self.tempo.is_ready: + if self.is_workload_ready(): return f"{self._internal_url}:{self.tempo.receiver_ports['otlp_http']}" - return None - - @property - def _peers(self) -> Optional[Set[ops.model.Unit]]: - relation = self.model.get_relation("peers") - if not relation: - return None - - # self is not included in relation.units - return relation.units - - @property - def loki_endpoints_by_unit(self) -> Dict[str, str]: - """Loki endpoints from relation data in the format needed for Pebble log forwarding. - - Returns: - A dictionary of remote units and the respective Loki endpoint. 
- { - "loki/0": "http://loki:3100/loki/api/v1/push", - "another-loki/0": "http://another-loki:3100/loki/api/v1/push", - } - """ - endpoints: Dict = {} - relations: List[Relation] = self.model.relations.get("logging-consumer", []) - - for relation in relations: - for unit in relation.units: - if "endpoint" not in relation.data[unit]: - continue - endpoint = relation.data[unit]["endpoint"] - deserialized_endpoint = json.loads(endpoint) - url = deserialized_endpoint["url"] - endpoints[unit.name] = url - - return endpoints - - def _update_tempo_cluster(self) -> None: - """Build the config and publish everything to the application databag.""" - if not self._is_consistent: - logger.error("skipped tempo cluster update: inconsistent state") - return - - if not self.unit.is_leader(): - return - - kwargs = {} - - if self.tls_available: - # we share the certs in plaintext as they're not sensitive information - kwargs["ca_cert"] = self.cert_handler.ca_cert - kwargs["server_cert"] = self.cert_handler.server_cert - kwargs["privkey_secret_id"] = self.tempo_cluster.publish_privkey(VAULT_SECRET_LABEL) - - # On every function call, we always publish everything to the databag; however, if there - # are no changes, Juju will notice there's no delta and do nothing - self.tempo_cluster.publish_data( - tempo_config=self.tempo.generate_config( - self._requested_receivers(), - self._s3_config, - self.tempo_cluster.gather_addresses_by_role(), - self.tempo_cluster.gather_addresses(), - ), - loki_endpoints=self.loki_endpoints_by_unit, - # TODO tempo receiver for charm tracing - **kwargs, - ) + def requested_receivers_urls(self) -> Dict[str, str]: + """Endpoints to which the workload (and the worker charm) can push traces to.""" + return { + receiver: self.get_receiver_url(receiver) for receiver in self._requested_receivers() + } @property def _static_ingress_config(self) -> dict: @@ -571,7 +318,7 @@ def _ingress_config(self) -> dict: # TODO better matcher "rule": "ClientIP(`0.0.0.0/0`)", } - if sanitized_protocol.endswith("grpc") and not self.tls_available: + if sanitized_protocol.endswith("grpc") and not self.coordinator.tls_available: # to send traces to unsecured GRPC endpoints, we need h2c # see https://doc.traefik.io/traefik/v2.0/user-guides/grpc/#with-http-h2c http_services[ @@ -590,6 +337,49 @@ def _ingress_config(self) -> dict: }, } + def get_receiver_url(self, protocol: ReceiverProtocol): + """Return the receiver endpoint URL based on the protocol. + + if ingress is used, return endpoint provided by the ingress instead. 
+ """ + protocol_type = receiver_protocol_to_transport_protocol.get(protocol) + # ingress.is_ready returns True even when traefik hasn't sent any data yet + has_ingress = ( + self.ingress.is_ready() and self.ingress.external_host and self.ingress.scheme + ) + receiver_port = self.tempo.receiver_ports[protocol] + + if has_ingress: + url = ( + self.ingress.external_host + if protocol_type == "grpc" + else f"{self.ingress.scheme}://{self.ingress.external_host}" + ) + else: + url = ( + self.coordinator.hostname + if protocol_type == "grpc" + else self.coordinator._internal_url + ) + + return f"{url}:{receiver_port}" + + def is_workload_ready(self): + """Whether the tempo built-in readiness check reports 'ready'.""" + if self.coordinator.tls_available: + tls, s = f" --cacert {self.tempo.tls_ca_path}", "s" + else: + tls = s = "" + + # cert is for fqdn/ingress, not for IP + cmd = f"curl{tls} http{s}://{self.coordinator.hostname}:{self.tempo.tempo_http_server_port}/ready" + + try: + out = getoutput(cmd).split("\n")[-1] + except (CalledProcessError, IndexError): + return False + return out == "ready" + if __name__ == "__main__": # pragma: nocover main(TempoCoordinatorCharm) diff --git a/src/coordinator.py b/src/coordinator.py deleted file mode 100644 index b7bcea9..0000000 --- a/src/coordinator.py +++ /dev/null @@ -1,85 +0,0 @@ -import logging -from collections import Counter -from typing import Dict, List, Optional, Set - -from tempo_cluster import TempoClusterProvider, TempoRole - -logger = logging.getLogger(__name__) - -MINIMAL_DEPLOYMENT = { - TempoRole.querier: 1, - TempoRole.query_frontend: 1, - TempoRole.ingester: 1, - TempoRole.distributor: 1, - TempoRole.compactor: 1, - TempoRole.metrics_generator: 1, -} -"""The minimal set of roles that need to be allocated for the -deployment to be considered consistent (otherwise we set blocked).""" - -# TODO: find out what the actual recommended deployment is -RECOMMENDED_DEPLOYMENT = Counter( - { - TempoRole.querier: 1, - TempoRole.query_frontend: 1, - TempoRole.ingester: 3, - TempoRole.distributor: 1, - TempoRole.compactor: 1, - TempoRole.metrics_generator: 1, - } -) -"""The set of roles that need to be allocated for the -deployment to be considered robust according to the official -recommendations/guidelines.""" - - -class TempoCoordinator: - """Tempo coordinator.""" - - def __init__(self, cluster_provider: TempoClusterProvider): - self._cluster_provider = cluster_provider - self._roles: Dict[TempoRole, int] = self._cluster_provider.gather_roles() - - # Whether the roles list makes up a coherent mimir deployment. - self.is_coherent = set(self._roles.keys()).issuperset(MINIMAL_DEPLOYMENT) - self.missing_roles: Set[TempoRole] = set(MINIMAL_DEPLOYMENT).difference(self._roles.keys()) - # If the coordinator is incoherent, return the roles that are missing for it to become so. - - def _is_recommended(): - for role, min_n in RECOMMENDED_DEPLOYMENT.items(): - if self._roles.get(role, 0) < min_n: - return False - return True - - self.is_recommended: bool = _is_recommended() - # Whether the present roles are a superset of the minimal deployment. - # I.E. If all required roles are assigned, and each role has the recommended amount of units. - # python>=3.11 would support roles >= RECOMMENDED_DEPLOYMENT - - def get_deployment_inconsistencies(self, has_s3: bool) -> List[str]: - """Determine whether the deployment as a whole is consistent. - - Return a list of failed consistency checks. 
- """ - return self._get_deployment_inconsistencies( - has_s3=has_s3, - coherent=self.is_coherent, - missing_roles=self.missing_roles, - ) - - @staticmethod - def _get_deployment_inconsistencies( - has_s3: bool, - coherent: bool, - missing_roles: Optional[Set[TempoRole]] = None, - ) -> List[str]: - """Determine whether the deployment as a whole is consistent. - - Return a list of failed consistency checks. - """ - failures = [] - if not has_s3: - failures.append("Tempo has no s3 integration.") - if not coherent: - failures.append(f"Incoherent coordinator: missing roles: {missing_roles}.") - return failures diff --git a/src/nginx.py b/src/nginx_config.py similarity index 61% rename from src/nginx.py rename to src/nginx_config.py index b706f9a..6f14d50 100644 --- a/src/nginx.py +++ b/src/nginx_config.py @@ -6,54 +6,29 @@ from typing import Any, Dict, List, Optional, Set import crossplane -from ops import CharmBase -from ops.pebble import Layer, PathError, ProtocolError +from cosl.coordinated_workers.coordinator import Coordinator +from cosl.coordinated_workers.nginx import CERT_PATH, KEY_PATH from tempo import Tempo -from tempo_cluster import TempoClusterProvider, TempoRole +from tempo_config import TempoRole logger = logging.getLogger(__name__) -NGINX_DIR = "/etc/nginx" -NGINX_CONFIG = f"{NGINX_DIR}/nginx.conf" -KEY_PATH = f"{NGINX_DIR}/certs/server.key" -CERT_PATH = f"{NGINX_DIR}/certs/server.cert" -CA_CERT_PATH = f"{NGINX_DIR}/certs/ca.cert" - - -class Nginx: +class NginxConfig: """Helper class to manage the nginx workload.""" - config_path = NGINX_CONFIG - - def __init__(self, charm: CharmBase, cluster_provider: TempoClusterProvider, server_name: str): - self._charm = charm - self.cluster_provider = cluster_provider + def __init__(self, server_name: str): self.server_name = server_name - self._container = self._charm.unit.get_container("nginx") - - def configure_pebble_layer(self) -> None: - """Configure pebble layer.""" - new_config: str = self.config() - should_restart: bool = self._has_config_changed(new_config) - if self._container.can_connect(): - self._container.push(self.config_path, new_config, make_dirs=True) # type: ignore - self._container.add_layer("nginx", self.layer, combine=True) - self._container.autostart() - if should_restart: - logger.info("new nginx config: reloading the service") - self.reload() - - def config(self) -> str: + def config(self, coordinator: Coordinator) -> str: """Build and return the Nginx configuration.""" - full_config = self._prepare_config() + full_config = self._prepare_config(coordinator) return crossplane.build(full_config) - def _prepare_config(self) -> List[dict]: + def _prepare_config(self, coordinator: Coordinator) -> List[dict]: log_level = "error" - addresses_by_role = self.cluster_provider.gather_addresses_by_role() + addresses_by_role = coordinator.cluster.gather_addresses_by_role() # build the complete configuration full_config = [ {"directive": "worker_processes", "args": ["5"]}, @@ -102,52 +77,12 @@ def _prepare_config(self) -> List[dict]: }, {"directive": "proxy_read_timeout", "args": ["300"]}, # server block - *self._servers(addresses_by_role), + *self._servers(addresses_by_role, coordinator.nginx.are_certificates_on_disk), ], }, ] return full_config - def _has_config_changed(self, new_config: str) -> bool: - """Return True if the passed config differs from the one on disk.""" - if not self._container.can_connect(): - logger.debug("Could not connect to Nginx container") - return False - - try: - current_config = 
self._container.pull(self.config_path).read() - except (ProtocolError, PathError) as e: - logger.warning( - "Could not check the current nginx configuration due to " - "a failure in retrieving the file: %s", - e, - ) - return False - - return current_config != new_config - - def reload(self) -> None: - """Reload the nginx config without restarting the service.""" - self._container.exec(["nginx", "-s", "reload"]) - - @property - def layer(self) -> Layer: - """Return the Pebble layer for Nginx.""" - return Layer( - { - "summary": "nginx layer", - "description": "pebble config layer for Nginx", - "services": { - "nginx": { - "override": "replace", - "summary": "nginx", - "command": "nginx -g 'daemon off;'", - "startup": "enabled", - } - }, - } - ) - def _log_verbose(self, verbose: bool = True) -> List[Dict[str, Any]]: if verbose: return [{"directive": "access_log", "args": ["/dev/stderr", "main"]}] @@ -167,28 +102,6 @@ def _upstreams(self, addresses_by_role: Dict[str, Set[str]]) -> List[Dict[str, A addresses_mapped_to_upstreams = {} nginx_upstreams = [] addresses_mapped_to_upstreams = addresses_by_role.copy() - if TempoRole.all in addresses_by_role.keys(): - # for all, we add addresses to existing upstreams for distributor / query_frontend or create the set - if TempoRole.distributor in addresses_mapped_to_upstreams: - addresses_mapped_to_upstreams[TempoRole.distributor] = ( - addresses_mapped_to_upstreams[TempoRole.distributor].union( - addresses_by_role[TempoRole.all] - ) - ) - else: - addresses_mapped_to_upstreams[TempoRole.distributor] = addresses_by_role[ - TempoRole.all - ] - if TempoRole.query_frontend in addresses_mapped_to_upstreams: - addresses_mapped_to_upstreams[TempoRole.query_frontend] = ( - addresses_mapped_to_upstreams[TempoRole.query_frontend].union( - addresses_by_role[TempoRole.all] - ) - ) - else: - addresses_mapped_to_upstreams[TempoRole.query_frontend] = addresses_by_role[ - TempoRole.all - ] if TempoRole.distributor in addresses_mapped_to_upstreams.keys(): nginx_upstreams.extend( self._distributor_upstreams(addresses_mapped_to_upstreams[TempoRole.distributor]) @@ -276,23 +189,29 @@ def _listen_args(self, port: int, ipv6: bool, ssl: bool, http2: bool) -> List[st args.append("http2") return args - def _servers(self, addresses_by_role: Dict[str, Set[str]]) -> List[Dict[str, Any]]: + def _servers( + self, addresses_by_role: Dict[str, Set[str]], tls: bool = False + ) -> List[Dict[str, Any]]: servers = [] roles = addresses_by_role.keys() - if TempoRole.distributor.value in roles or TempoRole.all.value in roles: + if TempoRole.distributor.value in roles: for protocol, port in Tempo.receiver_ports.items(): - servers.append(self._server(port, protocol.replace("_", "-"), "grpc" in protocol)) - if TempoRole.query_frontend.value in roles or TempoRole.all.value in roles: + servers.append( + self._server(port, protocol.replace("_", "-"), "grpc" in protocol, tls) + ) + if TempoRole.query_frontend.value in roles: for protocol, port in Tempo.server_ports.items(): - servers.append(self._server(port, protocol.replace("_", "-"), "grpc" in protocol)) + servers.append( + self._server(port, protocol.replace("_", "-"), "grpc" in protocol, tls) + ) return servers - def _server(self, port: int, upstream: str, grpc: bool = False) -> Dict[str, Any]: + def _server( + self, port: int, upstream: str, grpc: bool = False, tls: bool = False + ) -> Dict[str, Any]: auth_enabled = False - tls = self.tls_ready - if tls: return { "directive": "server", @@ -328,26 +247,3 @@ def _server(self, port: int, 
upstream: str, grpc: bool = False) -> Dict[str, Any *self._locations(upstream, grpc, tls), ], } - - @property - def tls_ready(self) -> bool: - """Whether cert, key, and ca paths are found on disk and Nginx is ready to use tls.""" - if not self._container.can_connect(): - return False - return all( - self._container.exists(tls_path) for tls_path in (KEY_PATH, CERT_PATH, CA_CERT_PATH) - ) - - def configure_tls(self, private_key: str, server_cert: str, ca_cert: str) -> None: - """Save the certificates file to disk and run update-ca-certificates.""" - if self._container.can_connect(): - self._container.push(KEY_PATH, private_key, make_dirs=True) - self._container.push(CERT_PATH, server_cert, make_dirs=True) - self._container.push(CA_CERT_PATH, ca_cert, make_dirs=True) - - def delete_certificates(self) -> None: - """Delete the certificate files from disk and run update-ca-certificates.""" - if self._container.can_connect(): - self._container.remove_path(CERT_PATH, recursive=True) - self._container.remove_path(KEY_PATH, recursive=True) - self._container.remove_path(CA_CERT_PATH, recursive=True) diff --git a/src/nginx_prometheus_exporter.py b/src/nginx_prometheus_exporter.py deleted file mode 100644 index 7ce7b2c..0000000 --- a/src/nginx_prometheus_exporter.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright 2024 Canonical -# See LICENSE file for licensing details. -"""Nginx Prometheus exporter workload.""" - -import logging - -from ops import CharmBase -from ops.pebble import Layer - -logger = logging.getLogger(__name__) - -NGINX_PROMETHEUS_EXPORTER_PORT = "9113" - - -class NginxPrometheusExporter: - """Helper class to manage the nginx prometheus exporter workload.""" - - def __init__(self, charm: CharmBase) -> None: - self._charm = charm - self._container = self._charm.unit.get_container("nginx-prometheus-exporter") - - def configure_pebble_layer(self) -> None: - """Configure pebble layer.""" - self._container.add_layer("nginx-prometheus-exporter", self.layer, combine=True) - self._container.autostart() - - @property - def layer(self) -> Layer: - """Return the Pebble layer for Nginx Prometheus exporter.""" - scheme = "https" if self._charm.tls_available else "http" # type: ignore - return Layer( - { - "summary": "nginx prometheus exporter layer", - "description": "pebble config layer for Nginx Prometheus exporter", - "services": { - "nginx": { - "override": "replace", - "summary": "nginx prometheus exporter", - "command": f"nginx-prometheus-exporter --no-nginx.ssl-verify --web.listen-address=:{NGINX_PROMETHEUS_EXPORTER_PORT} --nginx.scrape-uri={scheme}://127.0.0.1:3200/status", - "startup": "enabled", - } - }, - } - ) diff --git a/src/tempo.py b/src/tempo.py index fd17ab2..e1b64cc 100644 --- a/src/tempo.py +++ b/src/tempo.py @@ -4,18 +4,13 @@ """Tempo workload configuration and client.""" import logging -import socket -from subprocess import CalledProcessError, getoutput -from typing import Any, Dict, List, Optional, Sequence, Set, Tuple +from typing import Callable, Dict, Optional, Sequence, Set, Tuple -from charms.tempo_k8s.v2.tracing import ( - ReceiverProtocol, - receiver_protocol_to_transport_protocol, -) -from charms.traefik_route_k8s.v0.traefik_route import TraefikRouteRequirer +import yaml +from charms.tempo_k8s.v2.tracing import ReceiverProtocol +from cosl.coordinated_workers.coordinator import Coordinator import tempo_config -from tempo_cluster import TempoRole logger = logging.getLogger(__name__) @@ -23,21 +18,13 @@ class Tempo: """Class representing the Tempo client workload 
configuration.""" - config_path = "/etc/tempo/tempo.yaml" - - # cert path on charm container - server_cert_path = "/usr/local/share/ca-certificates/ca.crt" - # cert paths on tempo container - tls_cert_path = "/etc/tempo/tls/server.crt" - tls_key_path = "/etc/tempo/tls/server.key" + tls_cert_path = "/etc/worker/server.cert" + tls_key_path = "/etc/worker/private.key" tls_ca_path = "/usr/local/share/ca-certificates/ca.crt" wal_path = "/etc/tempo/tempo_wal" - log_path = "/var/log/tempo.log" - tempo_ready_notice_key = "canonical.com/tempo/workload-ready" - s3_relation_name = "s3" s3_bucket_name = "tempo" memberlist_port = 7946 @@ -47,6 +34,7 @@ class Tempo: "tempo_grpc": 9096, # default grpc listen port is 9095, but that conflicts with promtail. } + # ports source: https://github.com/grafana/tempo/blob/main/example/docker-compose/local/docker-compose.yaml receiver_ports: Dict[str, int] = { "zipkin": 9411, "otlp_grpc": 4317, @@ -64,14 +52,9 @@ class Tempo: def __init__( self, - external_host: Optional[str] = None, - use_tls: bool = False, + requested_receivers: Callable[[], "Tuple[ReceiverProtocol, ...]"], ): - # ports source: https://github.com/grafana/tempo/blob/main/example/docker-compose/local/docker-compose.yaml - - # fqdn, if an ingress is not available, else the ingress address. - self._external_hostname = external_host or socket.getfqdn() - self.use_tls = use_tls + self._receivers_getter = requested_receivers @property def tempo_http_server_port(self) -> int: @@ -83,91 +66,28 @@ def tempo_grpc_server_port(self) -> int: """Return the receiver port for the built-in tempo_http protocol.""" return self.server_ports["tempo_grpc"] - def get_external_ports(self, service_name_prefix: str) -> List[Tuple[str, int, int]]: - """List of service names and port mappings for the kubernetes service patch. - - Includes the tempo server as well as the receiver ports. - """ - # todo allow remapping ports? - all_ports = {**self.server_ports} - return [ - ( - (f"{service_name_prefix}-{service_name}").replace("_", "-"), - all_ports[service_name], - all_ports[service_name], - ) - for service_name in all_ports - ] - - @property - def url(self) -> str: - """Base url at which the tempo server is locally reachable over http.""" - scheme = "https" if self.use_tls else "http" - return f"{scheme}://{self._external_hostname}" - - def get_receiver_url(self, protocol: ReceiverProtocol, ingress: TraefikRouteRequirer): - """Return the receiver endpoint URL based on the protocol. - - if ingress is used, return endpoint provided by the ingress instead. 
- """ - protocol_type = receiver_protocol_to_transport_protocol.get(protocol) - # ingress.is_ready returns True even when traefik hasn't sent any data yet - has_ingress = ingress.is_ready() and ingress.external_host and ingress.scheme - receiver_port = self.receiver_ports[protocol] - - if has_ingress: - url = ( - ingress.external_host - if protocol_type == "grpc" - else f"{ingress.scheme}://{ingress.external_host}" - ) - else: - url = self._external_hostname if protocol_type == "grpc" else self.url - - return f"{url}:{receiver_port}" - - def _build_server_config(self): - server_config = tempo_config.Server( - http_listen_port=self.tempo_http_server_port, - # we need to specify a grpc server port even if we're not using the grpc server, - # otherwise it will default to 9595 and make promtail bork - grpc_listen_port=self.tempo_grpc_server_port, - ) - - if self.use_tls: - server_tls_config = tempo_config.TLS( - cert_file=str(self.tls_cert_path), - key_file=str(self.tls_key_path), - client_ca_file=str(self.tls_ca_path), - ) - server_config.http_tls_config = server_tls_config - server_config.grpc_tls_config = server_tls_config - - return server_config - - def generate_config( + def config( self, - receivers: Sequence[ReceiverProtocol], - s3_config: dict, - roles_addresses: Dict[str, Set[str]], - peers: Optional[Set[str]] = None, - ) -> Dict[str, Any]: + coordinator: Coordinator, + ) -> str: """Generate the Tempo configuration. Only activate the provided receivers. """ config = tempo_config.Tempo( auth_enabled=False, - server=self._build_server_config(), - distributor=self._build_distributor_config(receivers), + server=self._build_server_config(coordinator.tls_available), + distributor=self._build_distributor_config( + self._receivers_getter(), coordinator.tls_available + ), ingester=self._build_ingester_config(), - memberlist=self._build_memberlist_config(peers), + memberlist=self._build_memberlist_config(coordinator.cluster.gather_addresses()), compactor=self._build_compactor_config(), - querier=self._build_querier_config(roles_addresses), - storage=self._build_storage_config(s3_config), + querier=self._build_querier_config(coordinator.cluster.gather_addresses_by_role()), + storage=self._build_storage_config(coordinator._s3_config), ) - if self.use_tls: + if coordinator.tls_available: # cfr: # https://grafana.com/docs/tempo/latest/configuration/network/tls/#client-configuration tls_config = { @@ -176,7 +96,7 @@ def generate_config( "tls_key_path": self.tls_key_path, "tls_ca_path": self.tls_ca_path, # try with fqdn? 
- "tls_server_name": self._external_hostname, + "tls_server_name": coordinator.hostname, } config.ingester_client = tempo_config.Client( grpc_client_config=tempo_config.ClientTLS(**tls_config) @@ -189,7 +109,26 @@ def generate_config( ) config.memberlist = config.memberlist.model_copy(update=tls_config) - return config.model_dump(mode="json", exclude_none=True) + return yaml.dump(config.model_dump(mode="json", exclude_none=True)) + + def _build_server_config(self, use_tls=False): + server_config = tempo_config.Server( + http_listen_port=self.tempo_http_server_port, + # we need to specify a grpc server port even if we're not using the grpc server, + # otherwise it will default to 9595 and make promtail bork + grpc_listen_port=self.tempo_grpc_server_port, + ) + + if use_tls: + server_tls_config = tempo_config.TLS( + cert_file=str(self.tls_cert_path), + key_file=str(self.tls_key_path), + client_ca_file=str(self.tls_ca_path), + ) + server_config.http_tls_config = server_tls_config + server_config.grpc_tls_config = server_tls_config + + return server_config def _build_storage_config(self, s3_config: dict): storage_config = tempo_config.TraceStorage( @@ -213,27 +152,16 @@ def _build_storage_config(self, s3_config: dict): ) return tempo_config.Storage(trace=storage_config) - def is_ready(self): - """Whether the tempo built-in readiness check reports 'ready'.""" - if self.use_tls: - tls, s = f" --cacert {self.server_cert_path}", "s" - else: - tls = s = "" - - # cert is for fqdn/ingress, not for IP - cmd = f"curl{tls} http{s}://{self._external_hostname}:{self.tempo_http_server_port}/ready" - - try: - out = getoutput(cmd).split("\n")[-1] - except (CalledProcessError, IndexError): - return False - return out == "ready" - def _build_querier_config(self, roles_addresses: Dict[str, Set[str]]): """Build querier config""" - addr = "localhost" - if TempoRole.query_frontend in roles_addresses.keys(): - addr = roles_addresses[TempoRole.query_frontend].pop() + # if distributor and query-frontend have the same address, then the mode of operation is 'all'. + if roles_addresses.get(tempo_config.TempoRole.query_frontend) == roles_addresses.get( + tempo_config.TempoRole.distributor + or (not roles_addresses.get(tempo_config.TempoRole.query_frontend)) + ): + addr = "localhost" + else: + addr = roles_addresses.get(tempo_config.TempoRole.query_frontend).pop() return tempo_config.Querier( frontend_worker=tempo_config.FrontendWorker( @@ -274,7 +202,9 @@ def _build_ingester_config(self): max_block_duration="30m", ) - def _build_distributor_config(self, receivers: Sequence[ReceiverProtocol]): # noqa: C901 + def _build_distributor_config( + self, receivers: Sequence[ReceiverProtocol], use_tls=False + ): # noqa: C901 """Build distributor config""" # receivers: the receivers we have to enable because the requirers we're related to # intend to use them. It already includes receivers that are always enabled @@ -284,7 +214,7 @@ def _build_distributor_config(self, receivers: Sequence[ReceiverProtocol]): # n if not receivers_set: logger.warning("No receivers set. Tempo will be up but not functional.") - if self.use_tls: + if use_tls: receiver_config = { "tls": { "ca_file": str(self.tls_ca_path), diff --git a/src/tempo_cluster.py b/src/tempo_cluster.py deleted file mode 100644 index bb474c1..0000000 --- a/src/tempo_cluster.py +++ /dev/null @@ -1,391 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical -# See LICENSE file for licensing details. 
- -"""This module contains an endpoint wrapper class for the requirer side of the ``tempo-cluster`` relation. - -As this relation is cluster-internal and not intended for third-party charms to interact with -`tempo-coordinator-k8s`, its only user will be the tempo-worker-k8s charm. As such, -it does not live in a charm lib as most other relation endpoint wrappers do. -""" -import collections -import json -import logging -from enum import Enum, unique -from typing import Any, Dict, List, MutableMapping, Optional, Set - -import ops -import pydantic - -# The only reason we need the tracing lib is this enum. Not super nice. -from charms.tempo_k8s.v2.tracing import ReceiverProtocol -from ops import EventSource, Object, ObjectEvents -from pydantic import BaseModel, ConfigDict - -log = logging.getLogger("tempo_cluster") - -DEFAULT_ENDPOINT_NAME = "tempo-cluster" -BUILTIN_JUJU_KEYS = {"ingress-address", "private-address", "egress-subnets"} - - -# TODO: inherit enum.StrEnum when jammy is no longer supported. -# https://docs.python.org/3/library/enum.html#enum.StrEnum -@unique -class TempoRole(str, Enum): - """Tempo component role names. - - References: - arch: - -> https://grafana.com/docs/tempo/latest/operations/architecture/ - config: - -> https://grafana.com/docs/tempo/latest/configuration/#server - """ - - # scalable-single-binary is a bit too long to type - all = "all" # default, meta-role. gets remapped to scalable-single-binary by the worker. - - querier = "querier" - query_frontend = "query-frontend" - ingester = "ingester" - distributor = "distributor" - compactor = "compactor" - metrics_generator = "metrics-generator" - - @property - def all_nonmeta(self): - return ( - TempoRole.querier, - TempoRole.query_frontend, - TempoRole.ingester, - TempoRole.distributor, - TempoRole.compactor, - TempoRole.metrics_generator, - ) - - -class ConfigReceivedEvent(ops.EventBase): - """Event emitted when the "tempo-cluster" provider has shared a new tempo config.""" - - config: Dict[str, Any] - """The tempo config.""" - - def __init__(self, handle: ops.framework.Handle, config: Dict[str, Any]): - super().__init__(handle) - self.config = config - - def snapshot(self) -> Dict[str, Any]: - """Used by the framework to serialize the event to disk. - - Not meant to be called by charm code. - """ - return {"config": json.dumps(self.config)} - - def restore(self, snapshot: Dict[str, Any]): - """Used by the framework to deserialize the event from disk. - - Not meant to be called by charm code. - """ - self.relation = json.loads(snapshot["config"]) # noqa - - -class TempoClusterError(Exception): - """Base class for exceptions raised by this module.""" - - -class DataValidationError(TempoClusterError): - """Raised when relation databag validation fails.""" - - -class DatabagAccessPermissionError(TempoClusterError): - """Raised when a follower attempts to write leader settings.""" - - -class _JujuTopologyModel(pydantic.BaseModel): - """_JujuTopologyModel.""" - - model: str - model_uuid: str - application: str - charm_name: str - unit: str - - -# DatabagModel implementation from traefik.v1.ingress charm lib. 
-PYDANTIC_IS_V1 = int(pydantic.version.VERSION.split(".")[0]) < 2 -if PYDANTIC_IS_V1: - - class DatabagModel(BaseModel): # type: ignore - """Base databag model.""" - - class Config: - """Pydantic config.""" - - allow_population_by_field_name = True - """Allow instantiating this class by field name (instead of forcing alias).""" - - @classmethod - def load(cls, databag: MutableMapping): - """Load this model from a Juju databag.""" - - try: - data = { - k: json.loads(v) - for k, v in databag.items() - # Don't attempt to parse model-external values - if k in {f.alias for f in cls.__fields__.values()} # type: ignore - } - except json.JSONDecodeError as e: - msg = f"invalid databag contents: expecting json. {databag}" - log.error(msg) - raise DataValidationError(msg) from e - - try: - return cls.parse_raw(json.dumps(data)) # type: ignore - except pydantic.ValidationError as e: - msg = f"failed to validate databag: {databag}" - log.debug(msg, exc_info=True) - raise DataValidationError(msg) from e - - def dump(self, databag: Optional[MutableMapping] = None, clear: bool = True): - """Write the contents of this model to Juju databag. - - :param databag: the databag to write the data to. - :param clear: ensure the databag is cleared before writing it. - """ - if clear and databag: - databag.clear() - - if databag is None: - databag = {} - - for key, value in self.dict(by_alias=True, exclude_defaults=True).items(): # type: ignore - databag[key] = json.dumps(value) - - return databag - -else: - from pydantic import ConfigDict - - class DatabagModel(BaseModel): - """Base databag model.""" - - model_config = ConfigDict( - # tolerate additional keys in databag - extra="ignore", - # Allow instantiating this class by field name (instead of forcing alias). - populate_by_name=True, - ) # type: ignore - """Pydantic config.""" - - @classmethod - def load(cls, databag: MutableMapping): - """Load this model from a Juju databag.""" - - try: - data = { - k: json.loads(v) - for k, v in databag.items() - # Don't attempt to parse model-external values - if k in {(f.alias or n) for n, f in cls.model_fields.items()} # type: ignore - } - except json.JSONDecodeError as e: - msg = f"invalid databag contents: expecting json. {databag}" - log.error(msg) - raise DataValidationError(msg) from e - - try: - return cls.model_validate_json(json.dumps(data)) # type: ignore - except pydantic.ValidationError as e: - msg = f"failed to validate databag: {databag}" - log.debug(msg, exc_info=True) - raise DataValidationError(msg) from e - - def dump(self, databag: Optional[MutableMapping] = None, clear: bool = True): - """Write the contents of this model to Juju databag. - - :param databag: the databag to write the data to. - :param clear: ensure the databag is cleared before writing it. 
- """ - if clear and databag: - databag.clear() - - if databag is None: - databag = {} - - dct = self.model_dump(mode="json", by_alias=True, exclude_defaults=True) # type: ignore - databag.update({k: json.dumps(v) for k, v in dct.items()}) - return databag - - -class TempoClusterRequirerAppData(DatabagModel): - """TempoClusterRequirerAppData.""" - - role: TempoRole - - -class TempoClusterRequirerUnitData(DatabagModel): - """TempoClusterRequirerUnitData.""" - - juju_topology: _JujuTopologyModel - address: str - - -class TempoClusterProviderAppData(DatabagModel): - """TempoClusterProviderAppData.""" - - tempo_config: Dict[str, Any] - loki_endpoints: Optional[Dict[str, str]] = None - ca_cert: Optional[str] = None - server_cert: Optional[str] = None - privkey_secret_id: Optional[str] = None - tempo_receiver: Optional[Dict[ReceiverProtocol, str]] = None - - -class TempoClusterChangedEvent(ops.EventBase): - """Event emitted when any "tempo-cluster" relation event fires.""" - - -class TempoClusterProviderEvents(ObjectEvents): - """Events emitted by the TempoClusterProvider "tempo-cluster" endpoint wrapper.""" - - changed = EventSource(TempoClusterChangedEvent) - - -class TempoClusterProvider(Object): - """``tempo-cluster`` provider endpoint wrapper.""" - - on = TempoClusterProviderEvents() # type: ignore - - def __init__( - self, - charm: ops.CharmBase, - key: Optional[str] = None, - endpoint: str = DEFAULT_ENDPOINT_NAME, - ): - super().__init__(charm, key or endpoint) - self._charm = charm - - # filter out common unhappy relation states - self._relations: List[ops.Relation] = [ - rel for rel in self.model.relations[endpoint] if (rel.app and rel.data) - ] - - # we coalesce all tempo-cluster-relation-* events into a single cluster-changed API. - # the coordinator uses a common exit hook reconciler, that's why. 
- self.framework.observe( - self._charm.on[endpoint].relation_joined, self._on_tempo_cluster_changed - ) - self.framework.observe( - self._charm.on[endpoint].relation_changed, self._on_tempo_cluster_changed - ) - self.framework.observe( - self._charm.on[endpoint].relation_departed, self._on_tempo_cluster_changed - ) - self.framework.observe( - self._charm.on[endpoint].relation_broken, self._on_tempo_cluster_changed - ) - - def _on_tempo_cluster_changed(self, _): - self.on.changed.emit() - - def publish_privkey(self, label: str) -> str: - """Grant the secret containing the privkey to all relations, and return the secret ID.""" - secret = self.model.get_secret(label=label) - for relation in self._relations: - secret.grant(relation) - # can't return secret.id because secret was obtained by label, and so - # we don't have an ID unless we fetch it - return secret.get_info().id - - def publish_data( - self, - tempo_config: Dict[str, Any], - tempo_receiver: Optional[Dict[ReceiverProtocol, Any]] = None, - ca_cert: Optional[str] = None, - server_cert: Optional[str] = None, - privkey_secret_id: Optional[str] = None, - loki_endpoints: Optional[Dict[str, str]] = None, - ) -> None: - """Publish the tempo config to all related tempo worker clusters.""" - for relation in self._relations: - if relation: - local_app_databag = TempoClusterProviderAppData( - tempo_config=tempo_config, - loki_endpoints=loki_endpoints, - tempo_receiver=tempo_receiver, - ca_cert=ca_cert, - server_cert=server_cert, - privkey_secret_id=privkey_secret_id, - ) - local_app_databag.dump(relation.data[self.model.app]) - - @property - def has_workers(self) -> bool: - """Return whether this tempo coordinator has any connected workers.""" - # we use the presence of relations instead of addresses, because we want this - # check to fail early - return bool(self._relations) - - def gather_addresses_by_role(self) -> Dict[str, Set[str]]: - """Go through the worker's unit databags to collect all the addresses published by the units, by role.""" - data = collections.defaultdict(set) - for relation in self._relations: - - if not relation.app: - log.debug(f"skipped {relation} as .app is None") - continue - - try: - worker_app_data = TempoClusterRequirerAppData.load(relation.data[relation.app]) - except DataValidationError as e: - log.info(f"invalid databag contents: {e}") - continue - - for worker_unit in relation.units: - try: - worker_data = TempoClusterRequirerUnitData.load(relation.data[worker_unit]) - unit_address = worker_data.address - data[worker_app_data.role].add(unit_address) - except DataValidationError as e: - log.info(f"invalid databag contents: {e}") - continue - - return data - - def gather_addresses(self) -> Set[str]: - """Go through the worker's unit databags to collect all the addresses published by the units.""" - data = set() - addresses_by_role = self.gather_addresses_by_role() - for role, address_set in addresses_by_role.items(): - data.update(address_set) - - return data - - def gather_roles(self) -> Dict[TempoRole, int]: - """Go through the worker's app databags and sum the available application roles.""" - data = collections.Counter() - for relation in self._relations: - if relation.app: - remote_app_databag = relation.data[relation.app] - try: - worker_role: TempoRole = TempoClusterRequirerAppData.load( - remote_app_databag - ).role - except DataValidationError as e: - log.debug(f"invalid databag contents: {e}") - continue - - # the number of units with each role is the number of remote units - role_n = 
len(relation.units) # exclude this unit - if worker_role is TempoRole.all: - for role in [r for r in TempoRole if r is not TempoRole.all]: - data[role] += role_n - continue - - data[worker_role] += role_n - - dct = dict(data) - # exclude all roles from the count, if any slipped through - if TempoRole.all in data: - del data[TempoRole.all] - return dct diff --git a/src/tempo_config.py b/src/tempo_config.py index 1c2a186..c487dac 100644 --- a/src/tempo_config.py +++ b/src/tempo_config.py @@ -2,22 +2,96 @@ # See LICENSE file for licensing details. """Helper module for interacting with the Tempo configuration.""" - import enum import logging import re +from enum import Enum, unique from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Any, Dict, Iterable, List, Mapping, Optional from urllib.parse import urlparse from pydantic import BaseModel, ConfigDict, field_validator, model_validator -S3_RELATION_NAME = "s3" -BUCKET_NAME = "tempo" - logger = logging.getLogger(__name__) +# TODO: inherit enum.StrEnum when jammy is no longer supported. +# https://docs.python.org/3/library/enum.html#enum.StrEnum +@unique +class TempoRole(str, Enum): + """Tempo component role names. + + References: + arch: + -> https://grafana.com/docs/tempo/latest/operations/architecture/ + config: + -> https://grafana.com/docs/tempo/latest/configuration/#server + """ + + # scalable-single-binary is a bit too long to type + all = "all" # default, meta-role. gets remapped to scalable-single-binary by the worker. + + querier = "querier" + query_frontend = "query-frontend" + ingester = "ingester" + distributor = "distributor" + compactor = "compactor" + metrics_generator = "metrics-generator" + + @staticmethod + def all_nonmeta(): + return { + TempoRole.querier, + TempoRole.query_frontend, + TempoRole.ingester, + TempoRole.distributor, + TempoRole.compactor, + TempoRole.metrics_generator, + } + + +META_ROLES = { + "all": set(TempoRole.all_nonmeta()), +} +"""Tempo component meta-role names.""" + +MINIMAL_DEPLOYMENT = { + TempoRole.querier: 1, + TempoRole.query_frontend: 1, + TempoRole.ingester: 1, + TempoRole.distributor: 1, + TempoRole.compactor: 1, + TempoRole.metrics_generator: 1, +} +"""The minimal set of roles that need to be allocated for the +deployment to be considered consistent (otherwise we set blocked).""" + +RECOMMENDED_DEPLOYMENT = { + TempoRole.querier: 1, + TempoRole.query_frontend: 1, + TempoRole.ingester: 3, + TempoRole.distributor: 1, + TempoRole.compactor: 1, + TempoRole.metrics_generator: 1, +} + +""" +The set of roles that need to be allocated for the +deployment to be considered robust according to Grafana Tempo's +Helm chart configurations. 
+https://github.com/grafana/helm-charts/blob/main/charts/tempo-distributed/ +""" + + +class TempoRolesConfig: + """Define the configuration for Tempo roles.""" + + roles: Iterable[str] = {role for role in TempoRole} + meta_roles: Mapping[str, Iterable[str]] = META_ROLES + minimal_deployment: Iterable[str] = MINIMAL_DEPLOYMENT + recommended_deployment = RECOMMENDED_DEPLOYMENT + + class ClientAuthTypeEnum(str, enum.Enum): """Client auth types.""" diff --git a/tests/scenario/conftest.py b/tests/scenario/conftest.py index 9fe9ebb..d5826b9 100644 --- a/tests/scenario/conftest.py +++ b/tests/scenario/conftest.py @@ -2,9 +2,9 @@ import pytest from scenario import Container, Context, Relation +from tempo_cluster import TempoClusterRequirerAppData, TempoRole from charm import TempoCoordinatorCharm -from tempo_cluster import TempoClusterRequirerAppData, TempoRole @pytest.fixture diff --git a/tests/scenario/helpers.py b/tests/scenario/helpers.py index fc3b79a..dbe8452 100644 --- a/tests/scenario/helpers.py +++ b/tests/scenario/helpers.py @@ -1,5 +1,4 @@ import scenario - from tempo_cluster import TempoClusterProviderAppData diff --git a/tests/scenario/test_config.py b/tests/scenario/test_config.py index 3532fb5..9c8e95a 100644 --- a/tests/scenario/test_config.py +++ b/tests/scenario/test_config.py @@ -1,7 +1,7 @@ from scenario import State +from tempo_cluster import TempoClusterRequirerUnitData from charm import TempoCoordinatorCharm -from tempo_cluster import TempoClusterRequirerUnitData def test_memberlist_multiple_members( diff --git a/tests/scenario/test_nginx.py b/tests/scenario/test_nginx.py index 29811a0..a629774 100644 --- a/tests/scenario/test_nginx.py +++ b/tests/scenario/test_nginx.py @@ -3,11 +3,11 @@ from unittest.mock import MagicMock import pytest - from nginx import Nginx -from tempo import Tempo from tempo_cluster import TempoClusterProvider +from tempo import Tempo + logger = logging.getLogger(__name__) diff --git a/tests/scenario/test_tempo_clustered.py b/tests/scenario/test_tempo_clustered.py index b7eb75c..27d7988 100644 --- a/tests/scenario/test_tempo_clustered.py +++ b/tests/scenario/test_tempo_clustered.py @@ -6,10 +6,10 @@ from charms.tempo_k8s.v2.tracing import TracingRequirerAppData from charms.tls_certificates_interface.v3.tls_certificates import ProviderCertificate from scenario import Relation, State +from tempo_cluster import TempoClusterProviderAppData from charm import TempoCoordinatorCharm from tempo import Tempo -from tempo_cluster import TempoClusterProviderAppData from tests.scenario.helpers import get_tempo_config diff --git a/tests/unit/test_coherence.py b/tests/unit/test_coherence.py index cf85915..c3d9ab0 100644 --- a/tests/unit/test_coherence.py +++ b/tests/unit/test_coherence.py @@ -1,7 +1,6 @@ from unittest.mock import MagicMock import pytest as pytest - from coordinator import ( MINIMAL_DEPLOYMENT, RECOMMENDED_DEPLOYMENT, From 0b0784cc24d5d3015fe33867187b685e37262740 Mon Sep 17 00:00:00 2001 From: michael Date: Fri, 19 Jul 2024 04:23:37 +0300 Subject: [PATCH 02/22] fix tests --- .gitignore | 3 +- src/charm.py | 2 +- src/prometheus_alert_rules/alerts.yaml | 187 --------- .../{tempo_workers => workers}/alerts.yaml | 184 +++++++++ .../{tempo_workers => workers}/rules.yaml | 0 src/tempo.py | 6 +- tests/integration/helpers.py | 122 +++++- tests/integration/test_ingressed_tls.py | 360 +++++++++-------- tests/integration/test_integration.py | 334 ++++++++-------- tests/integration/test_scaling_monolithic.py | 92 +---- tests/integration/test_tls.py | 371 
++++++++++-------- tests/integration/tester/src/charm.py | 2 +- tests/scenario/conftest.py | 13 +- tests/scenario/helpers.py | 4 +- tests/scenario/test_charm_statuses.py | 11 +- tests/scenario/test_config.py | 19 +- tests/scenario/test_enabled_receivers.py | 4 +- tests/scenario/test_ingressed_tracing.py | 4 +- tests/scenario/test_nginx.py | 71 +--- tests/scenario/test_tempo_clustered.py | 71 ++-- tests/scenario/test_tls.py | 7 +- tests/unit/test_coherence.py | 42 +- tests/unit/test_tempo.py | 19 +- 23 files changed, 1007 insertions(+), 921 deletions(-) delete mode 100644 src/prometheus_alert_rules/alerts.yaml rename src/prometheus_alert_rules/{tempo_workers => workers}/alerts.yaml (82%) rename src/prometheus_alert_rules/{tempo_workers => workers}/rules.yaml (100%) diff --git a/.gitignore b/.gitignore index ea4fe3d..9ccb50d 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,5 @@ __pycache__/ .idea .vscode/ *.egg-info/ -cos-tool-* \ No newline at end of file +cos-tool-* +src/prometheus_alert_rules/consolidated_rules/** \ No newline at end of file diff --git a/src/charm.py b/src/charm.py index 5947939..55b41b4 100755 --- a/src/charm.py +++ b/src/charm.py @@ -144,6 +144,7 @@ def _internal_url(self) -> str: scheme = "http" if hasattr(self, "coordinator") and self.coordinator.nginx.are_certificates_on_disk: scheme = "https" + return f"{scheme}://{self.hostname}" @property @@ -188,7 +189,6 @@ def _on_tracing_request(self, e: RequestEvent): logger.debug(f"received tracing request from {e.relation.app}: {e.requested_receivers}") self._update_tracing_relations() - def _on_ingress_relation_created(self, _: RelationEvent): self._configure_ingress() diff --git a/src/prometheus_alert_rules/alerts.yaml b/src/prometheus_alert_rules/alerts.yaml deleted file mode 100644 index 2c21d50..0000000 --- a/src/prometheus_alert_rules/alerts.yaml +++ /dev/null @@ -1,187 +0,0 @@ -groups: -- name: tempo_alerts - rules: - - alert: "TempoBlockListRisingQuickly" - expr: | - avg by (job, instance)(tempodb_blocklist_length) / avg by (job, instance)(tempodb_blocklist_length offset 7d) > 1.4 - for: "15m" - labels: - severity: "critical" - annotations: - summary: "Tempo block list rising quickly (instance {{ $labels.instance }})" - description: "The {{ $labels.job }} is experiencing a 40% rise in tempo blocklist length over the last 7 days. Consider scaling compactors." - - alert: TempoCompactionsFailing - expr: sum by (job, instance)(increase(tempodb_compaction_errors_total{}[1h])) > 2 and sum by (job, instance)(increase(tempodb_compaction_errors_total{}[5m])) > 0 - for: 5m - labels: - severity: critical - annotations: - summary: Tempo compactions failing (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} is experiencing more than 2 compactions failures in the past hour." - - alert: TempoCompactorUnhealthy - expr: max by (job, instance)(tempo_ring_members{state="Unhealthy", name="compactor"}) > 0 - for: 15m - labels: - severity: critical - annotations: - summary: Tempo unhealthy compactor(s) (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} is having {{ printf \"%f\" $value }} unhealthy compactor(s)." 
- - alert: TempoCompactorsTooManyOutstandingBlocks - expr: sum by (tenant) (tempodb_compaction_outstanding_blocks) / ignoring(tenant) group_left count(tempo_build_info) > 100 - for: "6h" - labels: - severity: warning - annotations: - summary: Tempo too many outstanding compaction blocks (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} is having too many outstanding compaction blocks for tenant {{ $labels.tenant }}, increase compactor's CPU or add more compactors." - - alert: TempoDiscardedSpans - expr: 100 * sum by (instance,job)(rate(tempo_discarded_spans_total[5m])) / sum by (instance,job)(rate(tempo_distributor_spans_received_total[5m])) > 5 - for: "5m" - labels: - severity: warning - annotations: - summary: Tempo spans insertion failing (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} is experiencing {{ printf \"%.2f\" $value }}% discard of spans." - - alert: TempoDistributorPushLatency - expr: histogram_quantile(0.99, sum by(le, job, instance) (rate(tempo_distributor_push_duration_seconds_bucket[5m]))) > 3 - for: "5m" - labels: - severity: warning - annotations: - summary: Tempo distributor push latency (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} distributor push is experiencing {{ printf \"%.2f\" $value }}s 99th percentile latency." - - alert: TempoDistributorUnhealthy - expr: max by (job, instance)(tempo_ring_members{state="Unhealthy", name="distributor"}) > 0 - for: 15m - labels: - severity: critical - annotations: - summary: Tempo unhealthy distributor(s) (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} is having {{ printf \"%f\" $value }} unhealthy distributor(s)." - - alert: TempoFailedIngestingRequests - expr: sum by (job,instance)(increase (tempo_ingester_traces_created_total[5m])) / sum by (instance,job)(rate(tempo_request_duration_seconds_count{route='/tempopb.Pusher/PushBytesV2'}[5m])) == 0 - for: "5m" - labels: - severity: critical - annotations: - summary: Tempo pushing traces to ingester failing (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} is experiencing failure in distributors pushing traces to the ingesters." - - alert: TempoFrontendClients - expr: tempo_query_frontend_connected_clients == 0 - for: "5m" - labels: - severity: critical - annotations: - summary: Tempo frontend connected clients (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} has no frontend connected clients." - - alert: TempoFrontendQueueLatency - expr: histogram_quantile(0.99, sum by(le,instance,job) (rate(tempo_query_frontend_queue_duration_seconds_bucket[15m]))) > 2 - for: "15m" - labels: - severity: warning - annotations: - summary: Tempo frontend queue latency (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} frontend queue is experiencing {{ printf \"%.2f\" $value }}s 99th percentile latency." - - alert: TempoIngesterFlushLatency - expr: histogram_quantile(0.99, sum by(le,instance,job) (rate(tempo_ingester_flush_duration_seconds_bucket[5m]))) > 5 - for: "5m" - labels: - severity: critical - annotations: - summary: Tempo ingester flush latency (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} ingester flush is experiencing {{ printf \"%.2f\" $value }}s 99th percentile latency." 
- - alert: TempoIngesterFlushesFailing - expr: sum by (instance,job)(increase(tempo_ingester_flush_failed_retries_total[1h])) > 2 and sum by(instance,job)(increase(tempo_ingester_flush_failed_retries_total[5m])) > 0 - for: 5m - labels: - severity: critical - annotations: - summary: Tempo ingester flush retries failing (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} is experiencing more than 2 flush retries failures in the past hour." - - alert: TempoIngesterFlushesUnhealthy - expr: sum by (instance,job)(increase(tempo_ingester_failed_flushes_total[1h])) > 2 and sum by (instance,job)(increase(tempo_ingester_failed_flushes_total[5m])) > 0 - for: 5m - labels: - severity: warning - annotations: - summary: Tempo ingester flush failing (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} is experiencing more than 2 ingester flush failures in the past hour." - - alert: TempoIngestersUnhealthy - expr: max by (instance,job)(tempo_ring_members{state="Unhealthy", name="ingester"}) > 0 - for: "15m" - labels: - severity: critical - annotations: - summary: Tempo unhealthy ingester(s) (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} is having {{ printf \"%f\" $value }} unhealthy ingester(s)." - - alert: TempoKVRequestErrors - expr: 100 * sum(rate(tempo_kv_request_duration_seconds_count{status_code=~"5.."}[5m])) by (route,instance,job) / sum(rate(tempo_kv_request_duration_seconds_count[5m])) by (route,instance,job) > 10 - for: "15m" - labels: - severity: critical - annotations: - summary: Tempo kv store request errors (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} {{ $labels.route }} KV store requests is experiencing {{ printf \"%.2f\" $value }}% error rate." - - alert: TempoTargetMissing - expr: up == 0 - for: 0m - labels: - severity: critical - annotations: - summary: Prometheus target missing (instance {{ $labels.instance }}) - description: "A Prometheus target has disappeared. An exporter might be crashed." - - alert: TempoNoTenantIndexBuilders - expr: sum by (tenant,job,instance) (tempodb_blocklist_tenant_index_builder) == 0 and max by (tenant,job,instance)(tempodb_blocklist_length) > 0 - for: 5m - labels: - severity: critical - annotations: - summary: Tempo tenant index builder failing (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} is having no tenant index builders for tenant {{ $labels.tenant }}. Tenant index will quickly become stale." - - alert: TempoRequestErrors - expr: 100 * sum(rate(tempo_request_duration_seconds_count{status_code=~"5.."}[5m])) by (route,job,instance) / sum(rate(tempo_request_duration_seconds_count[5m])) by (route,job,instance) > 10 - for: "15m" - labels: - severity: critical - annotations: - summary: Tempo request errors (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} is experiencing {{ printf \"%.2f\" $value }}% request error rate." - - alert: TempoRequestLatency - expr: histogram_quantile(0.99, sum by(le, route,job,instance)(rate(tempo_request_duration_seconds_bucket[5m]))) > 5 - for: 5m - labels: - severity: critical - annotations: - summary: Tempo request latency (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf \"%.2f\" $value }}s 99th percentile latency." 
- - alert: TempoRetentionsFailing - expr: sum by (job,instance)(increase(tempodb_retention_errors_total[1h])) > 2 and sum by (job,instance)(increase(tempodb_retention_errors_total[5m])) > 0 - for: "5m" - labels: - severity: critical - annotations: - summary: Tempo retentions failing (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} is experiencing more than 2 retention failures in the past hour." - - alert: TempoTCPConnectionsLimit - expr: 100 * tempo_tcp_connections / tempo_tcp_connections_limit >= 80 and tempo_tcp_connections_limit > 0 - for: "5m" - labels: - severity: warning - annotations: - summary: Tempo reaching max number of tcp connections (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} is reaching {{ printf \"%.2f\" $value }}% of max tcp {{ $labels.protocol }} connections." - - alert: TempoTenantIndexTooOld - expr: max by(tenant,instance,job) (tempodb_blocklist_tenant_index_age_seconds) > 600 - for: 5m - labels: - severity: critical - annotations: - summary: Tempo tenant old index (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} is experiencing a tenant {{ $labels.tenant }} with a too old index age of 600 seconds." - - alert: TempoUserConfigurableOverridesReloadFailing - expr: sum by (instance,job)(increase(tempo_overrides_user_configurable_overrides_reload_failed_total[1h])) > 5 and sum by (instance,job)(increase(tempo_overrides_user_configurable_overrides_reload_failed_total{}[5m])) > 0 - labels: - severity: critical - annotations: - summary: Tempo user config override reload failing (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} is experiencing more than 5 user-configurable override reload failures in the past hour." diff --git a/src/prometheus_alert_rules/tempo_workers/alerts.yaml b/src/prometheus_alert_rules/workers/alerts.yaml similarity index 82% rename from src/prometheus_alert_rules/tempo_workers/alerts.yaml rename to src/prometheus_alert_rules/workers/alerts.yaml index 65109a2..cbe3095 100644 --- a/src/prometheus_alert_rules/tempo_workers/alerts.yaml +++ b/src/prometheus_alert_rules/workers/alerts.yaml @@ -235,6 +235,190 @@ groups: labels: component: ingester severity: warning + - alert: "TempoBlockListRisingQuickly" + expr: | + avg by (job, instance)(tempodb_blocklist_length) / avg by (job, instance)(tempodb_blocklist_length offset 7d) > 1.4 + for: "15m" + labels: + severity: "critical" + annotations: + summary: "Tempo block list rising quickly (instance {{ $labels.instance }})" + description: "The {{ $labels.job }} is experiencing a 40% rise in tempo blocklist length over the last 7 days. Consider scaling compactors." + - alert: TempoCompactionsFailing + expr: sum by (job, instance)(increase(tempodb_compaction_errors_total{}[1h])) > 2 and sum by (job, instance)(increase(tempodb_compaction_errors_total{}[5m])) > 0 + for: 5m + labels: + severity: critical + annotations: + summary: Tempo compactions failing (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} is experiencing more than 2 compactions failures in the past hour." + - alert: TempoCompactorUnhealthy + expr: max by (job, instance)(tempo_ring_members{state="Unhealthy", name="compactor"}) > 0 + for: 15m + labels: + severity: critical + annotations: + summary: Tempo unhealthy compactor(s) (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} is having {{ printf \"%f\" $value }} unhealthy compactor(s)." 
+ - alert: TempoCompactorsTooManyOutstandingBlocks + expr: sum by (tenant) (tempodb_compaction_outstanding_blocks) / ignoring(tenant) group_left count(tempo_build_info) > 100 + for: "6h" + labels: + severity: warning + annotations: + summary: Tempo too many outstanding compaction blocks (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} is having too many outstanding compaction blocks for tenant {{ $labels.tenant }}, increase compactor's CPU or add more compactors." + - alert: TempoDiscardedSpans + expr: 100 * sum by (instance,job)(rate(tempo_discarded_spans_total[5m])) / sum by (instance,job)(rate(tempo_distributor_spans_received_total[5m])) > 5 + for: "5m" + labels: + severity: warning + annotations: + summary: Tempo spans insertion failing (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} is experiencing {{ printf \"%.2f\" $value }}% discard of spans." + - alert: TempoDistributorPushLatency + expr: histogram_quantile(0.99, sum by(le, job, instance) (rate(tempo_distributor_push_duration_seconds_bucket[5m]))) > 3 + for: "5m" + labels: + severity: warning + annotations: + summary: Tempo distributor push latency (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} distributor push is experiencing {{ printf \"%.2f\" $value }}s 99th percentile latency." + - alert: TempoDistributorUnhealthy + expr: max by (job, instance)(tempo_ring_members{state="Unhealthy", name="distributor"}) > 0 + for: 15m + labels: + severity: critical + annotations: + summary: Tempo unhealthy distributor(s) (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} is having {{ printf \"%f\" $value }} unhealthy distributor(s)." + - alert: TempoFailedIngestingRequests + expr: sum by (job,instance)(increase (tempo_ingester_traces_created_total[5m])) / sum by (instance,job)(rate(tempo_request_duration_seconds_count{route='/tempopb.Pusher/PushBytesV2'}[5m])) == 0 + for: "5m" + labels: + severity: critical + annotations: + summary: Tempo pushing traces to ingester failing (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} is experiencing failure in distributors pushing traces to the ingesters." + - alert: TempoFrontendClients + expr: tempo_query_frontend_connected_clients == 0 + for: "5m" + labels: + severity: critical + annotations: + summary: Tempo frontend connected clients (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} has no frontend connected clients." + - alert: TempoFrontendQueueLatency + expr: histogram_quantile(0.99, sum by(le,instance,job) (rate(tempo_query_frontend_queue_duration_seconds_bucket[15m]))) > 2 + for: "15m" + labels: + severity: warning + annotations: + summary: Tempo frontend queue latency (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} frontend queue is experiencing {{ printf \"%.2f\" $value }}s 99th percentile latency." + - alert: TempoIngesterFlushLatency + expr: histogram_quantile(0.99, sum by(le,instance,job) (rate(tempo_ingester_flush_duration_seconds_bucket[5m]))) > 5 + for: "5m" + labels: + severity: critical + annotations: + summary: Tempo ingester flush latency (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} ingester flush is experiencing {{ printf \"%.2f\" $value }}s 99th percentile latency." 
+ - alert: TempoIngesterFlushesFailing + expr: sum by (instance,job)(increase(tempo_ingester_flush_failed_retries_total[1h])) > 2 and sum by(instance,job)(increase(tempo_ingester_flush_failed_retries_total[5m])) > 0 + for: 5m + labels: + severity: critical + annotations: + summary: Tempo ingester flush retries failing (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} is experiencing more than 2 flush retries failures in the past hour." + - alert: TempoIngesterFlushesUnhealthy + expr: sum by (instance,job)(increase(tempo_ingester_failed_flushes_total[1h])) > 2 and sum by (instance,job)(increase(tempo_ingester_failed_flushes_total[5m])) > 0 + for: 5m + labels: + severity: warning + annotations: + summary: Tempo ingester flush failing (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} is experiencing more than 2 ingester flush failures in the past hour." + - alert: TempoIngestersUnhealthy + expr: max by (instance,job)(tempo_ring_members{state="Unhealthy", name="ingester"}) > 0 + for: "15m" + labels: + severity: critical + annotations: + summary: Tempo unhealthy ingester(s) (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} is having {{ printf \"%f\" $value }} unhealthy ingester(s)." + - alert: TempoKVRequestErrors + expr: 100 * sum(rate(tempo_kv_request_duration_seconds_count{status_code=~"5.."}[5m])) by (route,instance,job) / sum(rate(tempo_kv_request_duration_seconds_count[5m])) by (route,instance,job) > 10 + for: "15m" + labels: + severity: critical + annotations: + summary: Tempo kv store request errors (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} {{ $labels.route }} KV store requests is experiencing {{ printf \"%.2f\" $value }}% error rate." + - alert: TempoTargetMissing + expr: up == 0 + for: 0m + labels: + severity: critical + annotations: + summary: Prometheus target missing (instance {{ $labels.instance }}) + description: "A Prometheus target has disappeared. An exporter might be crashed." + - alert: TempoNoTenantIndexBuilders + expr: sum by (tenant,job,instance) (tempodb_blocklist_tenant_index_builder) == 0 and max by (tenant,job,instance)(tempodb_blocklist_length) > 0 + for: 5m + labels: + severity: critical + annotations: + summary: Tempo tenant index builder failing (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} is having no tenant index builders for tenant {{ $labels.tenant }}. Tenant index will quickly become stale." + - alert: TempoRequestErrors + expr: 100 * sum(rate(tempo_request_duration_seconds_count{status_code=~"5.."}[5m])) by (route,job,instance) / sum(rate(tempo_request_duration_seconds_count[5m])) by (route,job,instance) > 10 + for: "15m" + labels: + severity: critical + annotations: + summary: Tempo request errors (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} is experiencing {{ printf \"%.2f\" $value }}% request error rate." + - alert: TempoRequestLatency + expr: histogram_quantile(0.99, sum by(le, route,job,instance)(rate(tempo_request_duration_seconds_bucket[5m]))) > 5 + for: 5m + labels: + severity: critical + annotations: + summary: Tempo request latency (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf \"%.2f\" $value }}s 99th percentile latency." 
+ - alert: TempoRetentionsFailing + expr: sum by (job,instance)(increase(tempodb_retention_errors_total[1h])) > 2 and sum by (job,instance)(increase(tempodb_retention_errors_total[5m])) > 0 + for: "5m" + labels: + severity: critical + annotations: + summary: Tempo retentions failing (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} is experiencing more than 2 retention failures in the past hour." + - alert: TempoTCPConnectionsLimit + expr: 100 * tempo_tcp_connections / tempo_tcp_connections_limit >= 80 and tempo_tcp_connections_limit > 0 + for: "5m" + labels: + severity: warning + annotations: + summary: Tempo reaching max number of tcp connections (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} is reaching {{ printf \"%.2f\" $value }}% of max tcp {{ $labels.protocol }} connections." + - alert: TempoTenantIndexTooOld + expr: max by(tenant,instance,job) (tempodb_blocklist_tenant_index_age_seconds) > 600 + for: 5m + labels: + severity: critical + annotations: + summary: Tempo tenant old index (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} is experiencing a tenant {{ $labels.tenant }} with a too old index age of 600 seconds." + - alert: TempoUserConfigurableOverridesReloadFailing + expr: sum by (instance,job)(increase(tempo_overrides_user_configurable_overrides_reload_failed_total[1h])) > 5 and sum by (instance,job)(increase(tempo_overrides_user_configurable_overrides_reload_failed_total{}[5m])) > 0 + labels: + severity: critical + annotations: + summary: Tempo user config override reload failing (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} is experiencing more than 5 user-configurable override reload failures in the past hour." - name: tempo_instance_limits_alerts rules: - alert: TempoIngesterReachingSeriesLimit diff --git a/src/prometheus_alert_rules/tempo_workers/rules.yaml b/src/prometheus_alert_rules/workers/rules.yaml similarity index 100% rename from src/prometheus_alert_rules/tempo_workers/rules.yaml rename to src/prometheus_alert_rules/workers/rules.yaml diff --git a/src/tempo.py b/src/tempo.py index e1b64cc..04902e6 100644 --- a/src/tempo.py +++ b/src/tempo.py @@ -74,6 +74,7 @@ def config( Only activate the provided receivers. """ + config = tempo_config.Tempo( auth_enabled=False, server=self._build_server_config(coordinator.tls_available), @@ -157,11 +158,10 @@ def _build_querier_config(self, roles_addresses: Dict[str, Set[str]]): # if distributor and query-frontend have the same address, then the mode of operation is 'all'. 
if roles_addresses.get(tempo_config.TempoRole.query_frontend) == roles_addresses.get( tempo_config.TempoRole.distributor - or (not roles_addresses.get(tempo_config.TempoRole.query_frontend)) - ): + ) or not roles_addresses.get(tempo_config.TempoRole.query_frontend): addr = "localhost" else: - addr = roles_addresses.get(tempo_config.TempoRole.query_frontend).pop() + addr = roles_addresses.get(tempo_config.TempoRole.query_frontend, set()).pop() return tempo_config.Querier( frontend_worker=tempo_config.FrontendWorker( diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py index 887c3e0..059ee5a 100644 --- a/tests/integration/helpers.py +++ b/tests/integration/helpers.py @@ -1,13 +1,28 @@ +import json import logging +import os +import shlex import subprocess +import tempfile from dataclasses import dataclass -from typing import Dict +from pathlib import Path +from typing import Dict, Literal import yaml +from juju.application import Application +from juju.unit import Unit +from minio import Minio from pytest_operator.plugin import OpsTest _JUJU_DATA_CACHE = {} _JUJU_KEYS = ("egress-subnets", "ingress-address", "private-address") +ACCESS_KEY = "accesskey" +SECRET_KEY = "secretkey" +MINIO = "minio" +BUCKET_NAME = "tempo" +S3_INTEGRATOR = "s3-integrator" +WORKER_NAME = "tempo-worker" +APP_NAME = "tempo" logger = logging.getLogger(__name__) @@ -187,3 +202,108 @@ async def run_command(model_name: str, app_name: str, unit_num: int, command: li logger.error(e.stdout.decode()) raise e return res.stdout + + +def present_facade( + interface: str, + app_data: Dict = None, + unit_data: Dict = None, + role: Literal["provide", "require"] = "provide", + model: str = None, + app: str = "facade", +): + """Set up the facade charm to present this data over the interface ``interface``.""" + data = { + "endpoint": f"{role}-{interface}", + } + if app_data: + data["app_data"] = json.dumps(app_data) + if unit_data: + data["unit_data"] = json.dumps(unit_data) + + with tempfile.NamedTemporaryFile(dir=os.getcwd()) as f: + fpath = Path(f.name) + fpath.write_text(yaml.safe_dump(data)) + + _model = f" --model {model}" if model else "" + + subprocess.run(shlex.split(f"juju run {app}/0{_model} update --params {fpath.absolute()}")) + + +async def get_unit_address(ops_test: OpsTest, app_name, unit_no): + status = await ops_test.model.get_status() + app = status["applications"][app_name] + if app is None: + assert False, f"no app exists with name {app_name}" + unit = app["units"].get(f"{app_name}/{unit_no}") + if unit is None: + assert False, f"no unit exists in app {app_name} with index {unit_no}" + return unit["address"] + + +async def deploy_and_configure_minio(ops_test: OpsTest): + config = { + "access-key": ACCESS_KEY, + "secret-key": SECRET_KEY, + } + await ops_test.model.deploy(MINIO, channel="edge", trust=True, config=config) + await ops_test.model.wait_for_idle(apps=[MINIO], status="active", timeout=2000) + minio_addr = await get_unit_address(ops_test, MINIO, "0") + + mc_client = Minio( + f"{minio_addr}:9000", + access_key="accesskey", + secret_key="secretkey", + secure=False, + ) + + # create tempo bucket + found = mc_client.bucket_exists(BUCKET_NAME) + if not found: + mc_client.make_bucket(BUCKET_NAME) + + # configure s3-integrator + s3_integrator_app: Application = ops_test.model.applications[S3_INTEGRATOR] + s3_integrator_leader: Unit = s3_integrator_app.units[0] + + await s3_integrator_app.set_config( + { + "endpoint": f"minio-0.minio-endpoints.{ops_test.model.name}.svc.cluster.local:9000", + 
"bucket": BUCKET_NAME, + } + ) + + action = await s3_integrator_leader.run_action("sync-s3-credentials", **config) + action_result = await action.wait() + assert action_result.status == "completed" + + +async def deploy_cluster(ops_test: OpsTest): + # await ops_test.model.deploy(FACADE, channel="edge") + # await ops_test.model.wait_for_idle( + # apps=[FACADE], raise_on_blocked=True, status="active", timeout=2000 + # ) + + # TODO: deploy from latest edge + tempo_worker_charm = "/home/michael/Work/tempo-worker-k8s-operator/charm" + + resources = { + "tempo-image": "docker.io/ubuntu/tempo:2-22.04", + } + + await ops_test.model.deploy( + tempo_worker_charm, resources=resources, application_name=WORKER_NAME + ) + await ops_test.model.deploy(S3_INTEGRATOR, channel="edge") + + await ops_test.model.integrate(APP_NAME + ":s3", S3_INTEGRATOR + ":s3-credentials") + await ops_test.model.integrate(APP_NAME + ":tempo-cluster", WORKER_NAME + ":tempo-cluster") + + await deploy_and_configure_minio(ops_test) + + await ops_test.model.wait_for_idle( + apps=[APP_NAME, WORKER_NAME, S3_INTEGRATOR], + status="active", + timeout=1000, + idle_period=30, + ) diff --git a/tests/integration/test_ingressed_tls.py b/tests/integration/test_ingressed_tls.py index 896f385..10b8275 100644 --- a/tests/integration/test_ingressed_tls.py +++ b/tests/integration/test_ingressed_tls.py @@ -1,138 +1,151 @@ -# TODO: uncomment and fix when the fully functional tempo cluster is ready (e.g: traces get ingested, can query for traces) -# import asyncio -# import json -# import logging -# import random -# import subprocess -# import tempfile -# from pathlib import Path - -# import pytest -# import requests -# import yaml -# from pytest_operator.plugin import OpsTest -# from tenacity import retry, stop_after_attempt, wait_exponential - -# from tempo import Tempo -# from tests.integration.helpers import get_relation_data - -# METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text()) -# APP_NAME = "tempo" -# SSC = "self-signed-certificates" -# SSC_APP_NAME = "ssc" -# TRAEFIK = "traefik-k8s" -# TRAEFIK_APP_NAME = "trfk" -# TRACEGEN_SCRIPT_PATH = Path() / "scripts" / "tracegen.py" - -# logger = logging.getLogger(__name__) - - -# @pytest.fixture(scope="function") -# def nonce(): -# """Generate an integer nonce for easier trace querying.""" -# return str(random.random())[2:] - - -# @pytest.fixture(scope="function") -# def server_cert(ops_test: OpsTest): -# data = get_relation_data( -# requirer_endpoint=f"{APP_NAME}/0:certificates", -# provider_endpoint=f"{SSC_APP_NAME}/0:certificates", -# model=ops_test.model.name, -# ) -# cert = json.loads(data.provider.application_data["certificates"])[0]["certificate"] - -# with tempfile.NamedTemporaryFile() as f: -# p = Path(f.name) -# p.write_text(cert) -# yield p - - -# def get_traces(tempo_host: str, nonce, service_name="tracegen"): -# req = requests.get( -# "https://" + tempo_host + ":3200/api/search", -# params={"service.name": service_name, "nonce": nonce}, -# verify=False, -# ) -# assert req.status_code == 200 -# return json.loads(req.text)["traces"] - - -# @retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=10)) -# async def get_traces_patiently(tempo_host, nonce): -# assert get_traces(tempo_host, nonce=nonce) - - -# async def get_tempo_host(ops_test: OpsTest): -# status = await ops_test.model.get_status() -# app = status["applications"][TRAEFIK_APP_NAME] -# return app.public_address - - -# async def emit_trace( -# endpoint, ops_test: OpsTest, nonce, proto: str = 
"http", verbose=0, use_cert=False -# ): -# """Use juju ssh to run tracegen from the tempo charm; to avoid any DNS issues.""" -# cmd = ( -# f"juju ssh -m {ops_test.model_name} {APP_NAME}/0 " -# f"TRACEGEN_ENDPOINT={endpoint} " -# f"TRACEGEN_VERBOSE={verbose} " -# f"TRACEGEN_PROTOCOL={proto} " -# f"TRACEGEN_CERT={Tempo.server_cert_path if use_cert else ''} " -# f"TRACEGEN_NONCE={nonce} " -# "python3 tracegen.py" -# ) - -# return subprocess.getoutput(cmd) - - -# @pytest.mark.setup -# @pytest.mark.abort_on_fail -# async def test_build_and_deploy(ops_test: OpsTest): -# tempo_charm = await ops_test.build_charm(".") -# await asyncio.gather( -# ops_test.model.deploy(tempo_charm, application_name=APP_NAME), -# ops_test.model.deploy(SSC, application_name=SSC_APP_NAME), -# ops_test.model.deploy(TRAEFIK, application_name=TRAEFIK_APP_NAME, channel="edge"), -# ) - -# await asyncio.gather( -# ops_test.model.wait_for_idle( -# apps=[APP_NAME, SSC_APP_NAME, TRAEFIK_APP_NAME], -# status="active", -# raise_on_blocked=True, -# timeout=10000, -# raise_on_error=False, -# ), -# ) - - -# @pytest.mark.setup -# @pytest.mark.abort_on_fail -# async def test_push_tracegen_script_and_deps(ops_test: OpsTest): -# await ops_test.juju("scp", TRACEGEN_SCRIPT_PATH, f"{APP_NAME}/0:tracegen.py") -# await ops_test.juju( -# "ssh", -# f"{APP_NAME}/0", -# "python3 -m pip install opentelemetry-exporter-otlp-proto-grpc opentelemetry-exporter-otlp-proto-http", -# ) - - -# @pytest.mark.setup -# @pytest.mark.abort_on_fail -# async def test_relate(ops_test: OpsTest): -# await ops_test.model.integrate(APP_NAME + ":certificates", SSC_APP_NAME + ":certificates") -# await ops_test.model.integrate( -# SSC_APP_NAME + ":certificates", TRAEFIK_APP_NAME + ":certificates" -# ) -# await ops_test.model.integrate(APP_NAME + ":ingress", TRAEFIK_APP_NAME + ":traefik-route") -# await ops_test.model.wait_for_idle( -# apps=[APP_NAME, SSC_APP_NAME, TRAEFIK_APP_NAME], -# status="active", -# timeout=1000, -# ) - - +import asyncio +import json +import logging +import random +import subprocess +import tempfile +from pathlib import Path + +import pytest +import requests +import yaml +from helpers import deploy_cluster +from juju.application import Application +from pytest_operator.plugin import OpsTest +from tenacity import retry, stop_after_attempt, wait_exponential + +from tempo import Tempo +from tests.integration.helpers import get_relation_data + +METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text()) +APP_NAME = "tempo" +SSC = "self-signed-certificates" +SSC_APP_NAME = "ssc" +TRAEFIK = "traefik-k8s" +TRAEFIK_APP_NAME = "trfk" +TRACEGEN_SCRIPT_PATH = Path() / "scripts" / "tracegen.py" + +logger = logging.getLogger(__name__) + + +@pytest.fixture(scope="function") +def nonce(): + """Generate an integer nonce for easier trace querying.""" + return str(random.random())[2:] + + +@pytest.fixture(scope="function") +def server_cert(ops_test: OpsTest): + data = get_relation_data( + requirer_endpoint=f"{APP_NAME}/0:certificates", + provider_endpoint=f"{SSC_APP_NAME}/0:certificates", + model=ops_test.model.name, + ) + cert = json.loads(data.provider.application_data["certificates"])[0]["certificate"] + + with tempfile.NamedTemporaryFile() as f: + p = Path(f.name) + p.write_text(cert) + yield p + + +def get_traces(tempo_host: str, nonce, service_name="tracegen"): + req = requests.get( + "https://" + tempo_host + ":3200/api/search", + params={"service.name": service_name, "nonce": nonce}, + verify=False, + ) + assert req.status_code == 200 + return 
json.loads(req.text)["traces"] + + +@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=10)) +async def get_traces_patiently(tempo_host, nonce): + assert get_traces(tempo_host, nonce=nonce) + + +async def get_tempo_host(ops_test: OpsTest): + status = await ops_test.model.get_status() + app = status["applications"][TRAEFIK_APP_NAME] + return app.public_address + + +async def emit_trace( + endpoint, ops_test: OpsTest, nonce, proto: str = "http", verbose=0, use_cert=False +): + """Use juju ssh to run tracegen from the tempo charm; to avoid any DNS issues.""" + cmd = ( + f"juju ssh -m {ops_test.model_name} {APP_NAME}/0 " + f"TRACEGEN_ENDPOINT={endpoint} " + f"TRACEGEN_VERBOSE={verbose} " + f"TRACEGEN_PROTOCOL={proto} " + f"TRACEGEN_CERT={Tempo.tls_ca_path if use_cert else ''} " + f"TRACEGEN_NONCE={nonce} " + "python3 tracegen.py" + ) + + return subprocess.getoutput(cmd) + + +@pytest.mark.setup +@pytest.mark.abort_on_fail +async def test_build_and_deploy(ops_test: OpsTest): + # tempo_charm = await ops_test.build_charm(".") + tempo_charm = "/home/michael/Work/tempo-coordinator-k8s-operator/charm" + resources = { + "nginx-image": METADATA["resources"]["nginx-image"]["upstream-source"], + "nginx-prometheus-exporter-image": METADATA["resources"][ + "nginx-prometheus-exporter-image" + ]["upstream-source"], + } + await asyncio.gather( + ops_test.model.deploy(tempo_charm, resources=resources, application_name=APP_NAME), + ops_test.model.deploy(SSC, application_name=SSC_APP_NAME), + ops_test.model.deploy(TRAEFIK, application_name=TRAEFIK_APP_NAME, channel="edge"), + ) + + # deploy cluster + await deploy_cluster(ops_test) + + await asyncio.gather( + ops_test.model.wait_for_idle( + apps=[APP_NAME, SSC_APP_NAME, TRAEFIK_APP_NAME], + status="active", + raise_on_blocked=True, + timeout=10000, + raise_on_error=False, + ), + ) + + +@pytest.mark.setup +@pytest.mark.abort_on_fail +async def test_push_tracegen_script_and_deps(ops_test: OpsTest): + await ops_test.juju("scp", TRACEGEN_SCRIPT_PATH, f"{APP_NAME}/0:tracegen.py") + await ops_test.juju( + "ssh", + f"{APP_NAME}/0", + "python3 -m pip install opentelemetry-exporter-otlp-proto-grpc opentelemetry-exporter-otlp-proto-http", + ) + + +@pytest.mark.setup +@pytest.mark.abort_on_fail +async def test_relate(ops_test: OpsTest): + await ops_test.model.integrate(APP_NAME + ":certificates", SSC_APP_NAME + ":certificates") + await ops_test.model.integrate( + SSC_APP_NAME + ":certificates", TRAEFIK_APP_NAME + ":certificates" + ) + await ops_test.model.integrate(APP_NAME + ":ingress", TRAEFIK_APP_NAME + ":traefik-route") + await ops_test.model.wait_for_idle( + apps=[APP_NAME, SSC_APP_NAME, TRAEFIK_APP_NAME], + status="active", + timeout=1000, + ) + + +# TODO: Uncomment and fix once below issue is fixed +# Currently, traefik, through `traefik_route`, accepts traffic through http although TLS is enabled. 
# @pytest.mark.abort_on_fail # async def test_verify_ingressed_trace_http_upgrades_to_tls(ops_test: OpsTest, nonce): # tempo_host = await get_tempo_host(ops_test) @@ -145,34 +158,49 @@ # assert get_traces_patiently(tempo_host, nonce=nonce) -# @pytest.mark.abort_on_fail -# async def test_verify_ingressed_trace_http_tls(ops_test: OpsTest, nonce, server_cert): -# tempo_host = await get_tempo_host(ops_test) -# await emit_trace( -# f"https://{tempo_host}:4318/v1/traces", nonce=nonce, ops_test=ops_test, use_cert=True -# ) -# # THEN we can verify it's been ingested -# assert get_traces_patiently(tempo_host, nonce=nonce) - - -# @pytest.mark.abort_on_fail -# async def test_verify_ingressed_traces_grpc_tls(ops_test: OpsTest, nonce, server_cert): -# tempo_host = await get_tempo_host(ops_test) -# await emit_trace( -# f"{tempo_host}:4317", nonce=nonce, proto="grpc", ops_test=ops_test, use_cert=True -# ) -# # THEN we can verify it's been ingested -# assert get_traces_patiently(tempo_host, nonce=nonce) - - -# @pytest.mark.teardown -# @pytest.mark.abort_on_fail -# async def test_remove_relation(ops_test: OpsTest): -# await ops_test.juju( -# "remove-relation", APP_NAME + ":certificates", SSC_APP_NAME + ":certificates" -# ) -# await asyncio.gather( -# ops_test.model.wait_for_idle( -# apps=[APP_NAME], status="active", raise_on_blocked=True, timeout=1000 -# ), -# ) +@pytest.mark.abort_on_fail +async def test_verify_ingressed_trace_http_tls(ops_test: OpsTest, nonce, server_cert): + tempo_host = await get_tempo_host(ops_test) + + await emit_trace( + f"https://{tempo_host}:4318/v1/traces", nonce=nonce, ops_test=ops_test, use_cert=True + ) + # THEN we can verify it's been ingested + assert get_traces_patiently(tempo_host, nonce=nonce) + + +@pytest.mark.abort_on_fail +async def test_verify_ingressed_traces_grpc_tls(ops_test: OpsTest, nonce, server_cert): + # enable otlp grpc receiver + tempo_app: Application = ops_test.model.applications[APP_NAME] + await tempo_app.set_config( + { + "always_enable_otlp_grpc": "True", + } + ) + await ops_test.model.wait_for_idle( + apps=[APP_NAME], + status="active", + timeout=1000, + ) + + tempo_host = await get_tempo_host(ops_test) + + await emit_trace( + f"{tempo_host}:4317", nonce=nonce, proto="grpc", ops_test=ops_test, use_cert=True + ) + # THEN we can verify it's been ingested + assert get_traces_patiently(tempo_host, nonce=nonce) + + +@pytest.mark.teardown +@pytest.mark.abort_on_fail +async def test_remove_relation(ops_test: OpsTest): + await ops_test.juju( + "remove-relation", APP_NAME + ":certificates", SSC_APP_NAME + ":certificates" + ) + await asyncio.gather( + ops_test.model.wait_for_idle( + apps=[APP_NAME], status="active", raise_on_blocked=True, timeout=1000 + ), + ) diff --git a/tests/integration/test_integration.py b/tests/integration/test_integration.py index 2010ace..eec0039 100644 --- a/tests/integration/test_integration.py +++ b/tests/integration/test_integration.py @@ -1,166 +1,168 @@ -# TODO: uncomment and fix when the fully functional tempo cluster is ready (e.g: traces get ingested, can query for traces) -# import asyncio -# import json -# import logging -# from pathlib import Path - -# import pytest -# import yaml -# from pytest_operator.plugin import OpsTest - -# METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text()) -# APP_NAME = METADATA["name"] -# TESTER_METADATA = yaml.safe_load(Path("./tests/integration/tester/metadata.yaml").read_text()) -# TESTER_APP_NAME = TESTER_METADATA["name"] -# TESTER_GRPC_METADATA = yaml.safe_load( -# 
Path("./tests/integration/tester-grpc/metadata.yaml").read_text() -# ) -# TESTER_GRPC_APP_NAME = TESTER_GRPC_METADATA["name"] - -# logger = logging.getLogger(__name__) - - -# @pytest.mark.setup -# @pytest.mark.abort_on_fail -# async def test_build_and_deploy(ops_test: OpsTest): -# # Given a fresh build of the charm -# # When deploying it together with testers -# # Then applications should eventually be created -# tempo_charm = await ops_test.build_charm(".") -# tester_charm = await ops_test.build_charm("./tests/integration/tester/") -# tester_grpc_charm = await ops_test.build_charm("./tests/integration/tester-grpc/") -# resources_tester = {"workload": TESTER_METADATA["resources"]["workload"]["upstream-source"]} -# resources_tester_grpc = { -# "workload": TESTER_GRPC_METADATA["resources"]["workload"]["upstream-source"] -# } - -# await asyncio.gather( -# ops_test.model.deploy(tempo_charm, application_name=APP_NAME), -# ops_test.model.deploy( -# tester_charm, -# resources=resources_tester, -# application_name=TESTER_APP_NAME, -# num_units=3, -# ), -# ops_test.model.deploy( -# tester_grpc_charm, -# resources=resources_tester_grpc, -# application_name=TESTER_GRPC_APP_NAME, -# num_units=3, -# ), -# ) - -# await asyncio.gather( -# ops_test.model.wait_for_idle( -# apps=[APP_NAME], -# status="active", -# raise_on_blocked=True, -# timeout=10000, -# raise_on_error=False, -# ), -# # for tester, depending on the result of race with tempo it's either waiting or active -# ops_test.model.wait_for_idle( -# apps=[TESTER_APP_NAME], raise_on_blocked=True, timeout=1000, raise_on_error=False -# ), -# ops_test.model.wait_for_idle( -# apps=[TESTER_GRPC_APP_NAME], raise_on_blocked=True, timeout=1000, raise_on_error=False -# ), -# ) - -# assert ops_test.model.applications[APP_NAME].units[0].workload_status == "active" - - -# @pytest.mark.setup -# @pytest.mark.abort_on_fail -# async def test_relate(ops_test: OpsTest): -# # given a deployed charm -# # when relating it together with the tester -# # then relation should appear -# await ops_test.model.add_relation(APP_NAME + ":tracing", TESTER_APP_NAME + ":tracing") -# await ops_test.model.add_relation(APP_NAME + ":tracing", TESTER_GRPC_APP_NAME + ":tracing") -# await ops_test.model.wait_for_idle( -# apps=[APP_NAME, TESTER_APP_NAME, TESTER_GRPC_APP_NAME], -# status="active", -# timeout=1000, -# ) - - -# async def test_verify_traces_http(ops_test: OpsTest): -# # given a relation between charms -# # when traces endpoint is queried -# # then it should contain traces from tester charm -# status = await ops_test.model.get_status() -# app = status["applications"][APP_NAME] -# logger.info(app.public_address) -# endpoint = app.public_address + ":3200/api/search" -# cmd = [ -# "curl", -# endpoint, -# ] -# rc, stdout, stderr = await ops_test.run(*cmd) -# logger.info("%s: %s", endpoint, (rc, stdout, stderr)) -# assert rc == 0, ( -# f"curl exited with rc={rc} for {endpoint}; " -# f"non-zero return code means curl encountered a >= 400 HTTP code; " -# f"cmd={cmd}" -# ) -# traces = json.loads(stdout)["traces"] - -# found = False -# for trace in traces: -# if trace["rootServiceName"] == APP_NAME and trace["rootTraceName"] == "charm exec": -# found = True - -# assert found, f"There's no trace of charm exec traces in tempo. 
{json.dumps(traces, indent=2)}" - - -# async def test_verify_traces_grpc(ops_test: OpsTest): -# # the tester-grpc charm emits a single grpc trace in its common exit hook -# # we verify it's there -# status = await ops_test.model.get_status() -# app = status["applications"][APP_NAME] -# logger.info(app.public_address) -# endpoint = app.public_address + ":3200/api/search" -# cmd = [ -# "curl", -# endpoint, -# ] -# rc, stdout, stderr = await ops_test.run(*cmd) -# logger.info("%s: %s", endpoint, (rc, stdout, stderr)) -# assert rc == 0, ( -# f"curl exited with rc={rc} for {endpoint}; " -# f"non-zero return code means curl encountered a >= 400 HTTP code; " -# f"cmd={cmd}" -# ) -# traces = json.loads(stdout)["traces"] - -# found = False -# for trace in traces: -# if trace["rootServiceName"] == "TempoTesterGrpcCharm": -# found = True - -# assert ( -# found -# ), f"There's no trace of generated grpc traces in tempo. {json.dumps(traces, indent=2)}" - - -# @pytest.mark.teardown -# @pytest.mark.abort_on_fail -# async def test_remove_relation(ops_test: OpsTest): -# # given related charms -# # when relation is removed -# # then both charms should become active again -# await ops_test.juju("remove-relation", APP_NAME + ":tracing", TESTER_APP_NAME + ":tracing") -# await ops_test.juju( -# "remove-relation", APP_NAME + ":tracing", TESTER_GRPC_APP_NAME + ":tracing" -# ) -# await asyncio.gather( -# ops_test.model.wait_for_idle( -# apps=[APP_NAME], status="active", raise_on_blocked=True, timeout=1000 -# ), -# # for tester, depending on the result of race with tempo it's either waiting or active -# ops_test.model.wait_for_idle(apps=[TESTER_APP_NAME], raise_on_blocked=True, timeout=1000), -# ops_test.model.wait_for_idle( -# apps=[TESTER_GRPC_APP_NAME], raise_on_blocked=True, timeout=1000 -# ), -# ) +import asyncio +import json +import logging +from pathlib import Path + +import pytest +import yaml +from helpers import deploy_cluster +from pytest_operator.plugin import OpsTest + +METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text()) +APP_NAME = "tempo" +TESTER_METADATA = yaml.safe_load(Path("./tests/integration/tester/metadata.yaml").read_text()) +TESTER_APP_NAME = TESTER_METADATA["name"] +TESTER_GRPC_METADATA = yaml.safe_load( + Path("./tests/integration/tester-grpc/metadata.yaml").read_text() +) +TESTER_GRPC_APP_NAME = TESTER_GRPC_METADATA["name"] + +logger = logging.getLogger(__name__) + + +@pytest.mark.setup +@pytest.mark.abort_on_fail +async def test_build_and_deploy(ops_test: OpsTest): + # Given a fresh build of the charm + # When deploying it together with testers + # Then applications should eventually be created + # tempo_charm = await ops_test.build_charm(".") + tempo_charm = "/home/michael/Work/tempo-coordinator-k8s-operator/charm" + tester_charm = await ops_test.build_charm("./tests/integration/tester/") + tester_grpc_charm = await ops_test.build_charm("./tests/integration/tester-grpc/") + resources = { + "nginx-image": METADATA["resources"]["nginx-image"]["upstream-source"], + "nginx-prometheus-exporter-image": METADATA["resources"][ + "nginx-prometheus-exporter-image" + ]["upstream-source"], + } + resources_tester = {"workload": TESTER_METADATA["resources"]["workload"]["upstream-source"]} + resources_tester_grpc = { + "workload": TESTER_GRPC_METADATA["resources"]["workload"]["upstream-source"] + } + + await asyncio.gather( + ops_test.model.deploy(tempo_charm, resources=resources, application_name=APP_NAME), + ops_test.model.deploy( + tester_charm, + resources=resources_tester, + 
application_name=TESTER_APP_NAME, + num_units=3, + ), + ops_test.model.deploy( + tester_grpc_charm, + resources=resources_tester_grpc, + application_name=TESTER_GRPC_APP_NAME, + num_units=3, + ), + ) + + # deploy cluster + await deploy_cluster(ops_test) + + await asyncio.gather( + # for tester, depending on the result of race with tempo it's either waiting or active + ops_test.model.wait_for_idle( + apps=[TESTER_APP_NAME], raise_on_blocked=True, timeout=1000, raise_on_error=False + ), + ops_test.model.wait_for_idle( + apps=[TESTER_GRPC_APP_NAME], raise_on_blocked=True, timeout=1000, raise_on_error=False + ), + ) + + assert ops_test.model.applications[APP_NAME].units[0].workload_status == "active" + + +@pytest.mark.setup +@pytest.mark.abort_on_fail +async def test_relate(ops_test: OpsTest): + # given a deployed charm + # when relating it together with the tester + # then relation should appear + await ops_test.model.add_relation(APP_NAME + ":tracing", TESTER_APP_NAME + ":tracing") + await ops_test.model.add_relation(APP_NAME + ":tracing", TESTER_GRPC_APP_NAME + ":tracing") + await ops_test.model.wait_for_idle( + apps=[APP_NAME, TESTER_APP_NAME, TESTER_GRPC_APP_NAME], + status="active", + timeout=1000, + ) + + +async def test_verify_traces_http(ops_test: OpsTest): + # given a relation between charms + # when traces endpoint is queried + # then it should contain traces from tester charm + status = await ops_test.model.get_status() + app = status["applications"][APP_NAME] + endpoint = app.public_address + ":3200/api/search" + cmd = [ + "curl", + endpoint, + ] + rc, stdout, stderr = await ops_test.run(*cmd) + logger.info("%s: %s", endpoint, (rc, stdout, stderr)) + assert rc == 0, ( + f"curl exited with rc={rc} for {endpoint}; " + f"non-zero return code means curl encountered a >= 400 HTTP code; " + f"cmd={cmd}" + ) + traces = json.loads(stdout)["traces"] + + found = False + for trace in traces: + if trace["rootServiceName"] == "TempoTesterCharm": + found = True + + assert found, f"There's no trace of charm exec traces in tempo. {json.dumps(traces, indent=2)}" + + +async def test_verify_traces_grpc(ops_test: OpsTest): + # the tester-grpc charm emits a single grpc trace in its common exit hook + # we verify it's there + status = await ops_test.model.get_status() + app = status["applications"][APP_NAME] + logger.info(app.public_address) + endpoint = app.public_address + ":3200/api/search" + cmd = [ + "curl", + endpoint, + ] + rc, stdout, stderr = await ops_test.run(*cmd) + logger.info("%s: %s", endpoint, (rc, stdout, stderr)) + assert rc == 0, ( + f"curl exited with rc={rc} for {endpoint}; " + f"non-zero return code means curl encountered a >= 400 HTTP code; " + f"cmd={cmd}" + ) + traces = json.loads(stdout)["traces"] + + found = False + for trace in traces: + if trace["rootServiceName"] == "TempoTesterGrpcCharm": + found = True + + assert ( + found + ), f"There's no trace of generated grpc traces in tempo. 
{json.dumps(traces, indent=2)}" + + +@pytest.mark.teardown +@pytest.mark.abort_on_fail +async def test_remove_relation(ops_test: OpsTest): + # given related charms + # when relation is removed + # then both charms should become active again + await ops_test.juju("remove-relation", APP_NAME + ":tracing", TESTER_APP_NAME + ":tracing") + await ops_test.juju( + "remove-relation", APP_NAME + ":tracing", TESTER_GRPC_APP_NAME + ":tracing" + ) + await asyncio.gather( + ops_test.model.wait_for_idle( + apps=[APP_NAME], status="active", raise_on_blocked=True, timeout=1000 + ), + # for tester, depending on the result of race with tempo it's either waiting or active + ops_test.model.wait_for_idle(apps=[TESTER_APP_NAME], raise_on_blocked=True, timeout=1000), + ops_test.model.wait_for_idle( + apps=[TESTER_GRPC_APP_NAME], raise_on_blocked=True, timeout=1000 + ), + ) diff --git a/tests/integration/test_scaling_monolithic.py b/tests/integration/test_scaling_monolithic.py index ed76714..f7a247f 100644 --- a/tests/integration/test_scaling_monolithic.py +++ b/tests/integration/test_scaling_monolithic.py @@ -1,22 +1,16 @@ -import json import logging -import os -import shlex -import tempfile from pathlib import Path -from subprocess import run -from typing import Dict, Literal import pytest import yaml +from helpers import deploy_cluster from juju.application import Application from pytest_operator.plugin import OpsTest METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text()) -APP_NAME = "tempo-coordinator" -FACADE = "facade" +APP_NAME = "tempo" +S3_INTEGRATOR = "s3-integrator" TRACEGEN_SCRIPT_PATH = Path() / "scripts" / "tracegen.py" -FACADE_MOCKS_PATH = "/var/lib/juju/agents/unit-facade-0/charm/mocks" logger = logging.getLogger(__name__) @@ -24,7 +18,9 @@ @pytest.mark.setup @pytest.mark.abort_on_fail async def test_deploy_tempo(ops_test: OpsTest): - tempo_charm = await ops_test.build_charm(".") + # tempo_charm = await ops_test.build_charm(".") + tempo_charm = "/home/michael/Work/tempo-coordinator-k8s-operator/charm" + resources = { "nginx-image": METADATA["resources"]["nginx-image"]["upstream-source"], "nginx-prometheus-exporter-image": METADATA["resources"][ @@ -54,85 +50,15 @@ async def test_scale_tempo_up_without_s3_blocks(ops_test: OpsTest): ) -def present_facade( - interface: str, - app_data: Dict = None, - unit_data: Dict = None, - role: Literal["provide", "require"] = "provide", - model: str = None, - app: str = "facade", -): - """Set up the facade charm to present this data over the interface ``interface``.""" - data = { - "endpoint": f"{role}-{interface}", - } - if app_data: - data["app_data"] = json.dumps(app_data) - if unit_data: - data["unit_data"] = json.dumps(unit_data) - - with tempfile.NamedTemporaryFile(dir=os.getcwd()) as f: - fpath = Path(f.name) - fpath.write_text(yaml.safe_dump(data)) - - _model = f" --model {model}" if model else "" - - run(shlex.split(f"juju run {app}/0{_model} update --params {fpath.absolute()}")) - - @pytest.mark.setup @pytest.mark.abort_on_fail -async def test_tempo_active_when_deploy_s3_and_workers_facade(ops_test: OpsTest): - await ops_test.model.deploy(FACADE, channel="edge") - await ops_test.model.wait_for_idle( - apps=[FACADE], raise_on_blocked=True, status="active", timeout=2000 - ) - - await ops_test.model.integrate(APP_NAME + ":s3", FACADE + ":provide-s3") - await ops_test.model.integrate(APP_NAME + ":tempo-cluster", FACADE + ":require-tempo_cluster") - - present_facade( - "s3", - model=ops_test.model_name, - app_data={ - "access-key": "key", - 
"bucket": "tempo", - "endpoint": "http://1.2.3.4:9000", - "secret-key": "soverysecret", - }, - ) - - present_facade( - "tempo_cluster", - model=ops_test.model_name, - app_data={ - "role": '"all"', - }, - unit_data={ - "juju_topology": json.dumps({"model": ops_test.model_name, "unit": FACADE + "/0"}), - "address": FACADE + ".cluster.local.svc", - }, - role="require", - ) - - await ops_test.model.wait_for_idle( - apps=[FACADE], - raise_on_blocked=True, - status="active", - timeout=2000, - ) - - await ops_test.model.wait_for_idle( - apps=[APP_NAME], - raise_on_blocked=True, - status="active", - timeout=10000, - ) +async def test_tempo_active_when_deploy_s3_and_workers(ops_test: OpsTest): + await deploy_cluster(ops_test) @pytest.mark.teardown async def test_tempo_blocks_if_s3_goes_away(ops_test: OpsTest): - app: Application = ops_test.model.applications[FACADE] + app: Application = ops_test.model.applications[S3_INTEGRATOR] await app.destroy(destroy_storage=True) await ops_test.model.wait_for_idle( apps=[APP_NAME], diff --git a/tests/integration/test_tls.py b/tests/integration/test_tls.py index 5ea977f..5781f56 100644 --- a/tests/integration/test_tls.py +++ b/tests/integration/test_tls.py @@ -1,171 +1,200 @@ -# TODO: uncomment and fix when the fully functional tempo cluster is ready (e.g: traces get ingested, can query for traces) -# import asyncio -# import json -# import logging -# import random -# import tempfile -# from pathlib import Path -# from subprocess import getoutput - -# import pytest -# import requests -# import yaml -# from pytest_operator.plugin import OpsTest -# from tenacity import retry, stop_after_attempt, wait_exponential - -# from tempo import Tempo -# from tests.integration.helpers import get_relation_data - -# METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text()) -# APP_NAME = "tempo" -# SSC = "self-signed-certificates" -# SSC_APP_NAME = "ssc" -# TRACEGEN_SCRIPT_PATH = Path() / "scripts" / "tracegen.py" -# logger = logging.getLogger(__name__) - - -# @pytest.fixture(scope="function") -# def nonce(): -# """Generate an integer nonce for easier trace querying.""" -# return str(random.random())[2:] - - -# def get_traces(tempo_host: str, nonce): -# url = "https://" + tempo_host + ":3200/api/search" -# req = requests.get( -# url, -# params={"q": f'{{ .nonce = "{nonce}" }}'}, -# # it would fail to verify as the cert was issued for fqdn, not IP. 
-# verify=False, -# ) -# assert req.status_code == 200 -# return json.loads(req.text)["traces"] - - -# @retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=10)) -# async def get_traces_patiently(ops_test, nonce): -# assert get_traces(await get_tempo_ip(ops_test), nonce=nonce) - - -# async def get_tempo_ip(ops_test: OpsTest): -# status = await ops_test.model.get_status() -# app = status["applications"][APP_NAME] -# return app.public_address - - -# async def get_tempo_internal_host(ops_test: OpsTest): -# return f"https://{APP_NAME}-0.{APP_NAME}-endpoints.{ops_test.model.name}.svc.cluster.local" - - -# @pytest.fixture(scope="function") -# def server_cert(ops_test: OpsTest): -# data = get_relation_data( -# requirer_endpoint=f"{APP_NAME}/0:certificates", -# provider_endpoint=f"{SSC_APP_NAME}/0:certificates", -# model=ops_test.model.name, -# ) -# cert = json.loads(data.provider.application_data["certificates"])[0]["certificate"] - -# with tempfile.NamedTemporaryFile() as f: -# p = Path(f.name) -# p.write_text(cert) -# yield p - - -# async def emit_trace(ops_test: OpsTest, nonce, proto: str = "http", verbose=0, use_cert=False): -# """Use juju ssh to run tracegen from the tempo charm; to avoid any DNS issues.""" -# hostname = await get_tempo_internal_host(ops_test) -# cmd = ( -# f"juju ssh -m {ops_test.model_name} {APP_NAME}/0 " -# f"TRACEGEN_ENDPOINT={hostname}:4318/v1/traces " -# f"TRACEGEN_VERBOSE={verbose} " -# f"TRACEGEN_PROTOCOL={proto} " -# f"TRACEGEN_CERT={Tempo.server_cert_path if use_cert else ''} " -# f"TRACEGEN_NONCE={nonce} " -# "python3 tracegen.py" -# ) - -# return getoutput(cmd) - - -# @pytest.mark.setup -# @pytest.mark.abort_on_fail -# async def test_build_and_deploy(ops_test: OpsTest): -# tempo_charm = await ops_test.build_charm(".") -# resources = { -# "tempo-image": METADATA["resources"]["tempo-image"]["upstream-source"], -# } -# await asyncio.gather( -# ops_test.model.deploy(tempo_charm, resources=resources, application_name=APP_NAME), -# ops_test.model.deploy(SSC, application_name=SSC_APP_NAME), -# ) - -# await asyncio.gather( -# ops_test.model.wait_for_idle( -# apps=[APP_NAME, SSC_APP_NAME], -# status="active", -# raise_on_blocked=True, -# timeout=10000, -# raise_on_error=False, -# ), -# ) - - -# @pytest.mark.setup -# @pytest.mark.abort_on_fail -# async def test_relate(ops_test: OpsTest): -# await ops_test.model.integrate(APP_NAME + ":certificates", SSC_APP_NAME + ":certificates") -# await ops_test.model.wait_for_idle( -# apps=[APP_NAME, SSC_APP_NAME], -# status="active", -# timeout=1000, -# ) - - -# @pytest.mark.setup -# @pytest.mark.abort_on_fail -# async def test_push_tracegen_script_and_deps(ops_test: OpsTest): -# await ops_test.juju("scp", TRACEGEN_SCRIPT_PATH, f"{APP_NAME}/0:tracegen.py") -# await ops_test.juju( -# "ssh", -# f"{APP_NAME}/0", -# "python3 -m pip install opentelemetry-exporter-otlp-proto-grpc opentelemetry-exporter-otlp-proto-http", -# ) - - -# async def test_verify_trace_http_no_tls_fails(ops_test: OpsTest, server_cert, nonce): -# # IF tempo is related to SSC -# # WHEN we emit an http trace, **unsecured** -# await emit_trace(ops_test, nonce=nonce) # this should fail -# # THEN we can verify it's not been ingested -# tempo_ip = await get_tempo_ip(ops_test) -# traces = get_traces(tempo_ip, nonce=nonce) -# assert not traces - - -# async def test_verify_trace_http_tls(ops_test: OpsTest, nonce, server_cert): -# # WHEN we emit a trace secured with TLS -# await emit_trace(ops_test, nonce=nonce, use_cert=True) -# # THEN we can verify 
it's eventually ingested -# await get_traces_patiently(ops_test, nonce) - - -# @pytest.mark.xfail # expected to fail because in this context the grpc receiver is not enabled -# async def test_verify_traces_grpc_tls(ops_test: OpsTest, nonce, server_cert): -# # WHEN we emit a trace secured with TLS -# await emit_trace(ops_test, nonce=nonce, verbose=1, proto="grpc", use_cert=True) -# # THEN we can verify it's been ingested -# await get_traces_patiently(ops_test, nonce) - - -# @pytest.mark.teardown -# @pytest.mark.abort_on_fail -# async def test_remove_relation(ops_test: OpsTest): -# await ops_test.juju( -# "remove-relation", APP_NAME + ":certificates", SSC_APP_NAME + ":certificates" -# ) -# await asyncio.gather( -# ops_test.model.wait_for_idle( -# apps=[APP_NAME], status="active", raise_on_blocked=True, timeout=1000 -# ), -# ) +import asyncio +import json +import logging +import random +import tempfile +from pathlib import Path +from subprocess import getoutput + +import pytest +import requests +import yaml +from helpers import deploy_cluster +from juju.application import Application +from pytest_operator.plugin import OpsTest +from tenacity import retry, stop_after_attempt, wait_exponential + +from tempo import Tempo +from tests.integration.helpers import get_relation_data + +METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text()) +APP_NAME = "tempo" +SSC = "self-signed-certificates" +SSC_APP_NAME = "ssc" +TRACEGEN_SCRIPT_PATH = Path() / "scripts" / "tracegen.py" +logger = logging.getLogger(__name__) + + +@pytest.fixture(scope="function") +def nonce(): + """Generate an integer nonce for easier trace querying.""" + return str(random.random())[2:] + + +def get_traces(tempo_host: str, nonce): + url = "https://" + tempo_host + ":3200/api/search" + req = requests.get( + url, + params={"q": f'{{ .nonce = "{nonce}" }}'}, + # it would fail to verify as the cert was issued for fqdn, not IP. 
+ verify=False, + ) + assert req.status_code == 200 + return json.loads(req.text)["traces"] + + +@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=10)) +async def get_traces_patiently(ops_test, nonce): + assert get_traces(await get_tempo_ip(ops_test), nonce=nonce) + + +async def get_tempo_ip(ops_test: OpsTest): + status = await ops_test.model.get_status() + app = status["applications"][APP_NAME] + return app.public_address + + +async def get_tempo_internal_host(ops_test: OpsTest): + return f"{APP_NAME}-0.{APP_NAME}-endpoints.{ops_test.model.name}.svc.cluster.local" + + +@pytest.fixture(scope="function") +def server_cert(ops_test: OpsTest): + data = get_relation_data( + requirer_endpoint=f"{APP_NAME}/0:certificates", + provider_endpoint=f"{SSC_APP_NAME}/0:certificates", + model=ops_test.model.name, + ) + cert = json.loads(data.provider.application_data["certificates"])[0]["certificate"] + + with tempfile.NamedTemporaryFile() as f: + p = Path(f.name) + p.write_text(cert) + yield p + + +async def emit_trace(ops_test: OpsTest, nonce, proto: str = "http", verbose=0, use_cert=False): + """Use juju ssh to run tracegen from the tempo charm; to avoid any DNS issues.""" + hostname = await get_tempo_internal_host(ops_test) + port = "4317" + endpoint_postfix = "" + if proto == "http": + hostname = f"https://{hostname}" + port = "4318" + endpoint_postfix = "/v1/traces" + + cmd = ( + f"juju ssh -m {ops_test.model_name} {APP_NAME}/0 " + f"TRACEGEN_ENDPOINT={hostname}:{port}{endpoint_postfix} " + f"TRACEGEN_VERBOSE={verbose} " + f"TRACEGEN_PROTOCOL={proto} " + f"TRACEGEN_CERT={Tempo.tls_ca_path if use_cert else ''} " + f"TRACEGEN_NONCE={nonce} " + "python3 tracegen.py" + ) + + return getoutput(cmd) + + +@pytest.mark.setup +@pytest.mark.abort_on_fail +async def test_build_and_deploy(ops_test: OpsTest): + # tempo_charm = await ops_test.build_charm(".") + tempo_charm = "/home/michael/Work/tempo-coordinator-k8s-operator/charm" + resources = { + "nginx-image": METADATA["resources"]["nginx-image"]["upstream-source"], + "nginx-prometheus-exporter-image": METADATA["resources"][ + "nginx-prometheus-exporter-image" + ]["upstream-source"], + } + await asyncio.gather( + ops_test.model.deploy(tempo_charm, resources=resources, application_name=APP_NAME), + ops_test.model.deploy(SSC, application_name=SSC_APP_NAME), + ) + + # deploy cluster + await deploy_cluster(ops_test) + + await asyncio.gather( + ops_test.model.wait_for_idle( + apps=[APP_NAME, SSC_APP_NAME], + status="active", + raise_on_blocked=True, + timeout=10000, + raise_on_error=False, + ), + ) + + +@pytest.mark.setup +@pytest.mark.abort_on_fail +async def test_relate(ops_test: OpsTest): + await ops_test.model.integrate(APP_NAME + ":certificates", SSC_APP_NAME + ":certificates") + await ops_test.model.wait_for_idle( + apps=[APP_NAME, SSC_APP_NAME], + status="active", + timeout=1000, + ) + + +@pytest.mark.setup +@pytest.mark.abort_on_fail +async def test_push_tracegen_script_and_deps(ops_test: OpsTest): + await ops_test.juju("scp", TRACEGEN_SCRIPT_PATH, f"{APP_NAME}/0:tracegen.py") + await ops_test.juju( + "ssh", + f"{APP_NAME}/0", + "python3 -m pip install opentelemetry-exporter-otlp-proto-grpc opentelemetry-exporter-otlp-proto-http", + ) + + +async def test_verify_trace_http_no_tls_fails(ops_test: OpsTest, server_cert, nonce): + # IF tempo is related to SSC + # WHEN we emit an http trace, **unsecured** + await emit_trace(ops_test, nonce=nonce) # this should fail + # THEN we can verify it's not been ingested + tempo_ip = await 
get_tempo_ip(ops_test) + traces = get_traces(tempo_ip, nonce=nonce) + assert not traces + + +@pytest.mark.abort_on_fail +async def test_verify_trace_http_tls(ops_test: OpsTest, nonce, server_cert): + # WHEN we emit a trace secured with TLS + await emit_trace(ops_test, nonce=nonce, use_cert=True) + # THEN we can verify it's eventually ingested + await get_traces_patiently(ops_test, nonce) + + +@pytest.mark.abort_on_fail +async def test_verify_traces_grpc_tls(ops_test: OpsTest, nonce, server_cert): + # enable otlp grpc receiver + tempo_app: Application = ops_test.model.applications[APP_NAME] + await tempo_app.set_config( + { + "always_enable_otlp_grpc": "True", + } + ) + await ops_test.model.wait_for_idle( + apps=[APP_NAME], + status="active", + timeout=1000, + ) + + # WHEN we emit a trace secured with TLS + await emit_trace(ops_test, nonce=nonce, verbose=1, proto="grpc", use_cert=True) + # THEN we can verify it's been ingested + await get_traces_patiently(ops_test, nonce) + + +@pytest.mark.teardown +@pytest.mark.abort_on_fail +async def test_remove_relation(ops_test: OpsTest): + await ops_test.juju( + "remove-relation", APP_NAME + ":certificates", SSC_APP_NAME + ":certificates" + ) + await asyncio.gather( + ops_test.model.wait_for_idle( + apps=[APP_NAME], status="active", raise_on_blocked=True, timeout=1000 + ), + ) diff --git a/tests/integration/tester/src/charm.py b/tests/integration/tester/src/charm.py index 72feedd..a5059a0 100755 --- a/tests/integration/tester/src/charm.py +++ b/tests/integration/tester/src/charm.py @@ -40,7 +40,7 @@ def __init__(self, *args): self.tracing = TracingEndpointRequirer( self, relation_name="tracing", protocols=["otlp_http", "otlp_grpc"] ) - self.tempo_otlp_http_endpoint = charm_tracing_config(self.tracing, None) + self.tempo_otlp_http_endpoint, _ = charm_tracing_config(self.tracing, None) # Core lifecycle events self.framework.observe(self.on.config_changed, self._update) diff --git a/tests/scenario/conftest.py b/tests/scenario/conftest.py index d5826b9..f40f72d 100644 --- a/tests/scenario/conftest.py +++ b/tests/scenario/conftest.py @@ -1,16 +1,21 @@ -from unittest.mock import patch +from unittest.mock import MagicMock, patch import pytest +from cosl.coordinated_workers.nginx import Nginx from scenario import Container, Context, Relation -from tempo_cluster import TempoClusterRequirerAppData, TempoRole from charm import TempoCoordinatorCharm +@pytest.fixture() +def coordinator(): + return MagicMock() + + @pytest.fixture def tempo_charm(): with patch("lightkube.core.client.GenericSyncClient"): - with patch("charm.TempoCoordinatorCharm._update_server_cert"): + with patch.object(Nginx, "are_certificates_on_disk", False): yield TempoCoordinatorCharm @@ -42,7 +47,7 @@ def s3(s3_config): def all_worker(): return Relation( "tempo-cluster", - remote_app_data=TempoClusterRequirerAppData(role=TempoRole.all).dump(), + remote_app_data={"role": '"all"'}, ) diff --git a/tests/scenario/helpers.py b/tests/scenario/helpers.py index dbe8452..445ca83 100644 --- a/tests/scenario/helpers.py +++ b/tests/scenario/helpers.py @@ -1,7 +1,7 @@ import scenario -from tempo_cluster import TempoClusterProviderAppData +from cosl.coordinated_workers.interface import ClusterProviderAppData def get_tempo_config(state: scenario.State): cluster_relation = state.get_relations("tempo-cluster")[0] # there's only one - return TempoClusterProviderAppData.load(cluster_relation.local_app_data).tempo_config + return ClusterProviderAppData.load(cluster_relation.local_app_data).worker_config diff 
--git a/tests/scenario/test_charm_statuses.py b/tests/scenario/test_charm_statuses.py index 5c16e6b..b623a74 100644 --- a/tests/scenario/test_charm_statuses.py +++ b/tests/scenario/test_charm_statuses.py @@ -3,8 +3,6 @@ import ops from scenario import PeerRelation, State -from tempo import Tempo - def test_monolithic_status_no_s3_no_workers(context): state_out = context.run("start", State(unit_status=ops.ActiveStatus(), leader=True)) @@ -48,9 +46,14 @@ def test_scaled_status_with_s3_and_workers( assert state_out.unit_status.name == "active" -@patch.object(Tempo, "is_ready", new=True) +@patch("charm.TempoCoordinatorCharm.is_workload_ready", return_value=True) def test_happy_status( - context, s3, all_worker, nginx_container, nginx_prometheus_exporter_container + workload_ready_mock, + context, + s3, + all_worker, + nginx_container, + nginx_prometheus_exporter_container, ): state_out = context.run( "start", diff --git a/tests/scenario/test_config.py b/tests/scenario/test_config.py index 9c8e95a..63052f1 100644 --- a/tests/scenario/test_config.py +++ b/tests/scenario/test_config.py @@ -1,5 +1,6 @@ +import json + from scenario import State -from tempo_cluster import TempoClusterRequirerUnitData from charm import TempoCoordinatorCharm @@ -10,18 +11,18 @@ def test_memberlist_multiple_members( workers_no = 3 all_worker = all_worker.replace( remote_units_data={ - worker_idx: TempoClusterRequirerUnitData( - **{ - "address": f"worker-{worker_idx}.test.svc.cluster.local:7946", - "juju_topology": { + worker_idx: { + "address": json.dumps(f"worker-{worker_idx}.test.svc.cluster.local:7946"), + "juju_topology": json.dumps( + { "model": "test", "unit": f"worker/{worker_idx}", "model_uuid": "1", "application": "worker", "charm_name": "TempoWorker", - }, - } - ).dump() + } + ), + } for worker_idx in range(workers_no) }, ) @@ -32,7 +33,7 @@ def test_memberlist_multiple_members( ) with context.manager(all_worker.changed_event, state) as mgr: charm: TempoCoordinatorCharm = mgr.charm - assert charm.tempo_cluster.gather_addresses() == set( + assert charm.coordinator.cluster.gather_addresses() == set( [ "worker-0.test.svc.cluster.local:7946", "worker-1.test.svc.cluster.local:7946", diff --git a/tests/scenario/test_enabled_receivers.py b/tests/scenario/test_enabled_receivers.py index c77d8b0..eb63cad 100644 --- a/tests/scenario/test_enabled_receivers.py +++ b/tests/scenario/test_enabled_receivers.py @@ -53,7 +53,7 @@ def test_receivers_with_relations( action_out = context.run_action("list-receivers", state) assert action_out.results == { "otlp-http": f"http://{socket.getfqdn()}:4318", - "otlp-grpc": f"http://{socket.getfqdn()}:4317", + "otlp-grpc": f"{socket.getfqdn()}:4317", } @@ -93,5 +93,5 @@ def test_receivers_with_relations_and_config( assert action_out.results == { "otlp-http": f"http://{socket.getfqdn()}:4318", "zipkin": f"http://{socket.getfqdn()}:9411", - "otlp-grpc": f"http://{socket.getfqdn()}:4317", + "otlp-grpc": f"{socket.getfqdn()}:4317", } diff --git a/tests/scenario/test_ingressed_tracing.py b/tests/scenario/test_ingressed_tracing.py index ec9c850..69a1916 100644 --- a/tests/scenario/test_ingressed_tracing.py +++ b/tests/scenario/test_ingressed_tracing.py @@ -5,8 +5,6 @@ from charms.tempo_k8s.v1.charm_tracing import charm_tracing_disabled from scenario import Relation, State -from tempo import Tempo - @pytest.fixture def base_state(nginx_container, nginx_prometheus_exporter_container): @@ -38,7 +36,7 @@ def test_ingress_relation_set_with_dynamic_config(context, base_state, s3, all_w ingress = 
Relation("ingress", remote_app_data={"external_host": "1.2.3.4", "scheme": "http"}) state = base_state.replace(relations=[ingress, s3, all_worker]) - with patch.object(Tempo, "is_ready", lambda _: False): + with patch("charm.TempoCoordinatorCharm.is_workload_ready", lambda _: False): out = context.run(ingress.joined_event, state) charm_name = "tempo-coordinator-k8s" diff --git a/tests/scenario/test_nginx.py b/tests/scenario/test_nginx.py index a629774..3c3786d 100644 --- a/tests/scenario/test_nginx.py +++ b/tests/scenario/test_nginx.py @@ -1,44 +1,23 @@ import logging from typing import List -from unittest.mock import MagicMock import pytest -from nginx import Nginx -from tempo_cluster import TempoClusterProvider +from nginx_config import NginxConfig from tempo import Tempo logger = logging.getLogger(__name__) -@pytest.fixture -def tempo_cluster_provider(): - cluster_mock = MagicMock() - return TempoClusterProvider(cluster_mock) - - -def test_nginx_config_is_list_before_crossplane(context, nginx_container, tempo_cluster_provider): - unit = MagicMock() - unit.get_container = nginx_container - tempo_charm = MagicMock() - tempo_charm.unit = MagicMock(return_value=unit) - - nginx = Nginx(tempo_charm, tempo_cluster_provider, "lolcathost") - - prepared_config = nginx._prepare_config() +def test_nginx_config_is_list_before_crossplane(context, nginx_container, coordinator): + nginx = NginxConfig("localhost") + prepared_config = nginx._prepare_config(coordinator) assert isinstance(prepared_config, List) -def test_nginx_config_is_parsed_by_crossplane(context, nginx_container, tempo_cluster_provider): - unit = MagicMock() - unit.get_container = nginx_container - tempo_charm = MagicMock() - tempo_charm.unit = MagicMock(return_value=unit) - - nginx = Nginx(tempo_charm, tempo_cluster_provider, "lolcathost") - logger.info(nginx._prepare_config()) - - prepared_config = nginx.config() +def test_nginx_config_is_parsed_by_crossplane(context, nginx_container, coordinator): + nginx = NginxConfig("localhost") + prepared_config = nginx.config(coordinator) assert isinstance(prepared_config, str) @@ -46,10 +25,7 @@ def test_nginx_config_is_parsed_by_crossplane(context, nginx_container, tempo_cl "addresses", ( {}, - {"all": {"1.2.3.4"}}, - {"all": {"1.2.3.4", "1.2.3.5"}}, { - "all": {"1.2.3.4"}, "distributor": {"1.2.3.5"}, "ingester": {"1.2.3.6"}, "querier": {"1.2.4.7"}, @@ -83,29 +59,19 @@ def test_nginx_config_is_parsed_by_crossplane(context, nginx_container, tempo_cl }, ), ) -def test_nginx_config_is_parsed_with_workers( - context, nginx_container, tempo_cluster_provider, addresses -): - tempo_cluster_provider.gather_addresses_by_role = MagicMock(return_value=addresses) +def test_nginx_config_is_parsed_with_workers(context, nginx_container, coordinator, addresses): + coordinator.cluster.gather_addresses_by_role.return_value = addresses - unit = MagicMock() - unit.get_container = nginx_container - tempo_charm = MagicMock() - tempo_charm.unit = MagicMock(return_value=unit) + nginx = NginxConfig("localhost") - nginx = Nginx(tempo_charm, tempo_cluster_provider, "lolcathost") - - prepared_config = nginx.config() + prepared_config = nginx.config(coordinator) assert isinstance(prepared_config, str) @pytest.mark.parametrize( "addresses", ( - {"all": {"1.2.3.4"}}, - {"all": {"1.2.3.4", "1.2.3.5"}}, { - "all": {"1.2.3.4"}, "distributor": {"1.2.3.5"}, "ingester": {"1.2.3.6"}, "querier": {"1.2.4.7"}, @@ -124,23 +90,16 @@ def test_nginx_config_is_parsed_with_workers( ), ) def 
test_nginx_config_contains_upstreams_and_proxy_pass( - context, nginx_container, tempo_cluster_provider, addresses + context, nginx_container, coordinator, addresses ): - tempo_cluster_provider.gather_addresses_by_role = MagicMock(return_value=addresses) - - unit = MagicMock() - unit.get_container = nginx_container - tempo_charm = MagicMock() - tempo_charm.unit = MagicMock(return_value=unit) + coordinator.cluster.gather_addresses_by_role.return_value = addresses - nginx = Nginx(tempo_charm, tempo_cluster_provider, "lolcathost") + nginx = NginxConfig("localhost") - prepared_config = nginx.config() + prepared_config = nginx.config(coordinator) for role, addresses in addresses.items(): for address in addresses: - if role == "all": - _assert_config_per_role(Tempo.all_ports, address, prepared_config) if role == "distributor": _assert_config_per_role(Tempo.receiver_ports, address, prepared_config) if role == "query-frontend": diff --git a/tests/scenario/test_tempo_clustered.py b/tests/scenario/test_tempo_clustered.py index 27d7988..9be1129 100644 --- a/tests/scenario/test_tempo_clustered.py +++ b/tests/scenario/test_tempo_clustered.py @@ -1,37 +1,49 @@ import datetime +import json from unittest.mock import MagicMock, patch import pytest import scenario from charms.tempo_k8s.v2.tracing import TracingRequirerAppData from charms.tls_certificates_interface.v3.tls_certificates import ProviderCertificate +from cosl.coordinated_workers.interface import ClusterProvider, ClusterProviderAppData from scenario import Relation, State -from tempo_cluster import TempoClusterProviderAppData from charm import TempoCoordinatorCharm from tempo import Tempo from tests.scenario.helpers import get_tempo_config +@pytest.fixture(scope="function") +def coordinator_with_initial_config(s3_config): + new_coordinator_mock = MagicMock() + new_coordinator_mock.return_value.tls_available = False + new_coordinator_mock.return_value.hostname = "tempo-test-0.test.cluster.svc.local" + new_coordinator_mock.return_value._s3_config = s3_config + new_coordinator_mock.return_value.cluster.gather_addresses.return_value = {"localhost"} + new_coordinator_mock.return_value.cluster.gather_addresses_by_role.return_value = { + "query-frontend": {"localhost"}, + "distributor": {"localhost"}, + } + + return new_coordinator_mock + + @pytest.fixture -def all_worker_with_initial_config(all_worker: Relation, s3_config): - container = MagicMock() - container.can_connect = lambda: True - # prevent tls_ready from reporting True - container.exists = lambda path: ( - False if path in [Tempo.tls_cert_path, Tempo.tls_key_path, Tempo.tls_ca_path] else True - ) - initial_config = Tempo(container).generate_config( - ["otlp_http"], s3_config, {"all": "localhost"} +def all_worker_with_initial_config(all_worker: Relation, coordinator_with_initial_config): + + initial_config = Tempo(lambda: ("otlp_http",)).config( + coordinator_with_initial_config.return_value ) - new_local_app_data = TempoClusterProviderAppData( - tempo_config=initial_config, - loki_endpoints={}, - ca_cert="foo cert", - server_cert="bar cert", - privkey_secret_id="super secret", - tempo_receiver={"otlp_http": "https://foo.com/fake_receiver"}, - ).dump() + + new_local_app_data = { + "worker_config": json.dumps(initial_config), + "ca_cert": json.dumps("foo cert"), + "server_cert": json.dumps("bar cert"), + "privkey_secret_id": json.dumps("super secret"), + "tracing_receivers": json.dumps({"otlp_http": "https://foo.com/fake_receiver"}), + } + return 
all_worker.replace(local_app_data=new_local_app_data) @@ -79,16 +91,16 @@ def state_with_certs( def test_certs_ready(context, state_with_certs): with context.manager("update-status", state_with_certs) as mgr: charm: TempoCoordinatorCharm = mgr.charm - assert charm.cert_handler.server_cert == MOCK_SERVER_CERT - assert charm.cert_handler.ca_cert == MOCK_CA_CERT - assert charm.cert_handler.private_key + assert charm.coordinator.cert_handler.server_cert == MOCK_SERVER_CERT + assert charm.coordinator.cert_handler.ca_cert == MOCK_CA_CERT + assert charm.coordinator.cert_handler.private_key def test_cluster_relation(context, state_with_certs, all_worker): clustered_state = state_with_certs.replace(relations=state_with_certs.relations + [all_worker]) state_out = context.run(all_worker.joined_event, clustered_state) cluster_out = state_out.get_relations(all_worker.endpoint)[0] - local_app_data = TempoClusterProviderAppData.load(cluster_out.local_app_data) + local_app_data = ClusterProviderAppData.load(cluster_out.local_app_data) assert local_app_data.ca_cert == MOCK_CA_CERT assert local_app_data.server_cert == MOCK_SERVER_CERT @@ -97,22 +109,26 @@ def test_cluster_relation(context, state_with_certs, all_worker): # certhandler's vault uses revision 0 to store an uninitialized-vault marker assert secret.contents[1]["private-key"] - assert local_app_data.tempo_config + assert local_app_data.worker_config +@patch.object(ClusterProvider, "gather_addresses") @pytest.mark.parametrize("requested_protocol", ("otlp_grpc", "zipkin")) def test_tempo_restart_on_ingress_v2_changed( + mock_cluster, + coordinator_with_initial_config, context, - tmp_path, requested_protocol, s3, - s3_config, all_worker_with_initial_config, nginx_container, nginx_prometheus_exporter_container, ): + mock_cluster.return_value = {"localhost"} + # GIVEN # the remote end requests an otlp_grpc endpoint + tracing = Relation( "tracing", remote_app_data=TracingRequirerAppData(receivers=[requested_protocol]).dump(), @@ -125,12 +141,13 @@ def test_tempo_restart_on_ingress_v2_changed( relations=[tracing, s3, all_worker_with_initial_config], containers=[nginx_container, nginx_prometheus_exporter_container], ) + state_out = context.run(tracing.changed_event, state) # THEN # Tempo pushes a new config to the all_worker new_config = get_tempo_config(state_out) - expected_config = Tempo().generate_config( - ["otlp_http", requested_protocol], s3_config, {"all": "localhost"} + expected_config = Tempo(lambda: ["otlp_http", requested_protocol]).config( + coordinator_with_initial_config.return_value ) assert new_config == expected_config diff --git a/tests/scenario/test_tls.py b/tests/scenario/test_tls.py index 8021733..777a0ee 100644 --- a/tests/scenario/test_tls.py +++ b/tests/scenario/test_tls.py @@ -4,10 +4,9 @@ import pytest from charms.tempo_k8s.v1.charm_tracing import charm_tracing_disabled from charms.tempo_k8s.v2.tracing import TracingProviderAppData, TracingRequirerAppData +from cosl.coordinated_workers.coordinator import Coordinator from scenario import Relation, Secret, State -from charm import TempoCoordinatorCharm - @pytest.fixture def base_state(nginx_container, nginx_prometheus_exporter_container): @@ -38,9 +37,7 @@ def update_relations_tls_and_verify( tracing, ): state = base_state.replace(relations=relations) - with charm_tracing_disabled(), patch.object( - TempoCoordinatorCharm, "tls_available", local_has_tls - ): + with charm_tracing_disabled(), patch.object(Coordinator, "tls_available", local_has_tls): out = 
context.run(tracing.changed_event, state) tracing_provider_app_data = TracingProviderAppData.load( out.get_relations(tracing.endpoint)[0].local_app_data diff --git a/tests/unit/test_coherence.py b/tests/unit/test_coherence.py index c3d9ab0..ad0fca5 100644 --- a/tests/unit/test_coherence.py +++ b/tests/unit/test_coherence.py @@ -1,21 +1,17 @@ -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch import pytest as pytest -from coordinator import ( +from cosl.coordinated_workers.coordinator import Coordinator + +from tempo_config import ( MINIMAL_DEPLOYMENT, RECOMMENDED_DEPLOYMENT, - TempoCoordinator, TempoRole, + TempoRolesConfig, ) -def _to_endpoint_name(role: TempoRole): - return role.value.replace("_", "-") - - -ALL_TEMPO_RELATION_NAMES = list(map(_to_endpoint_name, TempoRole)) - - +@patch("cosl.coordinated_workers.coordinator.Coordinator.__init__", return_value=None) @pytest.mark.parametrize( "roles, expected", ( @@ -26,13 +22,19 @@ def _to_endpoint_name(role: TempoRole): (RECOMMENDED_DEPLOYMENT, True), ), ) -def test_coherent(roles, expected): - mock = MagicMock() - mock.gather_roles = MagicMock(return_value=roles) - mc = TempoCoordinator(mock) +def test_coherent(mock_coordinator, roles, expected): + + mc = Coordinator(None, None, "", "", 0, None, None, None) + cluster_mock = MagicMock() + cluster_mock.gather_roles = MagicMock(return_value=roles) + mc.cluster = cluster_mock + mc._is_coherent = None + mc.roles_config = TempoRolesConfig() + assert mc.is_coherent is expected +@patch("cosl.coordinated_workers.coordinator.Coordinator.__init__", return_value=None) @pytest.mark.parametrize( "roles, expected", ( @@ -43,8 +45,12 @@ def test_coherent(roles, expected): (RECOMMENDED_DEPLOYMENT, True), ), ) -def test_recommended(roles, expected): - mock = MagicMock() - mock.gather_roles = MagicMock(return_value=roles) - mc = TempoCoordinator(mock) +def test_recommended(mock_coordinator, roles, expected): + mc = Coordinator(None, None, "", "", 0, None, None, None) + cluster_mock = MagicMock() + cluster_mock.gather_roles = MagicMock(return_value=roles) + mc.cluster = cluster_mock + mc._is_recommended = None + mc.roles_config = TempoRolesConfig() + assert mc.is_recommended is expected diff --git a/tests/unit/test_tempo.py b/tests/unit/test_tempo.py index ed0fbd2..27ca813 100644 --- a/tests/unit/test_tempo.py +++ b/tests/unit/test_tempo.py @@ -52,8 +52,8 @@ "thrift_http": { "tls": { "ca_file": "/usr/local/share/ca-certificates/ca.crt", - "cert_file": "/etc/tempo/tls/server.crt", - "key_file": "/etc/tempo/tls/server.key", + "cert_file": "/etc/worker/server.cert", + "key_file": "/etc/worker/private.key", } }, } @@ -61,8 +61,8 @@ "zipkin": { "tls": { "ca_file": "/usr/local/share/ca-certificates/ca.crt", - "cert_file": "/etc/tempo/tls/server.crt", - "key_file": "/etc/tempo/tls/server.key", + "cert_file": "/etc/worker/server.cert", + "key_file": "/etc/worker/private.key", } }, "otlp": { @@ -70,8 +70,8 @@ "http": { "tls": { "ca_file": "/usr/local/share/ca-certificates/ca.crt", - "cert_file": "/etc/tempo/tls/server.crt", - "key_file": "/etc/tempo/tls/server.key", + "cert_file": "/etc/worker/server.cert", + "key_file": "/etc/worker/private.key", } }, } @@ -82,10 +82,7 @@ ), ) def test_tempo_distributor_config(protocols, use_tls, expected_config): - assert ( - Tempo(None, use_tls=use_tls)._build_distributor_config(protocols).receivers - == expected_config - ) + assert Tempo(None)._build_distributor_config(protocols, use_tls).receivers == expected_config @pytest.mark.parametrize( @@ 
-108,4 +105,4 @@ def test_tempo_distributor_config(protocols, use_tls, expected_config): ), ) def test_tempo_memberlist_config(peers, expected_config): - assert Tempo()._build_memberlist_config(peers) == expected_config + assert Tempo(None)._build_memberlist_config(peers) == expected_config From 89512e82737a1c84e0e163bbf1e88c72a1eafa0a Mon Sep 17 00:00:00 2001 From: michael Date: Mon, 22 Jul 2024 12:44:25 +0300 Subject: [PATCH 03/22] add self-tracing --- charmcraft.yaml | 5 + scripts/tracegen.py | 6 +- src/charm.py | 6 +- src/tempo.py | 9 +- src/tempo_config.py | 11 +- tests/integration/conftest.py | 30 ++++++ tests/integration/helpers.py | 53 ++++++++- tests/integration/test_ingressed_tls.py | 84 +++------------ tests/integration/test_self_tracing.py | 55 ++++++++++ tests/integration/test_self_tracing_remote.py | 66 ++++++++++++ tests/integration/test_tls.py | 101 ++++-------------- tests/scenario/test_tempo_clustered.py | 9 +- 12 files changed, 264 insertions(+), 171 deletions(-) create mode 100644 tests/integration/test_self_tracing.py create mode 100644 tests/integration/test_self_tracing_remote.py diff --git a/charmcraft.yaml b/charmcraft.yaml index fdcbc28..104cea2 100644 --- a/charmcraft.yaml +++ b/charmcraft.yaml @@ -67,6 +67,11 @@ provides: requires: + self-tracing: + interface: tracing + description: | + Integration to enable Tempo to send its own traces to itself or to another Tempo instance. + limit: 1 s3: interface: s3 limit: 1 diff --git a/scripts/tracegen.py b/scripts/tracegen.py index 3505be4..c5fcd5f 100644 --- a/scripts/tracegen.py +++ b/scripts/tracegen.py @@ -36,12 +36,12 @@ def emit_trace( return (emit_trace(endpoint, log_trace_to_console, cert, "grpc", nonce=nonce) and emit_trace(endpoint, log_trace_to_console, cert, "http", nonce=nonce)) - return _export_trace(span_exporter, log_trace_to_console=log_trace_to_console, nonce=nonce) + return _export_trace(span_exporter, log_trace_to_console=log_trace_to_console, nonce=nonce, protocol = protocol) -def _export_trace(span_exporter, log_trace_to_console: bool = False, nonce: Any = None): +def _export_trace(span_exporter, log_trace_to_console: bool = False, nonce: Any = None, protocol: Literal["grpc", "http"] = "http"): resource = Resource.create(attributes={ - "service.name": "tracegen", + "service.name": f"tracegen{protocol}", "nonce": str(nonce) } ) diff --git a/src/charm.py b/src/charm.py index 55b41b4..85f53a5 100755 --- a/src/charm.py +++ b/src/charm.py @@ -58,6 +58,7 @@ def __init__(self, *args): "logging": "logging", "metrics": "metrics-endpoint", "s3": "s3", + "tracing": "self-tracing", }, nginx_config=NginxConfig(server_name=self.hostname).config, workers_config=self.tempo.config, @@ -287,7 +288,10 @@ def tempo_otlp_http_endpoint(self) -> Optional[str]: """Endpoint at which the charm tracing information will be forwarded.""" # the charm container and the tempo workload container have apparently the same # IP, so we can talk to tempo at localhost. 
-        if self.is_workload_ready():
+        if hasattr(self, "coordinator") and self.coordinator.tracing.is_ready():
+            return self.coordinator.tracing.get_endpoint("otlp_http")
+        # In the absence of another Tempo instance, we don't want to lose this instance's charm traces
+        elif self.is_workload_ready():
             return f"{self._internal_url}:{self.tempo.receiver_ports['otlp_http']}"
 
     def requested_receivers_urls(self) -> Dict[str, str]:
diff --git a/src/tempo.py b/src/tempo.py
index 04902e6..93b08fb 100644
--- a/src/tempo.py
+++ b/src/tempo.py
@@ -110,7 +110,7 @@ def config(
         )
         config.memberlist = config.memberlist.model_copy(update=tls_config)
 
-        return yaml.dump(config.model_dump(mode="json", exclude_none=True))
+        return yaml.dump(config.model_dump(mode="json", by_alias=True, exclude_none=True))
 
     def _build_server_config(self, use_tls=False):
         server_config = tempo_config.Server(
@@ -141,12 +141,7 @@ def _build_storage_config(self, s3_config: dict):
                 queue_depth=20000,
             ),
             backend="s3",
-            s3=tempo_config.S3(
-                bucket=s3_config["bucket"],
-                access_key=s3_config["access-key"],
-                endpoint=s3_config["endpoint"],
-                secret_key=s3_config["secret-key"],
-            ),
+            s3=tempo_config.S3(**s3_config),
             # starting from Tempo 2.4, we need to use at least parquet v3 to have search capabilities (Grafana support)
             # https://grafana.com/docs/tempo/latest/release-notes/v2-4/#vparquet3-is-now-the-default-block-format
             block=tempo_config.Block(version="vParquet3"),
diff --git a/src/tempo_config.py b/src/tempo_config.py
index c487dac..c54cbba 100644
--- a/src/tempo_config.py
+++ b/src/tempo_config.py
@@ -10,7 +10,7 @@
 from typing import Any, Dict, Iterable, List, Mapping, Optional
 from urllib.parse import urlparse
 
-from pydantic import BaseModel, ConfigDict, field_validator, model_validator
+from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
 
 logger = logging.getLogger(__name__)
 
@@ -236,10 +236,13 @@ class Wal(BaseModel):
 class S3(BaseModel):
     """S3 config schema."""
 
-    bucket: str
-    access_key: str
+    model_config = ConfigDict(populate_by_name=True)
+    """Pydantic config."""
+
+    bucket_name: str = Field(alias="bucket")
+    access_key_id: str = Field(alias="access_key")
     endpoint: str
-    secret_key: str
+    secret_access_key: str = Field(alias="secret_key")
     insecure: bool = False
 
     @model_validator(mode="before")  # pyright: ignore
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index fd3d087..d1df125 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -1,13 +1,22 @@
 # Copyright 2021 Canonical Ltd.
 # See LICENSE file for licensing details.
+import json import logging import os +import random import shutil +import tempfile from pathlib import Path from pytest import fixture from pytest_operator.plugin import OpsTest +from tests.integration.helpers import get_relation_data + +APP_NAME = "tempo" +SSC = "self-signed-certificates" +SSC_APP_NAME = "ssc" + logger = logging.getLogger(__name__) @@ -63,3 +72,24 @@ def copy_charm_libs_into_tester_grpc_charm(ops_test): # cleanup: remove all libs for path in copies: Path(path).unlink() + + +@fixture(scope="function") +def server_cert(ops_test: OpsTest): + data = get_relation_data( + requirer_endpoint=f"{APP_NAME}/0:certificates", + provider_endpoint=f"{SSC_APP_NAME}/0:certificates", + model=ops_test.model.name, + ) + cert = json.loads(data.provider.application_data["certificates"])[0]["certificate"] + + with tempfile.NamedTemporaryFile() as f: + p = Path(f.name) + p.write_text(cert) + yield p + + +@fixture(scope="function") +def nonce(): + """Generate an integer nonce for easier trace querying.""" + return str(random.random())[2:] diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py index 059ee5a..4f8542e 100644 --- a/tests/integration/helpers.py +++ b/tests/integration/helpers.py @@ -8,11 +8,15 @@ from pathlib import Path from typing import Dict, Literal +import requests import yaml from juju.application import Application from juju.unit import Unit from minio import Minio from pytest_operator.plugin import OpsTest +from tenacity import retry, stop_after_attempt, wait_exponential + +from tempo import Tempo _JUJU_DATA_CACHE = {} _JUJU_KEYS = ("egress-subnets", "ingress-address", "private-address") @@ -278,7 +282,7 @@ async def deploy_and_configure_minio(ops_test: OpsTest): assert action_result.status == "completed" -async def deploy_cluster(ops_test: OpsTest): +async def deploy_cluster(ops_test: OpsTest, tempo_app=APP_NAME): # await ops_test.model.deploy(FACADE, channel="edge") # await ops_test.model.wait_for_idle( # apps=[FACADE], raise_on_blocked=True, status="active", timeout=2000 @@ -296,14 +300,55 @@ async def deploy_cluster(ops_test: OpsTest): ) await ops_test.model.deploy(S3_INTEGRATOR, channel="edge") - await ops_test.model.integrate(APP_NAME + ":s3", S3_INTEGRATOR + ":s3-credentials") - await ops_test.model.integrate(APP_NAME + ":tempo-cluster", WORKER_NAME + ":tempo-cluster") + await ops_test.model.integrate(tempo_app + ":s3", S3_INTEGRATOR + ":s3-credentials") + await ops_test.model.integrate(tempo_app + ":tempo-cluster", WORKER_NAME + ":tempo-cluster") await deploy_and_configure_minio(ops_test) await ops_test.model.wait_for_idle( - apps=[APP_NAME, WORKER_NAME, S3_INTEGRATOR], + apps=[tempo_app, WORKER_NAME, S3_INTEGRATOR], status="active", timeout=1000, idle_period=30, ) + + +def get_traces(tempo_host: str, service_name="tracegenhttp", tls=True): + url = f"{'https' if tls else 'http'}://{tempo_host}:3200/api/search?tags=service.name={service_name}" + req = requests.get( + url, + verify=False, + ) + assert req.status_code == 200 + traces = json.loads(req.text)["traces"] + return traces + + +@retry(stop=stop_after_attempt(10), wait=wait_exponential(multiplier=1, min=4, max=10)) +async def get_traces_patiently(tempo_host, service_name="tracegenhttp", tls=True): + traces = get_traces(tempo_host, service_name=service_name, tls=tls) + assert len(traces) > 0 + return traces + + +async def emit_trace( + endpoint, ops_test: OpsTest, nonce, proto: str = "http", verbose=0, use_cert=False +): + """Use juju ssh to run tracegen from the tempo charm; to avoid any DNS 
issues.""" + cmd = ( + f"juju ssh -m {ops_test.model_name} {APP_NAME}/0 " + f"TRACEGEN_ENDPOINT={endpoint} " + f"TRACEGEN_VERBOSE={verbose} " + f"TRACEGEN_PROTOCOL={proto} " + f"TRACEGEN_CERT={Tempo.tls_ca_path if use_cert else ''} " + f"TRACEGEN_NONCE={nonce} " + "python3 tracegen.py" + ) + + return subprocess.getoutput(cmd) + + +async def get_application_ip(ops_test: OpsTest, app_name: str): + status = await ops_test.model.get_status() + app = status["applications"][app_name] + return app.public_address diff --git a/tests/integration/test_ingressed_tls.py b/tests/integration/test_ingressed_tls.py index 10b8275..6d46d0c 100644 --- a/tests/integration/test_ingressed_tls.py +++ b/tests/integration/test_ingressed_tls.py @@ -1,24 +1,16 @@ import asyncio -import json import logging -import random -import subprocess -import tempfile from pathlib import Path import pytest -import requests import yaml -from helpers import deploy_cluster +from helpers import deploy_cluster, emit_trace, get_traces_patiently from juju.application import Application from pytest_operator.plugin import OpsTest -from tenacity import retry, stop_after_attempt, wait_exponential - -from tempo import Tempo -from tests.integration.helpers import get_relation_data METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text()) APP_NAME = "tempo" +WORKER_NAME = "tempo-worker" SSC = "self-signed-certificates" SSC_APP_NAME = "ssc" TRAEFIK = "traefik-k8s" @@ -28,63 +20,15 @@ logger = logging.getLogger(__name__) -@pytest.fixture(scope="function") -def nonce(): - """Generate an integer nonce for easier trace querying.""" - return str(random.random())[2:] - - -@pytest.fixture(scope="function") -def server_cert(ops_test: OpsTest): - data = get_relation_data( - requirer_endpoint=f"{APP_NAME}/0:certificates", - provider_endpoint=f"{SSC_APP_NAME}/0:certificates", - model=ops_test.model.name, - ) - cert = json.loads(data.provider.application_data["certificates"])[0]["certificate"] - - with tempfile.NamedTemporaryFile() as f: - p = Path(f.name) - p.write_text(cert) - yield p - - -def get_traces(tempo_host: str, nonce, service_name="tracegen"): - req = requests.get( - "https://" + tempo_host + ":3200/api/search", - params={"service.name": service_name, "nonce": nonce}, - verify=False, - ) - assert req.status_code == 200 - return json.loads(req.text)["traces"] - - -@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=10)) -async def get_traces_patiently(tempo_host, nonce): - assert get_traces(tempo_host, nonce=nonce) - - -async def get_tempo_host(ops_test: OpsTest): +async def get_ingress_proxied_endpoint(ops_test: OpsTest): status = await ops_test.model.get_status() app = status["applications"][TRAEFIK_APP_NAME] - return app.public_address - - -async def emit_trace( - endpoint, ops_test: OpsTest, nonce, proto: str = "http", verbose=0, use_cert=False -): - """Use juju ssh to run tracegen from the tempo charm; to avoid any DNS issues.""" - cmd = ( - f"juju ssh -m {ops_test.model_name} {APP_NAME}/0 " - f"TRACEGEN_ENDPOINT={endpoint} " - f"TRACEGEN_VERBOSE={verbose} " - f"TRACEGEN_PROTOCOL={proto} " - f"TRACEGEN_CERT={Tempo.tls_ca_path if use_cert else ''} " - f"TRACEGEN_NONCE={nonce} " - "python3 tracegen.py" - ) + status_msg = app["status"]["info"] - return subprocess.getoutput(cmd) + # hacky way to get ingress hostname + if "Serving at" not in status_msg: + assert False, f"Ingressed hostname is not present in {TRAEFIK_APP_NAME} status message." 
+ return status_msg.replace("Serving at", "").strip() @pytest.mark.setup @@ -138,9 +82,11 @@ async def test_relate(ops_test: OpsTest): ) await ops_test.model.integrate(APP_NAME + ":ingress", TRAEFIK_APP_NAME + ":traefik-route") await ops_test.model.wait_for_idle( - apps=[APP_NAME, SSC_APP_NAME, TRAEFIK_APP_NAME], + apps=[APP_NAME, SSC_APP_NAME, TRAEFIK_APP_NAME, WORKER_NAME], status="active", timeout=1000, + # make idle period 1 minute, as Tempo workload might not be up yet + idle_period=60, ) @@ -160,13 +106,13 @@ async def test_relate(ops_test: OpsTest): @pytest.mark.abort_on_fail async def test_verify_ingressed_trace_http_tls(ops_test: OpsTest, nonce, server_cert): - tempo_host = await get_tempo_host(ops_test) + tempo_host = await get_ingress_proxied_endpoint(ops_test) await emit_trace( f"https://{tempo_host}:4318/v1/traces", nonce=nonce, ops_test=ops_test, use_cert=True ) # THEN we can verify it's been ingested - assert get_traces_patiently(tempo_host, nonce=nonce) + assert await get_traces_patiently(tempo_host) @pytest.mark.abort_on_fail @@ -184,13 +130,13 @@ async def test_verify_ingressed_traces_grpc_tls(ops_test: OpsTest, nonce, server timeout=1000, ) - tempo_host = await get_tempo_host(ops_test) + tempo_host = await get_ingress_proxied_endpoint(ops_test) await emit_trace( f"{tempo_host}:4317", nonce=nonce, proto="grpc", ops_test=ops_test, use_cert=True ) # THEN we can verify it's been ingested - assert get_traces_patiently(tempo_host, nonce=nonce) + assert await get_traces_patiently(tempo_host, service_name="grpc") @pytest.mark.teardown diff --git a/tests/integration/test_self_tracing.py b/tests/integration/test_self_tracing.py new file mode 100644 index 0000000..01badd9 --- /dev/null +++ b/tests/integration/test_self_tracing.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +# Copyright 2024 Ubuntu +# See LICENSE file for licensing details. 
+ +import asyncio +import logging +from pathlib import Path + +import pytest +import yaml +from helpers import deploy_cluster, get_application_ip, get_traces_patiently +from pytest_operator.plugin import OpsTest + +logger = logging.getLogger(__name__) + +METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text()) +APP_NAME = "tempo" + + +@pytest.mark.abort_on_fail +async def test_build_and_deploy(ops_test: OpsTest): + tempo_charm = "/home/michael/Work/tempo-coordinator-k8s-operator/charm" + resources = { + "nginx-image": METADATA["resources"]["nginx-image"]["upstream-source"], + "nginx-prometheus-exporter-image": METADATA["resources"][ + "nginx-prometheus-exporter-image" + ]["upstream-source"], + } + + await asyncio.gather( + ops_test.model.deploy(tempo_charm, resources=resources, application_name=APP_NAME), + ) + + # deploy cluster + await deploy_cluster(ops_test, APP_NAME) + + await asyncio.gather( + ops_test.model.wait_for_idle(status="active", raise_on_blocked=True, timeout=1000) + ) + + +@pytest.mark.abort_on_fail +async def test_verify_trace_http_self(ops_test: OpsTest): + # adjust update-status interval to generate a charm tracing span faster + await ops_test.model.set_config({"update-status-hook-interval": "5s"}) + + # Verify traces from `tempo` are ingested into self Tempo + assert await get_traces_patiently( + await get_application_ip(ops_test, APP_NAME), + service_name="tempo-charm", + tls=False, + ) + + # adjust back to the default interval time + await ops_test.model.set_config({"update-status-hook-interval": "5m"}) diff --git a/tests/integration/test_self_tracing_remote.py b/tests/integration/test_self_tracing_remote.py new file mode 100644 index 0000000..0cfcd60 --- /dev/null +++ b/tests/integration/test_self_tracing_remote.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +# Copyright 2024 Ubuntu +# See LICENSE file for licensing details. 
+ +import asyncio +import logging +from pathlib import Path + +import pytest +import yaml +from helpers import deploy_cluster, get_application_ip, get_traces_patiently +from pytest_operator.plugin import OpsTest + +logger = logging.getLogger(__name__) + +METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text()) +APP_NAME = "tempo" +APP_REMOTE_NAME = "tempo-source" + + +@pytest.mark.abort_on_fail +async def test_build_and_deploy(ops_test: OpsTest): + tempo_charm = "/home/michael/Work/tempo-coordinator-k8s-operator/charm" + resources = { + "nginx-image": METADATA["resources"]["nginx-image"]["upstream-source"], + "nginx-prometheus-exporter-image": METADATA["resources"][ + "nginx-prometheus-exporter-image" + ]["upstream-source"], + } + + await asyncio.gather( + ops_test.model.deploy(tempo_charm, resources=resources, application_name=APP_REMOTE_NAME), + ops_test.model.deploy(tempo_charm, resources=resources, application_name=APP_NAME), + ) + + # deploy cluster + await deploy_cluster(ops_test, APP_NAME) + + await asyncio.gather( + ops_test.model.wait_for_idle(status="active", raise_on_blocked=True, timeout=1000) + ) + + +@pytest.mark.abort_on_fail +async def test_relate(ops_test: OpsTest): + await ops_test.model.integrate(APP_NAME + ":tracing", APP_REMOTE_NAME + ":self-tracing") + await ops_test.model.wait_for_idle( + status="active", + timeout=1000, + ) + + +@pytest.mark.abort_on_fail +async def test_verify_trace_http(ops_test: OpsTest): + # adjust update-status interval to generate a charm tracing span faster + await ops_test.model.set_config({"update-status-hook-interval": "5s"}) + + # Verify traces from `tempo-source` are ingested into remote tempo instance + assert await get_traces_patiently( + await get_application_ip(ops_test, APP_NAME), + service_name="tempo-source-charm", + tls=False, + ) + + # adjust back to the default interval time + await ops_test.model.set_config({"update-status-hook-interval": "5m"}) diff --git a/tests/integration/test_tls.py b/tests/integration/test_tls.py index 5781f56..34a8655 100644 --- a/tests/integration/test_tls.py +++ b/tests/integration/test_tls.py @@ -1,21 +1,18 @@ import asyncio -import json import logging -import random -import tempfile from pathlib import Path -from subprocess import getoutput import pytest -import requests import yaml -from helpers import deploy_cluster +from helpers import ( + deploy_cluster, + emit_trace, + get_application_ip, + get_traces, + get_traces_patiently, +) from juju.application import Application from pytest_operator.plugin import OpsTest -from tenacity import retry, stop_after_attempt, wait_exponential - -from tempo import Tempo -from tests.integration.helpers import get_relation_data METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text()) APP_NAME = "tempo" @@ -25,75 +22,15 @@ logger = logging.getLogger(__name__) -@pytest.fixture(scope="function") -def nonce(): - """Generate an integer nonce for easier trace querying.""" - return str(random.random())[2:] - - -def get_traces(tempo_host: str, nonce): - url = "https://" + tempo_host + ":3200/api/search" - req = requests.get( - url, - params={"q": f'{{ .nonce = "{nonce}" }}'}, - # it would fail to verify as the cert was issued for fqdn, not IP. 
- verify=False, - ) - assert req.status_code == 200 - return json.loads(req.text)["traces"] - - -@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=10)) -async def get_traces_patiently(ops_test, nonce): - assert get_traces(await get_tempo_ip(ops_test), nonce=nonce) - - -async def get_tempo_ip(ops_test: OpsTest): - status = await ops_test.model.get_status() - app = status["applications"][APP_NAME] - return app.public_address - - -async def get_tempo_internal_host(ops_test: OpsTest): - return f"{APP_NAME}-0.{APP_NAME}-endpoints.{ops_test.model.name}.svc.cluster.local" - - -@pytest.fixture(scope="function") -def server_cert(ops_test: OpsTest): - data = get_relation_data( - requirer_endpoint=f"{APP_NAME}/0:certificates", - provider_endpoint=f"{SSC_APP_NAME}/0:certificates", - model=ops_test.model.name, - ) - cert = json.loads(data.provider.application_data["certificates"])[0]["certificate"] - - with tempfile.NamedTemporaryFile() as f: - p = Path(f.name) - p.write_text(cert) - yield p - - -async def emit_trace(ops_test: OpsTest, nonce, proto: str = "http", verbose=0, use_cert=False): - """Use juju ssh to run tracegen from the tempo charm; to avoid any DNS issues.""" - hostname = await get_tempo_internal_host(ops_test) +async def get_tempo_traces_internal_endpoint(ops_test: OpsTest, proto="http"): + hostname = f"{APP_NAME}-0.{APP_NAME}-endpoints.{ops_test.model.name}.svc.cluster.local" port = "4317" endpoint_postfix = "" if proto == "http": hostname = f"https://{hostname}" port = "4318" endpoint_postfix = "/v1/traces" - - cmd = ( - f"juju ssh -m {ops_test.model_name} {APP_NAME}/0 " - f"TRACEGEN_ENDPOINT={hostname}:{port}{endpoint_postfix} " - f"TRACEGEN_VERBOSE={verbose} " - f"TRACEGEN_PROTOCOL={proto} " - f"TRACEGEN_CERT={Tempo.tls_ca_path if use_cert else ''} " - f"TRACEGEN_NONCE={nonce} " - "python3 tracegen.py" - ) - - return getoutput(cmd) + return f"{hostname}:{port}{endpoint_postfix}" @pytest.mark.setup @@ -151,19 +88,20 @@ async def test_push_tracegen_script_and_deps(ops_test: OpsTest): async def test_verify_trace_http_no_tls_fails(ops_test: OpsTest, server_cert, nonce): # IF tempo is related to SSC # WHEN we emit an http trace, **unsecured** - await emit_trace(ops_test, nonce=nonce) # this should fail + tempo_endpoint = await get_tempo_traces_internal_endpoint(ops_test) + await emit_trace(tempo_endpoint, ops_test, nonce=nonce) # this should fail # THEN we can verify it's not been ingested - tempo_ip = await get_tempo_ip(ops_test) - traces = get_traces(tempo_ip, nonce=nonce) - assert not traces + traces = get_traces(await get_application_ip(ops_test, APP_NAME)) + assert len(traces) == 0 @pytest.mark.abort_on_fail async def test_verify_trace_http_tls(ops_test: OpsTest, nonce, server_cert): # WHEN we emit a trace secured with TLS - await emit_trace(ops_test, nonce=nonce, use_cert=True) + tempo_endpoint = await get_tempo_traces_internal_endpoint(ops_test) + await emit_trace(tempo_endpoint, ops_test, nonce=nonce, use_cert=True) # THEN we can verify it's eventually ingested - await get_traces_patiently(ops_test, nonce) + await get_traces_patiently(await get_application_ip(ops_test, APP_NAME)) @pytest.mark.abort_on_fail @@ -181,10 +119,11 @@ async def test_verify_traces_grpc_tls(ops_test: OpsTest, nonce, server_cert): timeout=1000, ) + tempo_endpoint = await get_tempo_traces_internal_endpoint(ops_test, proto="grpc") # WHEN we emit a trace secured with TLS - await emit_trace(ops_test, nonce=nonce, verbose=1, proto="grpc", use_cert=True) + await 
emit_trace(tempo_endpoint, ops_test, nonce=nonce, verbose=1, proto="grpc", use_cert=True) # THEN we can verify it's been ingested - await get_traces_patiently(ops_test, nonce) + await get_traces_patiently(await get_application_ip(ops_test, APP_NAME), service_name="grpc") @pytest.mark.teardown diff --git a/tests/scenario/test_tempo_clustered.py b/tests/scenario/test_tempo_clustered.py index 9be1129..1e264f5 100644 --- a/tests/scenario/test_tempo_clustered.py +++ b/tests/scenario/test_tempo_clustered.py @@ -15,11 +15,16 @@ @pytest.fixture(scope="function") -def coordinator_with_initial_config(s3_config): +def coordinator_with_initial_config(): new_coordinator_mock = MagicMock() new_coordinator_mock.return_value.tls_available = False new_coordinator_mock.return_value.hostname = "tempo-test-0.test.cluster.svc.local" - new_coordinator_mock.return_value._s3_config = s3_config + new_coordinator_mock.return_value._s3_config = { + "access_key_id": "key", + "bucket_name": "tempo", + "endpoint": "http://1.2.3.4:9000", + "secret_access_key": "soverysecret", + } new_coordinator_mock.return_value.cluster.gather_addresses.return_value = {"localhost"} new_coordinator_mock.return_value.cluster.gather_addresses_by_role.return_value = { "query-frontend": {"localhost"}, From dad257a3201b0d84390881e0523d1e2d417dddf9 Mon Sep 17 00:00:00 2001 From: michael Date: Mon, 22 Jul 2024 16:38:00 +0300 Subject: [PATCH 04/22] address comments --- charmcraft.yaml | 2 +- src/charm.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/charmcraft.yaml b/charmcraft.yaml index 104cea2..809c523 100644 --- a/charmcraft.yaml +++ b/charmcraft.yaml @@ -70,7 +70,7 @@ requires: self-tracing: interface: tracing description: | - Integration to enable Tempo to send its own traces to itself or to another Tempo instance. + Integration to enable Tempo to send its own traces to another Tempo instance. 
limit: 1 s3: interface: s3 diff --git a/src/charm.py b/src/charm.py index 85f53a5..e8a9e9c 100755 --- a/src/charm.py +++ b/src/charm.py @@ -277,10 +277,9 @@ def _update_server_ca_cert(self) -> None: """Server CA certificate for charm tracing tls, if tls is enabled.""" server_ca_cert = Path(self.tempo.tls_ca_path) if self.coordinator.tls_available: - if not server_ca_cert.exists(): + if self.coordinator.cert_handler.ca_cert: server_ca_cert.parent.mkdir(parents=True, exist_ok=True) - if self.coordinator.cert_handler.ca_cert: - server_ca_cert.write_text(self.coordinator.cert_handler.ca_cert) + server_ca_cert.write_text(self.coordinator.cert_handler.ca_cert) else: # tls unavailable: delete local cert server_ca_cert.unlink(missing_ok=True) From 6f65396cc4b63ec8d9ce763940e420c2da3615a4 Mon Sep 17 00:00:00 2001 From: michael Date: Mon, 22 Jul 2024 16:40:17 +0300 Subject: [PATCH 05/22] remove metrics-generator from minimal deployment --- src/tempo_config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tempo_config.py b/src/tempo_config.py index c54cbba..1a6454e 100644 --- a/src/tempo_config.py +++ b/src/tempo_config.py @@ -61,7 +61,6 @@ def all_nonmeta(): TempoRole.ingester: 1, TempoRole.distributor: 1, TempoRole.compactor: 1, - TempoRole.metrics_generator: 1, } """The minimal set of roles that need to be allocated for the deployment to be considered consistent (otherwise we set blocked).""" From caeb90fbc8ffcb77eca77a4b2d1c3934b8407428 Mon Sep 17 00:00:00 2001 From: michael Date: Mon, 22 Jul 2024 19:21:31 +0300 Subject: [PATCH 06/22] add latest cosl --- requirements.txt | 2 +- src/charm.py | 2 +- src/tempo_config.py | 15 +++++++-------- tests/integration/helpers.py | 2 +- tests/integration/test_ingressed_tls.py | 3 +-- tests/integration/test_integration.py | 3 +-- tests/integration/test_scaling_monolithic.py | 3 +-- tests/integration/test_self_tracing.py | 2 +- tests/integration/test_self_tracing_remote.py | 5 +++-- tests/integration/test_tls.py | 3 +-- tox.ini | 2 ++ 11 files changed, 20 insertions(+), 22 deletions(-) diff --git a/requirements.txt b/requirements.txt index 3834f32..6fa4fec 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,4 +19,4 @@ cryptography # lib/charms/tempo_k8s/v1/tracing.py pydantic>=2 # lib/charms/prometheus_k8s/v0/prometheus_scrape.py -cosl +cosl>=0.0.14 diff --git a/src/charm.py b/src/charm.py index e8a9e9c..754bfac 100755 --- a/src/charm.py +++ b/src/charm.py @@ -50,7 +50,7 @@ def __init__(self, *args): roles_config=TempoRolesConfig(), s3_bucket_name=Tempo.s3_bucket_name, external_url=self._external_url, - worker_metrics_port="8080", + worker_metrics_port=8080, endpoints={ "certificates": "certificates", "cluster": "tempo-cluster", diff --git a/src/tempo_config.py b/src/tempo_config.py index 1a6454e..20ef054 100644 --- a/src/tempo_config.py +++ b/src/tempo_config.py @@ -46,7 +46,6 @@ def all_nonmeta(): TempoRole.ingester, TempoRole.distributor, TempoRole.compactor, - TempoRole.metrics_generator, } @@ -66,12 +65,12 @@ def all_nonmeta(): deployment to be considered consistent (otherwise we set blocked).""" RECOMMENDED_DEPLOYMENT = { - TempoRole.querier: 1, - TempoRole.query_frontend: 1, - TempoRole.ingester: 3, - TempoRole.distributor: 1, - TempoRole.compactor: 1, - TempoRole.metrics_generator: 1, + TempoRole.querier.value: 1, + TempoRole.query_frontend.value: 1, + TempoRole.ingester.value: 3, + TempoRole.distributor.value: 1, + TempoRole.compactor.value: 1, + TempoRole.metrics_generator.value: 1, } """ @@ -88,7 +87,7 @@ class TempoRolesConfig: roles: 
Iterable[str] = {role for role in TempoRole} meta_roles: Mapping[str, Iterable[str]] = META_ROLES minimal_deployment: Iterable[str] = MINIMAL_DEPLOYMENT - recommended_deployment = RECOMMENDED_DEPLOYMENT + recommended_deployment: Dict[str, int] = RECOMMENDED_DEPLOYMENT class ClientAuthTypeEnum(str, enum.Enum): diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py index 4f8542e..3e19cbc 100644 --- a/tests/integration/helpers.py +++ b/tests/integration/helpers.py @@ -324,7 +324,7 @@ def get_traces(tempo_host: str, service_name="tracegenhttp", tls=True): return traces -@retry(stop=stop_after_attempt(10), wait=wait_exponential(multiplier=1, min=4, max=10)) +@retry(stop=stop_after_attempt(15), wait=wait_exponential(multiplier=1, min=4, max=10)) async def get_traces_patiently(tempo_host, service_name="tracegenhttp", tls=True): traces = get_traces(tempo_host, service_name=service_name, tls=tls) assert len(traces) > 0 diff --git a/tests/integration/test_ingressed_tls.py b/tests/integration/test_ingressed_tls.py index 6d46d0c..afdcec2 100644 --- a/tests/integration/test_ingressed_tls.py +++ b/tests/integration/test_ingressed_tls.py @@ -34,8 +34,7 @@ async def get_ingress_proxied_endpoint(ops_test: OpsTest): @pytest.mark.setup @pytest.mark.abort_on_fail async def test_build_and_deploy(ops_test: OpsTest): - # tempo_charm = await ops_test.build_charm(".") - tempo_charm = "/home/michael/Work/tempo-coordinator-k8s-operator/charm" + tempo_charm = await ops_test.build_charm(".") resources = { "nginx-image": METADATA["resources"]["nginx-image"]["upstream-source"], "nginx-prometheus-exporter-image": METADATA["resources"][ diff --git a/tests/integration/test_integration.py b/tests/integration/test_integration.py index eec0039..b87b69b 100644 --- a/tests/integration/test_integration.py +++ b/tests/integration/test_integration.py @@ -26,8 +26,7 @@ async def test_build_and_deploy(ops_test: OpsTest): # Given a fresh build of the charm # When deploying it together with testers # Then applications should eventually be created - # tempo_charm = await ops_test.build_charm(".") - tempo_charm = "/home/michael/Work/tempo-coordinator-k8s-operator/charm" + tempo_charm = await ops_test.build_charm(".") tester_charm = await ops_test.build_charm("./tests/integration/tester/") tester_grpc_charm = await ops_test.build_charm("./tests/integration/tester-grpc/") resources = { diff --git a/tests/integration/test_scaling_monolithic.py b/tests/integration/test_scaling_monolithic.py index f7a247f..f7fd344 100644 --- a/tests/integration/test_scaling_monolithic.py +++ b/tests/integration/test_scaling_monolithic.py @@ -18,8 +18,7 @@ @pytest.mark.setup @pytest.mark.abort_on_fail async def test_deploy_tempo(ops_test: OpsTest): - # tempo_charm = await ops_test.build_charm(".") - tempo_charm = "/home/michael/Work/tempo-coordinator-k8s-operator/charm" + tempo_charm = await ops_test.build_charm(".") resources = { "nginx-image": METADATA["resources"]["nginx-image"]["upstream-source"], diff --git a/tests/integration/test_self_tracing.py b/tests/integration/test_self_tracing.py index 01badd9..157cf24 100644 --- a/tests/integration/test_self_tracing.py +++ b/tests/integration/test_self_tracing.py @@ -19,7 +19,7 @@ @pytest.mark.abort_on_fail async def test_build_and_deploy(ops_test: OpsTest): - tempo_charm = "/home/michael/Work/tempo-coordinator-k8s-operator/charm" + tempo_charm = await ops_test.build_charm(".") resources = { "nginx-image": METADATA["resources"]["nginx-image"]["upstream-source"], 
"nginx-prometheus-exporter-image": METADATA["resources"][ diff --git a/tests/integration/test_self_tracing_remote.py b/tests/integration/test_self_tracing_remote.py index 0cfcd60..7b337dd 100644 --- a/tests/integration/test_self_tracing_remote.py +++ b/tests/integration/test_self_tracing_remote.py @@ -20,7 +20,7 @@ @pytest.mark.abort_on_fail async def test_build_and_deploy(ops_test: OpsTest): - tempo_charm = "/home/michael/Work/tempo-coordinator-k8s-operator/charm" + tempo_charm = await ops_test.build_charm(".") resources = { "nginx-image": METADATA["resources"]["nginx-image"]["upstream-source"], "nginx-prometheus-exporter-image": METADATA["resources"][ @@ -37,7 +37,7 @@ async def test_build_and_deploy(ops_test: OpsTest): await deploy_cluster(ops_test, APP_NAME) await asyncio.gather( - ops_test.model.wait_for_idle(status="active", raise_on_blocked=True, timeout=1000) + ops_test.model.wait_for_idle( apps=[APP_NAME], status="active", raise_on_blocked=True, timeout=1000) ) @@ -45,6 +45,7 @@ async def test_build_and_deploy(ops_test: OpsTest): async def test_relate(ops_test: OpsTest): await ops_test.model.integrate(APP_NAME + ":tracing", APP_REMOTE_NAME + ":self-tracing") await ops_test.model.wait_for_idle( + apps=[APP_NAME], status="active", timeout=1000, ) diff --git a/tests/integration/test_tls.py b/tests/integration/test_tls.py index 34a8655..a1e5c31 100644 --- a/tests/integration/test_tls.py +++ b/tests/integration/test_tls.py @@ -36,8 +36,7 @@ async def get_tempo_traces_internal_endpoint(ops_test: OpsTest, proto="http"): @pytest.mark.setup @pytest.mark.abort_on_fail async def test_build_and_deploy(ops_test: OpsTest): - # tempo_charm = await ops_test.build_charm(".") - tempo_charm = "/home/michael/Work/tempo-coordinator-k8s-operator/charm" + tempo_charm = await ops_test.build_charm(".") resources = { "nginx-image": METADATA["resources"]["nginx-image"]["upstream-source"], "nginx-prometheus-exporter-image": METADATA["resources"][ diff --git a/tox.ini b/tox.ini index 5a36eed..957eb0e 100644 --- a/tox.ini +++ b/tox.ini @@ -91,6 +91,8 @@ deps = -r{toxinidir}/requirements.txt # tracegen opentelemetry-exporter-otlp-proto-grpc + minio + tenacity==8.2.3 commands = pytest -v --tb native --log-cli-level=INFO {[vars]tst_path}integration -s {posargs} From e68a9c832c44df02cd253b62884b6f3d5b801658 Mon Sep 17 00:00:00 2001 From: michael Date: Mon, 22 Jul 2024 19:22:45 +0300 Subject: [PATCH 07/22] fix lint --- tests/integration/test_self_tracing_remote.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_self_tracing_remote.py b/tests/integration/test_self_tracing_remote.py index 7b337dd..9a889b5 100644 --- a/tests/integration/test_self_tracing_remote.py +++ b/tests/integration/test_self_tracing_remote.py @@ -37,7 +37,9 @@ async def test_build_and_deploy(ops_test: OpsTest): await deploy_cluster(ops_test, APP_NAME) await asyncio.gather( - ops_test.model.wait_for_idle( apps=[APP_NAME], status="active", raise_on_blocked=True, timeout=1000) + ops_test.model.wait_for_idle( + apps=[APP_NAME], status="active", raise_on_blocked=True, timeout=1000 + ) ) From a171fce722aa8e0010380df9f67a994f916b3f30 Mon Sep 17 00:00:00 2001 From: michael Date: Tue, 23 Jul 2024 00:04:27 +0300 Subject: [PATCH 08/22] cosl lib increment --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6fa4fec..bfcdc8d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,4 +19,4 @@ cryptography # 
lib/charms/tempo_k8s/v1/tracing.py pydantic>=2 # lib/charms/prometheus_k8s/v0/prometheus_scrape.py -cosl>=0.0.14 +cosl>=0.0.15 From bc9e8c92991be05e47a8dcb77accc1c6595e07dd Mon Sep 17 00:00:00 2001 From: michael Date: Tue, 23 Jul 2024 11:44:39 +0300 Subject: [PATCH 09/22] unpin version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index bfcdc8d..3834f32 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,4 +19,4 @@ cryptography # lib/charms/tempo_k8s/v1/tracing.py pydantic>=2 # lib/charms/prometheus_k8s/v0/prometheus_scrape.py -cosl>=0.0.15 +cosl From f7c55373d1cf3c2c72b3722d0e23536af43da92a Mon Sep 17 00:00:00 2001 From: michael Date: Thu, 25 Jul 2024 14:54:14 +0300 Subject: [PATCH 10/22] comments address --- src/charm.py | 7 +++---- src/tempo.py | 9 +++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/charm.py b/src/charm.py index 754bfac..2227962 100755 --- a/src/charm.py +++ b/src/charm.py @@ -89,6 +89,7 @@ def __init__(self, *args): or not self.coordinator.is_coherent or not self.coordinator.s3_ready ): + # logging will be handled by `self.coordinator` for each of the above circumstances. return # lifecycle @@ -172,8 +173,6 @@ def _on_tracing_broken(self, _): def _on_cert_handler_changed(self, e: ops.RelationChangedEvent): - self.coordinator._on_cert_handler_changed(e) - # tls readiness change means config change. # sync scheme change with traefik and related consumers self._configure_ingress() @@ -216,7 +215,7 @@ def _on_list_receivers_action(self, event: ops.ActionEvent): # keep this event handler at the bottom def _on_collect_unit_status(self, e: ops.CollectStatusEvent): - self.coordinator._on_collect_unit_status(e) + pass # add Tempo charm custom blocking conditions # TODO: avoid waiting for update-status event # if not self.is_workload_ready(): @@ -287,7 +286,7 @@ def tempo_otlp_http_endpoint(self) -> Optional[str]: """Endpoint at which the charm tracing information will be forwarded.""" # the charm container and the tempo workload container have apparently the same # IP, so we can talk to tempo at localhost. - if hasattr(self, "coordinator") and self.coordinator.tracing.is_ready(): + if self.coordinator and self.coordinator.tracing.is_ready(): return self.coordinator.tracing.get_endpoint("otlp_http") # In absence of another Tempo instance, we don't want to lose this instance's charm traces elif self.is_workload_ready(): diff --git a/src/tempo.py b/src/tempo.py index 93b08fb..4b87682 100644 --- a/src/tempo.py +++ b/src/tempo.py @@ -151,12 +151,13 @@ def _build_storage_config(self, s3_config: dict): def _build_querier_config(self, roles_addresses: Dict[str, Set[str]]): """Build querier config""" # if distributor and query-frontend have the same address, then the mode of operation is 'all'. 
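For context, a rough sketch of what the two branches below amount to, using the Querier/FrontendWorker models referenced in this hunk. Hedged assumptions: the FrontendWorker field name follows upstream Tempo's frontend_worker.frontend_address key (it is not shown in this hunk), and the sample addresses are hypothetical.

import tempo_config  # the charm's src/tempo_config.py models

# sketch only: colocated query-frontend/distributor ("all" mode) vs. split roles
colocated = tempo_config.Querier(
    frontend_worker=tempo_config.FrontendWorker(frontend_address="localhost")
)
split = tempo_config.Querier(
    # hypothetical address of the query-frontend worker unit
    frontend_worker=tempo_config.FrontendWorker(frontend_address="10.1.2.3")
)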
- if roles_addresses.get(tempo_config.TempoRole.query_frontend) == roles_addresses.get( - tempo_config.TempoRole.distributor - ) or not roles_addresses.get(tempo_config.TempoRole.query_frontend): + query_frontend_addresses = roles_addresses.get(tempo_config.TempoRole.query_frontend) + distributor_addresses = roles_addresses.get(tempo_config.TempoRole.distributor) + + if not query_frontend_addresses or query_frontend_addresses == distributor_addresses: addr = "localhost" else: - addr = roles_addresses.get(tempo_config.TempoRole.query_frontend, set()).pop() + addr = query_frontend_addresses.pop() return tempo_config.Querier( frontend_worker=tempo_config.FrontendWorker( From 4d0452bc7844fb6c42a4531035b878893abfae1b Mon Sep 17 00:00:00 2001 From: michael Date: Thu, 25 Jul 2024 15:22:37 +0300 Subject: [PATCH 11/22] add comment --- src/tempo_config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tempo_config.py b/src/tempo_config.py index 20ef054..1adbed9 100644 --- a/src/tempo_config.py +++ b/src/tempo_config.py @@ -237,6 +237,8 @@ class S3(BaseModel): model_config = ConfigDict(populate_by_name=True) """Pydantic config.""" + # Use aliases to override keys in `coordinator::_s3_config` + # to align with upstream Tempo's configuration keys: `bucket`, `access_key`, `secret_key`. bucket_name: str = Field(alias="bucket") access_key_id: str = Field(alias="access_key") endpoint: str From 519bdd17b62900852b81f182c1df8708a46a9a7e Mon Sep 17 00:00:00 2001 From: michael Date: Fri, 26 Jul 2024 13:19:45 +0300 Subject: [PATCH 12/22] fix names --- scripts/tracegen.py | 2 +- tests/integration/helpers.py | 4 ++-- tests/integration/test_ingressed_tls.py | 2 +- tests/integration/test_tls.py | 4 +++- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/scripts/tracegen.py b/scripts/tracegen.py index c5fcd5f..76ffb4d 100644 --- a/scripts/tracegen.py +++ b/scripts/tracegen.py @@ -41,7 +41,7 @@ def emit_trace( def _export_trace(span_exporter, log_trace_to_console: bool = False, nonce: Any = None, protocol: Literal["grpc", "http"] = "http"): resource = Resource.create(attributes={ - "service.name": f"tracegen{protocol}", + "service.name": f"tracegen-{protocol}", "nonce": str(nonce) } ) diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py index 3e19cbc..5b8ffac 100644 --- a/tests/integration/helpers.py +++ b/tests/integration/helpers.py @@ -313,7 +313,7 @@ async def deploy_cluster(ops_test: OpsTest, tempo_app=APP_NAME): ) -def get_traces(tempo_host: str, service_name="tracegenhttp", tls=True): +def get_traces(tempo_host: str, service_name="tracegen-http", tls=True): url = f"{'https' if tls else 'http'}://{tempo_host}:3200/api/search?tags=service.name={service_name}" req = requests.get( url, @@ -325,7 +325,7 @@ def get_traces(tempo_host: str, service_name="tracegenhttp", tls=True): @retry(stop=stop_after_attempt(15), wait=wait_exponential(multiplier=1, min=4, max=10)) -async def get_traces_patiently(tempo_host, service_name="tracegenhttp", tls=True): +async def get_traces_patiently(tempo_host, service_name="tracegen-http", tls=True): traces = get_traces(tempo_host, service_name=service_name, tls=tls) assert len(traces) > 0 return traces diff --git a/tests/integration/test_ingressed_tls.py b/tests/integration/test_ingressed_tls.py index afdcec2..3741f04 100644 --- a/tests/integration/test_ingressed_tls.py +++ b/tests/integration/test_ingressed_tls.py @@ -135,7 +135,7 @@ async def test_verify_ingressed_traces_grpc_tls(ops_test: OpsTest, nonce, server f"{tempo_host}:4317", 
nonce=nonce, proto="grpc", ops_test=ops_test, use_cert=True ) # THEN we can verify it's been ingested - assert await get_traces_patiently(tempo_host, service_name="grpc") + assert await get_traces_patiently(tempo_host, service_name="tracegen-grpc") @pytest.mark.teardown diff --git a/tests/integration/test_tls.py b/tests/integration/test_tls.py index a1e5c31..8fb3e27 100644 --- a/tests/integration/test_tls.py +++ b/tests/integration/test_tls.py @@ -122,7 +122,9 @@ async def test_verify_traces_grpc_tls(ops_test: OpsTest, nonce, server_cert): # WHEN we emit a trace secured with TLS await emit_trace(tempo_endpoint, ops_test, nonce=nonce, verbose=1, proto="grpc", use_cert=True) # THEN we can verify it's been ingested - await get_traces_patiently(await get_application_ip(ops_test, APP_NAME), service_name="grpc") + await get_traces_patiently( + await get_application_ip(ops_test, APP_NAME), service_name="tracegen-grpc" + ) @pytest.mark.teardown From a405fd96b6ad6c4a88aa7b4cb3e6d1515b2a2435 Mon Sep 17 00:00:00 2001 From: michael Date: Mon, 29 Jul 2024 11:39:04 +0300 Subject: [PATCH 13/22] address comments #2 --- src/charm.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/charm.py b/src/charm.py index 2227962..ff1e7c1 100755 --- a/src/charm.py +++ b/src/charm.py @@ -84,11 +84,7 @@ def __init__(self, *args): self.framework.observe(self.on.collect_unit_status, self._on_collect_unit_status) # refuse to handle any other event as we can't possibly know what to do. - if ( - not self.coordinator.cluster.has_workers - or not self.coordinator.is_coherent - or not self.coordinator.s3_ready - ): + if not self.coordinator.can_handle_events: # logging will be handled by `self.coordinator` for each of the above circumstances. return @@ -144,11 +140,23 @@ def _external_url(self) -> str: def _internal_url(self) -> str: """Returns workload's FQDN.""" scheme = "http" - if hasattr(self, "coordinator") and self.coordinator.nginx.are_certificates_on_disk: + if self.are_certificates_on_disk: scheme = "https" return f"{scheme}://{self.hostname}" + @property + def are_certificates_on_disk(self) -> bool: + """Return True if the certificates files are on disk.""" + nginx_container = self.unit.get_container("nginx") + + return ( + nginx_container.can_connect() + and nginx_container.exists(CERT_PATH) + and nginx_container.exists(KEY_PATH) + and nginx_container.exists(CA_CERT_PATH) + ) + @property def enabled_receivers(self) -> Set[str]: """Extra receivers enabled through config""" From 844e1d0bfdfa0134a18872077954bdfa112fd700 Mon Sep 17 00:00:00 2001 From: michael Date: Mon, 29 Jul 2024 11:42:48 +0300 Subject: [PATCH 14/22] fix lint --- src/charm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/charm.py b/src/charm.py index ff1e7c1..00d9344 100755 --- a/src/charm.py +++ b/src/charm.py @@ -20,6 +20,7 @@ ) from charms.traefik_route_k8s.v0.traefik_route import TraefikRouteRequirer from cosl.coordinated_workers.coordinator import Coordinator +from cosl.coordinated_workers.nginx import CA_CERT_PATH, CERT_PATH, KEY_PATH from ops.charm import CharmBase, RelationEvent from ops.main import main From 9cc4939dc9e821b7b1e5bd445b233cf2e8c9a877 Mon Sep 17 00:00:00 2001 From: michael Date: Mon, 29 Jul 2024 14:15:01 +0300 Subject: [PATCH 15/22] nginx comments --- src/charm.py | 13 +------------ src/nginx_config.py | 38 +++++++++++++++++++++++++++++++------- src/tempo.py | 2 +- src/tempo_config.py | 2 +- 4 files changed, 34 insertions(+), 21 deletions(-) diff --git a/src/charm.py 
b/src/charm.py index 00d9344..3ba2bec 100755 --- a/src/charm.py +++ b/src/charm.py @@ -34,7 +34,7 @@ @trace_charm( tracing_endpoint="tempo_otlp_http_endpoint", server_cert="server_ca_cert", - extra_types=(Tempo, TracingEndpointProvider), + extra_types=(Tempo, TracingEndpointProvider, Coordinator, TempoRolesConfig), ) class TempoCoordinatorCharm(CharmBase): """Charmed Operator for Tempo; a distributed tracing backend.""" @@ -81,9 +81,6 @@ def __init__(self, *args): self.tracing = TracingEndpointProvider(self, external_url=self._external_url) - # We always listen to collect-status - self.framework.observe(self.on.collect_unit_status, self._on_collect_unit_status) - # refuse to handle any other event as we can't possibly know what to do. if not self.coordinator.can_handle_events: # logging will be handled by `self.coordinator` for each of the above circumstances. @@ -222,14 +219,6 @@ def _on_list_receivers_action(self, event: ops.ActionEvent): res[receiver.replace("_", "-")] = self.get_receiver_url(receiver) event.set_results(res) - # keep this event handler at the bottom - def _on_collect_unit_status(self, e: ops.CollectStatusEvent): - pass - # add Tempo charm custom blocking conditions - # TODO: avoid waiting for update-status event - # if not self.is_workload_ready(): - # e.add_status(ops.WaitingStatus("[workload.tempo] Tempo API not ready just yet...")) - ################### # UTILITY METHODS # ################### diff --git a/src/nginx_config.py b/src/nginx_config.py index 6f14d50..3172fdd 100644 --- a/src/nginx_config.py +++ b/src/nginx_config.py @@ -3,9 +3,14 @@ """Nginx workload.""" import logging -from typing import Any, Dict, List, Optional, Set +from typing import Any, Dict, List, Optional, Set, cast import crossplane +from charms.tempo_k8s.v2.tracing import ( + ReceiverProtocol, + TransportProtocolType, + receiver_protocol_to_transport_protocol, +) from cosl.coordinated_workers.coordinator import Coordinator from cosl.coordinated_workers.nginx import CERT_PATH, KEY_PATH @@ -77,7 +82,9 @@ def _prepare_config(self, coordinator: Coordinator) -> List[dict]: }, {"directive": "proxy_read_timeout", "args": ["300"]}, # server block - *self._servers(addresses_by_role, coordinator.nginx.are_certificates_on_disk), + *self._build_servers_config( + addresses_by_role, coordinator.nginx.are_certificates_on_disk + ), ], }, ] @@ -189,25 +196,30 @@ def _listen_args(self, port: int, ipv6: bool, ssl: bool, http2: bool) -> List[st args.append("http2") return args - def _servers( + def _build_servers_config( self, addresses_by_role: Dict[str, Set[str]], tls: bool = False ) -> List[Dict[str, Any]]: servers = [] roles = addresses_by_role.keys() - + # generate a server config for receiver protocols (9411, 4317, 4318, 14268, 14250) if TempoRole.distributor.value in roles: for protocol, port in Tempo.receiver_ports.items(): servers.append( - self._server(port, protocol.replace("_", "-"), "grpc" in protocol, tls) + self._build_server_config( + port, protocol.replace("_", "-"), self._is_protocol_grpc(protocol), tls + ) ) + # generate a server config for the Tempo server protocols (3200, 9096) if TempoRole.query_frontend.value in roles: for protocol, port in Tempo.server_ports.items(): servers.append( - self._server(port, protocol.replace("_", "-"), "grpc" in protocol, tls) + self._build_server_config( + port, protocol.replace("_", "-"), self._is_protocol_grpc(protocol), tls + ) ) return servers - def _server( + def _build_server_config( self, port: int, upstream: str, grpc: bool = False, tls: bool = False ) 
-> Dict[str, Any]: auth_enabled = False @@ -247,3 +259,15 @@ def _server( *self._locations(upstream, grpc, tls), ], } + + def _is_protocol_grpc(self, protocol: str) -> bool: + """ + Return True if the given protocol is gRPC + """ + if ( + protocol == "tempo_grpc" + or receiver_protocol_to_transport_protocol.get(cast(ReceiverProtocol, protocol)) + == TransportProtocolType.grpc + ): + return True + return False diff --git a/src/tempo.py b/src/tempo.py index 4b87682..36c5e67 100644 --- a/src/tempo.py +++ b/src/tempo.py @@ -75,7 +75,7 @@ def config( Only activate the provided receivers. """ - config = tempo_config.Tempo( + config = tempo_config.TempoConfig( auth_enabled=False, server=self._build_server_config(coordinator.tls_available), distributor=self._build_distributor_config( diff --git a/src/tempo_config.py b/src/tempo_config.py index 1adbed9..4ac225b 100644 --- a/src/tempo_config.py +++ b/src/tempo_config.py @@ -285,7 +285,7 @@ class Storage(BaseModel): trace: TraceStorage -class Tempo(BaseModel): +class TempoConfig(BaseModel): """Tempo config schema.""" auth_enabled: bool From 13f94e5b592a9189b1137213b873c909e219d137 Mon Sep 17 00:00:00 2001 From: michael Date: Fri, 26 Jul 2024 12:29:52 +0300 Subject: [PATCH 16/22] fix list --- charmcraft.yaml | 12 ++++++++++++ lib/charms/tempo_k8s/v2/tracing.py | 19 +++++++++---------- src/charm.py | 13 +++++++++---- src/tempo.py | 12 +++++------- tests/scenario/test_ingressed_tracing.py | 16 ++++++++++++++++ tests/unit/test_charm.py | 2 ++ 6 files changed, 53 insertions(+), 21 deletions(-) diff --git a/charmcraft.yaml b/charmcraft.yaml index 809c523..bee82b0 100644 --- a/charmcraft.yaml +++ b/charmcraft.yaml @@ -141,3 +141,15 @@ config: description: Force-enable the receiver for the 'otlp_http' protocol in Tempo, even if there is no integration currently requesting it. type: boolean default: false + always_enable_opencensus: + description: Force-enable the receiver for the 'opencensus' protocol in Tempo, even if there is no integration currently requesting it. + type: boolean + default: false + always_enable_jaeger_thrift_http: + description: Force-enable the receiver for the 'jaeger_thrift_http' protocol in Tempo, even if there is no integration currently requesting it. + type: boolean + default: false + always_enable_jaeger_grpc: + description: Force-enable the receiver for the 'jaeger_grpc' protocol in Tempo, even if there is no integration currently requesting it. 
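These always_enable_* options are what the charm's enabled_receivers property ("extra receivers enabled through config") picks up. A minimal sketch of that derivation, assuming nothing beyond the option naming scheme defined here; the helper name is hypothetical and not part of this patch.

# hypothetical helper: collect protocols force-enabled via `always_enable_*` options
def force_enabled_receivers(config: dict) -> set:
    return {
        key.removeprefix("always_enable_")
        for key, value in config.items()
        if key.startswith("always_enable_") and value
    }

# e.g. force_enabled_receivers({"always_enable_jaeger_grpc": True}) -> {"jaeger_grpc"}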
+ type: boolean + default: false diff --git a/lib/charms/tempo_k8s/v2/tracing.py b/lib/charms/tempo_k8s/v2/tracing.py index 8b9fb4f..e047325 100644 --- a/lib/charms/tempo_k8s/v2/tracing.py +++ b/lib/charms/tempo_k8s/v2/tracing.py @@ -107,7 +107,7 @@ def __init__(self, *args): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 7 +LIBPATCH = 8 PYDEPS = ["pydantic"] @@ -116,14 +116,14 @@ def __init__(self, *args): DEFAULT_RELATION_NAME = "tracing" RELATION_INTERFACE_NAME = "tracing" +# Supported list rationale https://github.com/canonical/tempo-coordinator-k8s-operator/issues/8 ReceiverProtocol = Literal[ "zipkin", - "kafka", - "opencensus", - "tempo_http", - "tempo_grpc", "otlp_grpc", "otlp_http", + "opencensus", + "jaeger_grpc", + "jaeger_thrift_http", ] RawReceiver = Tuple[ReceiverProtocol, str] @@ -141,14 +141,13 @@ class TransportProtocolType(str, enum.Enum): grpc = "grpc" -receiver_protocol_to_transport_protocol = { +receiver_protocol_to_transport_protocol: Dict[ReceiverProtocol, TransportProtocolType] = { "zipkin": TransportProtocolType.http, - "kafka": TransportProtocolType.http, - "opencensus": TransportProtocolType.http, - "tempo_http": TransportProtocolType.http, - "tempo_grpc": TransportProtocolType.grpc, "otlp_grpc": TransportProtocolType.grpc, "otlp_http": TransportProtocolType.http, + "opencensus": TransportProtocolType.grpc, + "jaeger_thrift_http": TransportProtocolType.http, + "jaeger_grpc": TransportProtocolType.grpc, } """A mapping between telemetry protocols and their corresponding transport protocol. """ diff --git a/src/charm.py b/src/charm.py index 3ba2bec..67c9cd0 100755 --- a/src/charm.py +++ b/src/charm.py @@ -7,7 +7,7 @@ import socket from pathlib import Path from subprocess import CalledProcessError, getoutput -from typing import Dict, Optional, Set, Tuple, get_args +from typing import Dict, Optional, Set, Tuple, cast, get_args import ops from charms.grafana_k8s.v0.grafana_source import GrafanaSourceProvider @@ -16,6 +16,7 @@ ReceiverProtocol, RequestEvent, TracingEndpointProvider, + TransportProtocolType, receiver_protocol_to_transport_protocol, ) from charms.traefik_route_k8s.v0.traefik_route import TraefikRouteRequirer @@ -318,7 +319,11 @@ def _ingress_config(self) -> dict: # TODO better matcher "rule": "ClientIP(`0.0.0.0/0`)", } - if sanitized_protocol.endswith("grpc") and not self.coordinator.tls_available: + if ( + protocol == "tempo_grpc" + or receiver_protocol_to_transport_protocol.get(cast(ReceiverProtocol, protocol)) + == TransportProtocolType.grpc + ) and not self.coordinator.tls_available: # to send traces to unsecured GRPC endpoints, we need h2c # see https://doc.traefik.io/traefik/v2.0/user-guides/grpc/#with-http-h2c http_services[ @@ -352,13 +357,13 @@ def get_receiver_url(self, protocol: ReceiverProtocol): if has_ingress: url = ( self.ingress.external_host - if protocol_type == "grpc" + if protocol_type == TransportProtocolType.grpc else f"{self.ingress.scheme}://{self.ingress.external_host}" ) else: url = ( self.coordinator.hostname - if protocol_type == "grpc" + if protocol_type == TransportProtocolType.grpc else self.coordinator._internal_url ) diff --git a/src/tempo.py b/src/tempo.py index 36c5e67..7bf2b3b 100644 --- a/src/tempo.py +++ b/src/tempo.py @@ -34,18 +34,16 @@ class Tempo: "tempo_grpc": 9096, # default grpc listen port is 9095, but that conflicts with promtail. 
} - # ports source: https://github.com/grafana/tempo/blob/main/example/docker-compose/local/docker-compose.yaml + # ports defined are the default ports specified in + # https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver + # for each of the below receivers. receiver_ports: Dict[str, int] = { "zipkin": 9411, "otlp_grpc": 4317, "otlp_http": 4318, "jaeger_thrift_http": 14268, - # todo if necessary add support for: - # "kafka": 42, - # "jaeger_grpc": 14250, - # "opencensus": 43, - # "jaeger_thrift_compact": 44, - # "jaeger_thrift_binary": 45, + "jaeger_grpc": 14250, + "opencensus": 55678, } all_ports = {**server_ports, **receiver_ports} diff --git a/tests/scenario/test_ingressed_tracing.py b/tests/scenario/test_ingressed_tracing.py index 69a1916..43e0e54 100644 --- a/tests/scenario/test_ingressed_tracing.py +++ b/tests/scenario/test_ingressed_tracing.py @@ -74,6 +74,16 @@ def test_ingress_relation_set_with_dynamic_config(context, base_state, s3, all_w "rule": "ClientIP(`0.0.0.0/0`)", "service": f"juju-{state.model.name}-{charm_name}-service-tempo-grpc", }, + f"juju-{state.model.name}-{charm_name}-opencensus": { + "entryPoints": ["opencensus"], + "rule": "ClientIP(`0.0.0.0/0`)", + "service": f"juju-{state.model.name}-{charm_name}-service-opencensus", + }, + f"juju-{state.model.name}-{charm_name}-jaeger-grpc": { + "entryPoints": ["jaeger-grpc"], + "rule": "ClientIP(`0.0.0.0/0`)", + "service": f"juju-{state.model.name}-{charm_name}-service-jaeger-grpc", + }, }, "services": { f"juju-{state.model.name}-{charm_name}-service-jaeger-thrift-http": { @@ -94,6 +104,12 @@ def test_ingress_relation_set_with_dynamic_config(context, base_state, s3, all_w f"juju-{state.model.name}-{charm_name}-service-tempo-grpc": { "loadBalancer": {"servers": [{"url": "h2c://1.2.3.4:9096"}]} }, + f"juju-{state.model.name}-{charm_name}-service-opencensus": { + "loadBalancer": {"servers": [{"url": "h2c://1.2.3.4:55678"}]} + }, + f"juju-{state.model.name}-{charm_name}-service-jaeger-grpc": { + "loadBalancer": {"servers": [{"url": "h2c://1.2.3.4:14250"}]} + }, }, }, } diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index 9a74dac..1c2bcc4 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -30,6 +30,8 @@ def test_entrypoints_are_generated_with_sanitized_names(self): "otlp-grpc": {"address": ":4317"}, "otlp-http": {"address": ":4318"}, "jaeger-thrift-http": {"address": ":14268"}, + "jaeger-grpc": {"address": ":14250"}, + "opencensus": {"address": ":55678"}, } } self.assertEqual(self.harness.charm._static_ingress_config, expected_entrypoints) From 8da72c3b0e4069a91cba7620361203bfa2805e04 Mon Sep 17 00:00:00 2001 From: michael Date: Mon, 29 Jul 2024 11:26:49 +0300 Subject: [PATCH 17/22] add tests for protocols --- lib/charms/tempo_k8s/v2/tracing.py | 2 - scripts/tracegen.py | 55 +++++++++++++++--- tests/integration/helpers.py | 11 ++-- tests/integration/test_ingressed_tls.py | 4 +- tests/integration/test_tls.py | 72 +++++++++++++----------- tests/scenario/test_ingressed_tracing.py | 2 +- tox.ini | 6 +- 7 files changed, 99 insertions(+), 53 deletions(-) diff --git a/lib/charms/tempo_k8s/v2/tracing.py b/lib/charms/tempo_k8s/v2/tracing.py index e047325..dfb2336 100644 --- a/lib/charms/tempo_k8s/v2/tracing.py +++ b/lib/charms/tempo_k8s/v2/tracing.py @@ -121,7 +121,6 @@ def __init__(self, *args): "zipkin", "otlp_grpc", "otlp_http", - "opencensus", "jaeger_grpc", "jaeger_thrift_http", ] @@ -145,7 +144,6 @@ class TransportProtocolType(str, enum.Enum): "zipkin": 
TransportProtocolType.http, "otlp_grpc": TransportProtocolType.grpc, "otlp_http": TransportProtocolType.http, - "opencensus": TransportProtocolType.grpc, "jaeger_thrift_http": TransportProtocolType.http, "jaeger_grpc": TransportProtocolType.grpc, } diff --git a/scripts/tracegen.py b/scripts/tracegen.py index 76ffb4d..08d38aa 100644 --- a/scripts/tracegen.py +++ b/scripts/tracegen.py @@ -1,11 +1,15 @@ import os import time from pathlib import Path -from typing import Any, Literal +from typing import Any, Literal, get_args +import requests from opentelemetry import trace from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter as GRPCExporter from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPExporter +from opentelemetry.exporter.zipkin.json import ZipkinExporter +from opentelemetry.exporter.jaeger.thrift import JaegerExporter as JaegerThriftHttpExporter +from opentelemetry.exporter.jaeger.proto.grpc import JaegerExporter as JaegerGRPCExporter from opentelemetry.sdk.resources import Resource from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import ( @@ -13,33 +17,68 @@ ConsoleSpanExporter, ) +protocols = Literal[ + "zipkin", + "otlp_grpc", + "otlp_http", + "jaeger_grpc", + "jaeger_thrift_http", +] + def emit_trace( endpoint: str, log_trace_to_console: bool = False, cert: Path = None, - protocol: Literal["grpc", "http", "ALL"] = "grpc", + protocol: protocols = "otlp_http", nonce: Any = None ): os.environ['OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE'] = str(Path(cert).absolute()) if cert else "" + os.environ['OTEL_EXPORTER_JAEGER_CERTIFICATE'] = str(Path(cert).absolute()) if cert else "" + # jaeger thrift http exporter does not expose a parameter to set path for CA verification + os.environ['SSL_CERT_FILE'] = str(Path(cert).absolute()) if cert else "" + os.environ["REQUESTS_CA_BUNDLE"] = str(Path(cert).absolute()) if cert else "" - if protocol == "grpc": + # ip:4317 + if protocol == "otlp_grpc": span_exporter = GRPCExporter( endpoint=endpoint, insecure=not cert, ) - elif protocol == "http": + # scheme://ip:4318/v1/traces + elif protocol == "otlp_http": span_exporter = HTTPExporter( endpoint=endpoint, ) + # scheme://ip:9411/v1/traces + elif protocol == "zipkin": + # zipkin does not expose an arg to pass certificate + session = requests.Session() + if cert: + session.verify = cert + span_exporter = ZipkinExporter( + endpoint=endpoint, + session=session, + ) + # scheme://ip:14268/api/traces?format=jaeger.thrift + elif protocol == "jaeger_thrift_http": + span_exporter = JaegerThriftHttpExporter( + collector_endpoint=endpoint, + ) + # ip:14250 + elif protocol == "jaeger_grpc": + span_exporter = JaegerGRPCExporter( + collector_endpoint = endpoint, + insecure=not cert, + ) else: # ALL - return (emit_trace(endpoint, log_trace_to_console, cert, "grpc", nonce=nonce) and - emit_trace(endpoint, log_trace_to_console, cert, "http", nonce=nonce)) - + for proto in get_args(protocols): + emit_trace(endpoint, log_trace_to_console, cert, proto, nonce=nonce) + return _export_trace(span_exporter, log_trace_to_console=log_trace_to_console, nonce=nonce, protocol = protocol) -def _export_trace(span_exporter, log_trace_to_console: bool = False, nonce: Any = None, protocol: Literal["grpc", "http"] = "http"): +def _export_trace(span_exporter, log_trace_to_console: bool = False, nonce: Any = None, protocol: protocols = "otlp_http"): resource = Resource.create(attributes={ "service.name": f"tracegen-{protocol}", "nonce": 
str(nonce) diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py index 5b8ffac..a8d1ff0 100644 --- a/tests/integration/helpers.py +++ b/tests/integration/helpers.py @@ -289,7 +289,9 @@ async def deploy_cluster(ops_test: OpsTest, tempo_app=APP_NAME): # ) # TODO: deploy from latest edge - tempo_worker_charm = "/home/michael/Work/tempo-worker-k8s-operator/charm" + tempo_worker_charm = ( + "/home/michael/Work/tempo-worker-k8s-operator/tempo-worker-k8s_ubuntu-22.04-amd64.charm" + ) resources = { "tempo-image": "docker.io/ubuntu/tempo:2-22.04", @@ -313,7 +315,7 @@ async def deploy_cluster(ops_test: OpsTest, tempo_app=APP_NAME): ) -def get_traces(tempo_host: str, service_name="tracegen-http", tls=True): +def get_traces(tempo_host: str, service_name="tracegen-otlp_http", tls=True): url = f"{'https' if tls else 'http'}://{tempo_host}:3200/api/search?tags=service.name={service_name}" req = requests.get( url, @@ -325,14 +327,14 @@ def get_traces(tempo_host: str, service_name="tracegen-http", tls=True): @retry(stop=stop_after_attempt(15), wait=wait_exponential(multiplier=1, min=4, max=10)) -async def get_traces_patiently(tempo_host, service_name="tracegen-http", tls=True): +async def get_traces_patiently(tempo_host, service_name="tracegen-otlp_http", tls=True): traces = get_traces(tempo_host, service_name=service_name, tls=tls) assert len(traces) > 0 return traces async def emit_trace( - endpoint, ops_test: OpsTest, nonce, proto: str = "http", verbose=0, use_cert=False + endpoint, ops_test: OpsTest, nonce, proto: str = "otlp_http", verbose=0, use_cert=False ): """Use juju ssh to run tracegen from the tempo charm; to avoid any DNS issues.""" cmd = ( @@ -344,7 +346,6 @@ async def emit_trace( f"TRACEGEN_NONCE={nonce} " "python3 tracegen.py" ) - return subprocess.getoutput(cmd) diff --git a/tests/integration/test_ingressed_tls.py b/tests/integration/test_ingressed_tls.py index 3741f04..49c9a0b 100644 --- a/tests/integration/test_ingressed_tls.py +++ b/tests/integration/test_ingressed_tls.py @@ -132,10 +132,10 @@ async def test_verify_ingressed_traces_grpc_tls(ops_test: OpsTest, nonce, server tempo_host = await get_ingress_proxied_endpoint(ops_test) await emit_trace( - f"{tempo_host}:4317", nonce=nonce, proto="grpc", ops_test=ops_test, use_cert=True + f"{tempo_host}:4317", nonce=nonce, proto="otlp_grpc", ops_test=ops_test, use_cert=True ) # THEN we can verify it's been ingested - assert await get_traces_patiently(tempo_host, service_name="tracegen-grpc") + assert await get_traces_patiently(tempo_host, service_name="tracegen-otlp_grpc") @pytest.mark.teardown diff --git a/tests/integration/test_tls.py b/tests/integration/test_tls.py index 8fb3e27..3719c6b 100644 --- a/tests/integration/test_tls.py +++ b/tests/integration/test_tls.py @@ -19,18 +19,23 @@ SSC = "self-signed-certificates" SSC_APP_NAME = "ssc" TRACEGEN_SCRIPT_PATH = Path() / "scripts" / "tracegen.py" +protocols_endpoints = { + "jaeger_thrift_http": "https://{}:14268/api/traces?format=jaeger.thrift", + "zipkin": "https://{}:9411/v1/traces", + "jaeger_grpc": "{}:14250", + "otlp_http": "https://{}:4318/v1/traces", + "otlp_grpc": "{}:4317", +} + logger = logging.getLogger(__name__) -async def get_tempo_traces_internal_endpoint(ops_test: OpsTest, proto="http"): +async def get_tempo_traces_internal_endpoint(ops_test: OpsTest, protocol): hostname = f"{APP_NAME}-0.{APP_NAME}-endpoints.{ops_test.model.name}.svc.cluster.local" - port = "4317" - endpoint_postfix = "" - if proto == "http": - hostname = f"https://{hostname}" - port = "4318" 
- endpoint_postfix = "/v1/traces" - return f"{hostname}:{port}{endpoint_postfix}" + protocol_endpoint = protocols_endpoints.get(protocol) + if protocol_endpoint is None: + assert False, f"Invalid {protocol}" + return protocol_endpoint.format(hostname) @pytest.mark.setup @@ -80,14 +85,15 @@ async def test_push_tracegen_script_and_deps(ops_test: OpsTest): await ops_test.juju( "ssh", f"{APP_NAME}/0", - "python3 -m pip install opentelemetry-exporter-otlp-proto-grpc opentelemetry-exporter-otlp-proto-http", + "python3 -m pip install opentelemetry-exporter-otlp-proto-grpc opentelemetry-exporter-otlp-proto-http" + + " opentelemetry-exporter-zipkin opentelemetry-exporter-jaeger", ) async def test_verify_trace_http_no_tls_fails(ops_test: OpsTest, server_cert, nonce): # IF tempo is related to SSC # WHEN we emit an http trace, **unsecured** - tempo_endpoint = await get_tempo_traces_internal_endpoint(ops_test) + tempo_endpoint = await get_tempo_traces_internal_endpoint(ops_test, protocol="otlp_http") await emit_trace(tempo_endpoint, ops_test, nonce=nonce) # this should fail # THEN we can verify it's not been ingested traces = get_traces(await get_application_ip(ops_test, APP_NAME)) @@ -95,35 +101,35 @@ async def test_verify_trace_http_no_tls_fails(ops_test: OpsTest, server_cert, no @pytest.mark.abort_on_fail -async def test_verify_trace_http_tls(ops_test: OpsTest, nonce, server_cert): - # WHEN we emit a trace secured with TLS - tempo_endpoint = await get_tempo_traces_internal_endpoint(ops_test) - await emit_trace(tempo_endpoint, ops_test, nonce=nonce, use_cert=True) - # THEN we can verify it's eventually ingested - await get_traces_patiently(await get_application_ip(ops_test, APP_NAME)) - +@pytest.mark.parametrize("protocol", list(protocols_endpoints.keys())) +async def test_verify_traces_force_enabled_protocols_tls(ops_test: OpsTest, nonce, protocol): -@pytest.mark.abort_on_fail -async def test_verify_traces_grpc_tls(ops_test: OpsTest, nonce, server_cert): - # enable otlp grpc receiver tempo_app: Application = ops_test.model.applications[APP_NAME] - await tempo_app.set_config( - { - "always_enable_otlp_grpc": "True", - } - ) - await ops_test.model.wait_for_idle( - apps=[APP_NAME], - status="active", - timeout=1000, - ) - tempo_endpoint = await get_tempo_traces_internal_endpoint(ops_test, proto="grpc") + # enable each protocol receiver + # for protocol in protocols_endpoints: + + # otlp_http should be enabled by default + if protocol != "otlp_http": + await tempo_app.set_config( + { + f"always_enable_{protocol}": "True", + } + ) + await ops_test.model.wait_for_idle( + apps=[APP_NAME], + status="active", + timeout=1000, + ) + + tempo_endpoint = await get_tempo_traces_internal_endpoint(ops_test, protocol=protocol) # WHEN we emit a trace secured with TLS - await emit_trace(tempo_endpoint, ops_test, nonce=nonce, verbose=1, proto="grpc", use_cert=True) + await emit_trace( + tempo_endpoint, ops_test, nonce=nonce, verbose=1, proto=protocol, use_cert=True + ) # THEN we can verify it's been ingested await get_traces_patiently( - await get_application_ip(ops_test, APP_NAME), service_name="tracegen-grpc" + await get_application_ip(ops_test, APP_NAME), service_name=f"tracegen-{protocol}" ) diff --git a/tests/scenario/test_ingressed_tracing.py b/tests/scenario/test_ingressed_tracing.py index 43e0e54..b84bfce 100644 --- a/tests/scenario/test_ingressed_tracing.py +++ b/tests/scenario/test_ingressed_tracing.py @@ -105,7 +105,7 @@ def test_ingress_relation_set_with_dynamic_config(context, base_state, s3, all_w 
"loadBalancer": {"servers": [{"url": "h2c://1.2.3.4:9096"}]} }, f"juju-{state.model.name}-{charm_name}-service-opencensus": { - "loadBalancer": {"servers": [{"url": "h2c://1.2.3.4:55678"}]} + "loadBalancer": {"servers": [{"url": "http://1.2.3.4:55678"}]} }, f"juju-{state.model.name}-{charm_name}-service-jaeger-grpc": { "loadBalancer": {"servers": [{"url": "h2c://1.2.3.4:14250"}]} diff --git a/tox.ini b/tox.ini index 957eb0e..309e723 100644 --- a/tox.ini +++ b/tox.ini @@ -89,10 +89,12 @@ deps = pytest-operator requests -r{toxinidir}/requirements.txt - # tracegen - opentelemetry-exporter-otlp-proto-grpc minio tenacity==8.2.3 + # tracegen + opentelemetry-exporter-otlp-proto-grpc + opentelemetry-exporter-zipkin + opentelemetry-exporter-jaeger commands = pytest -v --tb native --log-cli-level=INFO {[vars]tst_path}integration -s {posargs} From f4f4d5bfd19bf90c42f6f79130ec1b0953018fc6 Mon Sep 17 00:00:00 2001 From: michael Date: Mon, 29 Jul 2024 11:28:33 +0300 Subject: [PATCH 18/22] remove opencensus --- charmcraft.yaml | 4 ---- src/tempo.py | 3 --- tests/scenario/test_ingressed_tracing.py | 8 -------- tests/unit/test_charm.py | 1 - 4 files changed, 16 deletions(-) diff --git a/charmcraft.yaml b/charmcraft.yaml index bee82b0..fd98faa 100644 --- a/charmcraft.yaml +++ b/charmcraft.yaml @@ -141,10 +141,6 @@ config: description: Force-enable the receiver for the 'otlp_http' protocol in Tempo, even if there is no integration currently requesting it. type: boolean default: false - always_enable_opencensus: - description: Force-enable the receiver for the 'opencensus' protocol in Tempo, even if there is no integration currently requesting it. - type: boolean - default: false always_enable_jaeger_thrift_http: description: Force-enable the receiver for the 'jaeger_thrift_http' protocol in Tempo, even if there is no integration currently requesting it. 
type: boolean diff --git a/src/tempo.py b/src/tempo.py index 7bf2b3b..fabc139 100644 --- a/src/tempo.py +++ b/src/tempo.py @@ -43,7 +43,6 @@ class Tempo: "otlp_http": 4318, "jaeger_thrift_http": 14268, "jaeger_grpc": 14250, - "opencensus": 55678, } all_ports = {**server_ports, **receiver_ports} @@ -223,8 +222,6 @@ def _build_distributor_config( if "zipkin" in receivers_set: config["zipkin"] = receiver_config - if "opencensus" in receivers_set: - config["opencensus"] = receiver_config otlp_config = {} if "otlp_http" in receivers_set: diff --git a/tests/scenario/test_ingressed_tracing.py b/tests/scenario/test_ingressed_tracing.py index b84bfce..e2f6f73 100644 --- a/tests/scenario/test_ingressed_tracing.py +++ b/tests/scenario/test_ingressed_tracing.py @@ -74,11 +74,6 @@ def test_ingress_relation_set_with_dynamic_config(context, base_state, s3, all_w "rule": "ClientIP(`0.0.0.0/0`)", "service": f"juju-{state.model.name}-{charm_name}-service-tempo-grpc", }, - f"juju-{state.model.name}-{charm_name}-opencensus": { - "entryPoints": ["opencensus"], - "rule": "ClientIP(`0.0.0.0/0`)", - "service": f"juju-{state.model.name}-{charm_name}-service-opencensus", - }, f"juju-{state.model.name}-{charm_name}-jaeger-grpc": { "entryPoints": ["jaeger-grpc"], "rule": "ClientIP(`0.0.0.0/0`)", @@ -104,9 +99,6 @@ def test_ingress_relation_set_with_dynamic_config(context, base_state, s3, all_w f"juju-{state.model.name}-{charm_name}-service-tempo-grpc": { "loadBalancer": {"servers": [{"url": "h2c://1.2.3.4:9096"}]} }, - f"juju-{state.model.name}-{charm_name}-service-opencensus": { - "loadBalancer": {"servers": [{"url": "http://1.2.3.4:55678"}]} - }, f"juju-{state.model.name}-{charm_name}-service-jaeger-grpc": { "loadBalancer": {"servers": [{"url": "h2c://1.2.3.4:14250"}]} }, diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index 1c2bcc4..1da8294 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -31,7 +31,6 @@ def test_entrypoints_are_generated_with_sanitized_names(self): "otlp-http": {"address": ":4318"}, "jaeger-thrift-http": {"address": ":14268"}, "jaeger-grpc": {"address": ":14250"}, - "opencensus": {"address": ":55678"}, } } self.assertEqual(self.harness.charm._static_ingress_config, expected_entrypoints) From 14e9abdb527b319eb43c8b0da555e6dd1fa2f8e1 Mon Sep 17 00:00:00 2001 From: michael Date: Mon, 29 Jul 2024 13:52:27 +0300 Subject: [PATCH 19/22] refactor tracegen --- scripts/tracegen.py | 56 +++++++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/scripts/tracegen.py b/scripts/tracegen.py index 08d38aa..75e5e67 100644 --- a/scripts/tracegen.py +++ b/scripts/tracegen.py @@ -17,7 +17,7 @@ ConsoleSpanExporter, ) -protocols = Literal[ +ReceiverProtocol = Literal[ "zipkin", "otlp_grpc", "otlp_http", @@ -25,29 +25,24 @@ "jaeger_thrift_http", ] - -def emit_trace( - endpoint: str, - log_trace_to_console: bool = False, - cert: Path = None, - protocol: protocols = "otlp_http", - nonce: Any = None -): - os.environ['OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE'] = str(Path(cert).absolute()) if cert else "" - os.environ['OTEL_EXPORTER_JAEGER_CERTIFICATE'] = str(Path(cert).absolute()) if cert else "" +def set_envvars(cert: Path = None): + ca_cert_path = str(Path(cert).absolute()) if cert else "" + os.environ['OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE'] = ca_cert_path + os.environ['OTEL_EXPORTER_JAEGER_CERTIFICATE'] = ca_cert_path # jaeger thrift http exporter does not expose a parameter to set path for CA verification - os.environ['SSL_CERT_FILE'] 
= str(Path(cert).absolute()) if cert else "" - os.environ["REQUESTS_CA_BUNDLE"] = str(Path(cert).absolute()) if cert else "" + os.environ['SSL_CERT_FILE'] = ca_cert_path + os.environ["REQUESTS_CA_BUNDLE"] = ca_cert_path +def initialize_exporter(protocol: str, endpoint: str, cert: Path = None): # ip:4317 if protocol == "otlp_grpc": - span_exporter = GRPCExporter( + return GRPCExporter( endpoint=endpoint, insecure=not cert, ) # scheme://ip:4318/v1/traces elif protocol == "otlp_http": - span_exporter = HTTPExporter( + return HTTPExporter( endpoint=endpoint, ) # scheme://ip:9411/v1/traces @@ -56,29 +51,46 @@ def emit_trace( session = requests.Session() if cert: session.verify = cert - span_exporter = ZipkinExporter( + return ZipkinExporter( endpoint=endpoint, session=session, ) # scheme://ip:14268/api/traces?format=jaeger.thrift elif protocol == "jaeger_thrift_http": - span_exporter = JaegerThriftHttpExporter( + return JaegerThriftHttpExporter( collector_endpoint=endpoint, ) # ip:14250 elif protocol == "jaeger_grpc": - span_exporter = JaegerGRPCExporter( + return JaegerGRPCExporter( collector_endpoint = endpoint, insecure=not cert, ) - else: # ALL - for proto in get_args(protocols): + else: + raise ValueError(f"Unsupported protocol: {protocol}") + +def emit_trace( + endpoint: str, + log_trace_to_console: bool = False, + cert: Path = None, + protocol: ReceiverProtocol = "otlp_http", + nonce: Any = None +): + if protocol == "ALL": + for proto in get_args(protocol): emit_trace(endpoint, log_trace_to_console, cert, proto, nonce=nonce) + else: + set_envvars(cert) + span_exporter = initialize_exporter(protocol, endpoint, cert) + return _export_trace(span_exporter, log_trace_to_console=log_trace_to_console, nonce=nonce, protocol = protocol) + + + + - return _export_trace(span_exporter, log_trace_to_console=log_trace_to_console, nonce=nonce, protocol = protocol) -def _export_trace(span_exporter, log_trace_to_console: bool = False, nonce: Any = None, protocol: protocols = "otlp_http"): +def _export_trace(span_exporter, log_trace_to_console: bool = False, nonce: Any = None, protocol: ReceiverProtocol = "otlp_http"): resource = Resource.create(attributes={ "service.name": f"tracegen-{protocol}", "nonce": str(nonce) From e754f8b8f34dabc203a3402a2da0cc899507fc77 Mon Sep 17 00:00:00 2001 From: michael Date: Mon, 29 Jul 2024 14:35:40 +0300 Subject: [PATCH 20/22] fix scenario --- tests/scenario/conftest.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/scenario/conftest.py b/tests/scenario/conftest.py index f40f72d..84c387e 100644 --- a/tests/scenario/conftest.py +++ b/tests/scenario/conftest.py @@ -1,7 +1,6 @@ from unittest.mock import MagicMock, patch import pytest -from cosl.coordinated_workers.nginx import Nginx from scenario import Container, Context, Relation from charm import TempoCoordinatorCharm @@ -15,7 +14,7 @@ def coordinator(): @pytest.fixture def tempo_charm(): with patch("lightkube.core.client.GenericSyncClient"): - with patch.object(Nginx, "are_certificates_on_disk", False): + with patch("charm.TempoCoordinatorCharm.are_certificates_on_disk", False): yield TempoCoordinatorCharm From 3c389ecb1e907296f3055700a3c6f98d297c9fb5 Mon Sep 17 00:00:00 2001 From: michael Date: Tue, 30 Jul 2024 17:49:12 +0300 Subject: [PATCH 21/22] fix path --- tests/integration/helpers.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py index a8d1ff0..edc37b5 100644 --- 
a/tests/integration/helpers.py +++ b/tests/integration/helpers.py @@ -283,23 +283,7 @@ async def deploy_and_configure_minio(ops_test: OpsTest): async def deploy_cluster(ops_test: OpsTest, tempo_app=APP_NAME): - # await ops_test.model.deploy(FACADE, channel="edge") - # await ops_test.model.wait_for_idle( - # apps=[FACADE], raise_on_blocked=True, status="active", timeout=2000 - # ) - - # TODO: deploy from latest edge - tempo_worker_charm = ( - "/home/michael/Work/tempo-worker-k8s-operator/tempo-worker-k8s_ubuntu-22.04-amd64.charm" - ) - - resources = { - "tempo-image": "docker.io/ubuntu/tempo:2-22.04", - } - - await ops_test.model.deploy( - tempo_worker_charm, resources=resources, application_name=WORKER_NAME - ) + await ops_test.model.deploy("tempo-worker-k8s", application_name=WORKER_NAME, channel="edge") await ops_test.model.deploy(S3_INTEGRATOR, channel="edge") await ops_test.model.integrate(tempo_app + ":s3", S3_INTEGRATOR + ":s3-credentials") From c98827d2d2f0179c853f163da2fae5f3ada52c12 Mon Sep 17 00:00:00 2001 From: michael Date: Tue, 30 Jul 2024 19:43:18 +0300 Subject: [PATCH 22/22] fix tests --- tests/integration/helpers.py | 7 +++ tests/integration/test_ingressed_tls.py | 65 ++++++++++++++----------- tests/integration/test_tls.py | 11 +---- 3 files changed, 46 insertions(+), 37 deletions(-) diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py index edc37b5..fd83122 100644 --- a/tests/integration/helpers.py +++ b/tests/integration/helpers.py @@ -27,6 +27,13 @@ S3_INTEGRATOR = "s3-integrator" WORKER_NAME = "tempo-worker" APP_NAME = "tempo" +protocols_endpoints = { + "jaeger_thrift_http": "https://{}:14268/api/traces?format=jaeger.thrift", + "zipkin": "https://{}:9411/v1/traces", + "jaeger_grpc": "{}:14250", + "otlp_http": "https://{}:4318/v1/traces", + "otlp_grpc": "{}:4317", +} logger = logging.getLogger(__name__) diff --git a/tests/integration/test_ingressed_tls.py b/tests/integration/test_ingressed_tls.py index 49c9a0b..9f5a221 100644 --- a/tests/integration/test_ingressed_tls.py +++ b/tests/integration/test_ingressed_tls.py @@ -4,7 +4,12 @@ import pytest import yaml -from helpers import deploy_cluster, emit_trace, get_traces_patiently +from helpers import ( + deploy_cluster, + emit_trace, + get_traces_patiently, + protocols_endpoints, +) from juju.application import Application from pytest_operator.plugin import OpsTest @@ -20,7 +25,7 @@ logger = logging.getLogger(__name__) -async def get_ingress_proxied_endpoint(ops_test: OpsTest): +async def get_ingress_proxied_hostname(ops_test: OpsTest): status = await ops_test.model.get_status() app = status["applications"][TRAEFIK_APP_NAME] status_msg = app["status"]["info"] @@ -31,6 +36,13 @@ async def get_ingress_proxied_endpoint(ops_test: OpsTest): return status_msg.replace("Serving at", "").strip() +async def get_tempo_ingressed_endpoint(hostname, protocol): + protocol_endpoint = protocols_endpoints.get(protocol) + if protocol_endpoint is None: + assert False, f"Invalid {protocol}" + return protocol_endpoint.format(hostname) + + @pytest.mark.setup @pytest.mark.abort_on_fail async def test_build_and_deploy(ops_test: OpsTest): @@ -68,7 +80,8 @@ async def test_push_tracegen_script_and_deps(ops_test: OpsTest): await ops_test.juju( "ssh", f"{APP_NAME}/0", - "python3 -m pip install opentelemetry-exporter-otlp-proto-grpc opentelemetry-exporter-otlp-proto-http", + "python3 -m pip install opentelemetry-exporter-otlp-proto-grpc opentelemetry-exporter-otlp-proto-http" + + " opentelemetry-exporter-zipkin 
opentelemetry-exporter-jaeger", ) @@ -104,38 +117,34 @@ async def test_relate(ops_test: OpsTest): @pytest.mark.abort_on_fail -async def test_verify_ingressed_trace_http_tls(ops_test: OpsTest, nonce, server_cert): - tempo_host = await get_ingress_proxied_endpoint(ops_test) - - await emit_trace( - f"https://{tempo_host}:4318/v1/traces", nonce=nonce, ops_test=ops_test, use_cert=True - ) - # THEN we can verify it's been ingested - assert await get_traces_patiently(tempo_host) +@pytest.mark.parametrize("protocol", list(protocols_endpoints.keys())) +async def test_verify_traces_force_enabled_protocols_tls(ops_test: OpsTest, nonce, protocol): - -@pytest.mark.abort_on_fail -async def test_verify_ingressed_traces_grpc_tls(ops_test: OpsTest, nonce, server_cert): - # enable otlp grpc receiver tempo_app: Application = ops_test.model.applications[APP_NAME] - await tempo_app.set_config( - { - "always_enable_otlp_grpc": "True", - } - ) - await ops_test.model.wait_for_idle( - apps=[APP_NAME], - status="active", - timeout=1000, - ) - tempo_host = await get_ingress_proxied_endpoint(ops_test) + # enable each protocol receiver + # otlp_http should be enabled by default + if protocol != "otlp_http": + await tempo_app.set_config( + { + f"always_enable_{protocol}": "True", + } + ) + await ops_test.model.wait_for_idle( + apps=[APP_NAME], + status="active", + timeout=1000, + ) + + tempo_host = await get_ingress_proxied_hostname(ops_test) + tempo_endpoint = await get_tempo_ingressed_endpoint(tempo_host, protocol=protocol) + # WHEN we emit a trace secured with TLS await emit_trace( - f"{tempo_host}:4317", nonce=nonce, proto="otlp_grpc", ops_test=ops_test, use_cert=True + tempo_endpoint, ops_test, nonce=nonce, verbose=1, proto=protocol, use_cert=True ) # THEN we can verify it's been ingested - assert await get_traces_patiently(tempo_host, service_name="tracegen-otlp_grpc") + await get_traces_patiently(tempo_host, service_name=f"tracegen-{protocol}") @pytest.mark.teardown diff --git a/tests/integration/test_tls.py b/tests/integration/test_tls.py index 3719c6b..014e836 100644 --- a/tests/integration/test_tls.py +++ b/tests/integration/test_tls.py @@ -10,6 +10,7 @@ get_application_ip, get_traces, get_traces_patiently, + protocols_endpoints, ) from juju.application import Application from pytest_operator.plugin import OpsTest @@ -19,13 +20,7 @@ SSC = "self-signed-certificates" SSC_APP_NAME = "ssc" TRACEGEN_SCRIPT_PATH = Path() / "scripts" / "tracegen.py" -protocols_endpoints = { - "jaeger_thrift_http": "https://{}:14268/api/traces?format=jaeger.thrift", - "zipkin": "https://{}:9411/v1/traces", - "jaeger_grpc": "{}:14250", - "otlp_http": "https://{}:4318/v1/traces", - "otlp_grpc": "{}:4317", -} + logger = logging.getLogger(__name__) @@ -107,8 +102,6 @@ async def test_verify_traces_force_enabled_protocols_tls(ops_test: OpsTest, nonc tempo_app: Application = ops_test.model.applications[APP_NAME] # enable each protocol receiver - # for protocol in protocols_endpoints: - # otlp_http should be enabled by default if protocol != "otlp_http": await tempo_app.set_config(