diff --git a/lib/charms/loki_k8s/v1/loki_push_api.py b/lib/charms/loki_k8s/v1/loki_push_api.py index bbae054..c3c1d08 100644 --- a/lib/charms/loki_k8s/v1/loki_push_api.py +++ b/lib/charms/loki_k8s/v1/loki_push_api.py @@ -16,20 +16,24 @@ send log to Loki by implementing the consumer side of the `loki_push_api` relation interface. For instance, a Promtail or Grafana agent charm which needs to send logs to Loki. -- `LogProxyConsumer`: This object can be used by any Charmed Operator which needs to -send telemetry, such as logs, to Loki through a Log Proxy by implementing the consumer side of the -`loki_push_api` relation interface. +- `LogProxyConsumer`: DEPRECATED. +This object can be used by any Charmed Operator which needs to send telemetry, such as logs, to +Loki through a Log Proxy by implementing the consumer side of the `loki_push_api` relation +interface. +In order to be able to control the labels on the logs pushed this object adds a Pebble layer +that runs Promtail in the workload container, injecting Juju topology labels into the +logs on the fly. +This object is deprecated. Consider migrating to LogForwarder with the release of Juju 3.6 LTS. - `LogForwarder`: This object can be used by any Charmed Operator which needs to send the workload standard output (stdout) through Pebble's log forwarding mechanism, to Loki endpoints through the `loki_push_api` relation interface. +In order to be able to control the labels on the logs pushed this object updates the pebble layer's +"log-targets" section with Juju topology. Filtering logs in Loki is largely performed on the basis of labels. In the Juju ecosystem, Juju topology labels are used to uniquely identify the workload which generates telemetry like logs. -In order to be able to control the labels on the logs pushed this object adds a Pebble layer -that runs Promtail in the workload container, injecting Juju topology labels into the -logs on the fly. 
## LokiPushApiProvider Library Usage @@ -42,13 +46,14 @@ - `charm`: A reference to the parent (Loki) charm. - `relation_name`: The name of the relation that the charm uses to interact - with its clients, which implement `LokiPushApiConsumer` or `LogProxyConsumer`. + with its clients, which implement `LokiPushApiConsumer`, `LogForwarder`, or `LogProxyConsumer` + (note that LogProxyConsumer is deprecated). If provided, this relation name must match a provided relation in metadata.yaml with the `loki_push_api` interface. - The default relation name is "logging" for `LokiPushApiConsumer` and "log-proxy" for - `LogProxyConsumer`. + The default relation name is "logging" for `LokiPushApiConsumer` and `LogForwarder`, and + "log-proxy" for `LogProxyConsumer` (note that LogProxyConsumer is deprecated). For example, a provider's `metadata.yaml` file may look as follows: @@ -223,6 +228,9 @@ def __init__(self, *args): ## LogProxyConsumer Library Usage +> Note: This object is deprecated. Consider migrating to LogForwarder with the release of Juju 3.6 +> LTS. + Let's say that we have a workload charm that produces logs, and we need to send those logs to a workload implementing the `loki_push_api` interface, such as `Loki` or `Grafana Agent`. @@ -519,7 +527,7 @@ def _alert_rules_error(self, event): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 8 +LIBPATCH = 11 PYDEPS = ["cosl"] @@ -534,13 +542,21 @@ def _alert_rules_error(self, event): # To update Promtail version you only need to change the PROMTAIL_VERSION and # update all sha256 sums in PROMTAIL_BINARIES. To support a new architecture # you only need to add a new key value pair for the architecture in PROMTAIL_BINARIES. 
-PROMTAIL_VERSION = "v2.5.0" +PROMTAIL_VERSION = "v2.9.7" +PROMTAIL_ARM_BINARY = { + "filename": "promtail-static-arm64", + "zipsha": "c083fdb45e5c794103f974eeb426489b4142438d9e10d0ae272b2aff886e249b", + "binsha": "4cd055c477a301c0bdfdbcea514e6e93f6df5d57425ce10ffc77f3e16fec1ddf", +} + PROMTAIL_BINARIES = { "amd64": { "filename": "promtail-static-amd64", - "zipsha": "543e333b0184e14015a42c3c9e9e66d2464aaa66eca48b29e185a6a18f67ab6d", - "binsha": "17e2e271e65f793a9fbe81eab887b941e9d680abe82d5a0602888c50f5e0cac9", + "zipsha": "6873cbdabf23062aeefed6de5f00ff382710332af3ab90a48c253ea17e08f465", + "binsha": "28da9b99f81296fe297831f3bc9d92aea43b4a92826b8ff04ba433b8cb92fb50", }, + "arm64": PROMTAIL_ARM_BINARY, + "aarch64": PROMTAIL_ARM_BINARY, } # Paths in `charm` container @@ -1585,7 +1601,8 @@ def __init__( the Loki API endpoint to push logs. It is intended for workloads that can speak loki_push_api (https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki), such as grafana-agent. - (If you only need to forward a few workload log files, then use LogProxyConsumer.) + (If you need to forward workload stdout logs, then use LogForwarder; if you need to forward + log files, then use LogProxyConsumer.) `LokiPushApiConsumer` can be instantiated as follows: @@ -1760,6 +1777,9 @@ class LogProxyEvents(ObjectEvents): class LogProxyConsumer(ConsumerBase): """LogProxyConsumer class. + > Note: This object is deprecated. Consider migrating to LogForwarder with the release of Juju + > 3.6 LTS. + The `LogProxyConsumer` object provides a method for attaching `promtail` to a workload in order to generate structured logging data from applications which traditionally log to syslog or do not have native Loki integration. 
@@ -1831,7 +1851,12 @@ def __init__( # architecture used for promtail binary arch = platform.processor() - self._arch = "amd64" if arch == "x86_64" else arch + if arch in ["x86_64", "amd64"]: + self._arch = "amd64" + elif arch in ["aarch64", "arm64", "armv8b", "armv8l"]: + self._arch = "arm64" + else: + self._arch = arch events = self._charm.on[relation_name] self.framework.observe(events.relation_created, self._on_relation_created) diff --git a/lib/charms/observability_libs/v1/cert_handler.py b/lib/charms/observability_libs/v1/cert_handler.py index f6a3eda..3b87ad4 100644 --- a/lib/charms/observability_libs/v1/cert_handler.py +++ b/lib/charms/observability_libs/v1/cert_handler.py @@ -58,7 +58,7 @@ import logging -from ops.charm import CharmBase, RelationBrokenEvent +from ops.charm import CharmBase from ops.framework import EventBase, EventSource, Object, ObjectEvents from ops.jujuversion import JujuVersion from ops.model import Relation, Secret, SecretNotFoundError @@ -67,7 +67,7 @@ LIBID = "b5cd5cd580f3428fa5f59a8876dcbe6a" LIBAPI = 1 -LIBPATCH = 9 +LIBPATCH = 11 VAULT_SECRET_LABEL = "cert-handler-private-vault" @@ -260,7 +260,13 @@ def retrieve(self) -> Dict[str, str]: def clear(self): """Clear the vault.""" - self._backend.clear() + try: + self._backend.clear() + except SecretNotFoundError: + # guard against: https://github.com/canonical/observability-libs/issues/95 + # this is fine, it might mean an earlier hook had already called .clear() + # not sure what exactly the root cause is, might be a juju bug + logger.debug("Could not clear vault: secret is gone already.") class CertHandler(Object): @@ -274,6 +280,7 @@ def __init__( *, key: str, certificates_relation_name: str = "certificates", + peer_relation_name: str = "peers", cert_subject: Optional[str] = None, sans: Optional[List[str]] = None, ): @@ -285,7 +292,11 @@ def __init__( charm: The owning charm. key: A manually-crafted, static, unique identifier used by ops to identify events. 
It shouldn't change between one event to another. - certificates_relation_name: Must match metadata.yaml. + certificates_relation_name: Name of the certificates relation over which we obtain TLS certificates. + Must match metadata.yaml. + peer_relation_name: Name of a peer relation used to store our secrets. + Only used on older Juju versions where secrets are not supported. + Must match metadata.yaml. cert_subject: Custom subject. Name collisions are under the caller's responsibility. sans: DNS names. If none are given, use FQDN. """ @@ -309,7 +320,7 @@ def __init__( # self.framework.observe(self.charm.on.secret_remove, self._rotate_csr) else: - vault_backend = _RelationVaultBackend(charm, relation_name="peers") + vault_backend = _RelationVaultBackend(charm, relation_name=peer_relation_name) self.vault = Vault(vault_backend) self.certificates_relation_name = certificates_relation_name @@ -339,10 +350,6 @@ def __init__( self.certificates.on.all_certificates_invalidated, # pyright: ignore self._on_all_certificates_invalidated, ) - self.framework.observe( - self.charm.on[self.certificates_relation_name].relation_broken, # pyright: ignore - self._on_certificates_relation_broken, - ) self.framework.observe( self.charm.on.upgrade_charm, # pyright: ignore self._on_upgrade_charm, @@ -514,7 +521,7 @@ def _csr(self) -> Optional[str]: # ignoring all but the last one. if len(csrs) > 1: logger.warning( - "Multiple CSRs found in `certificates` relation. " + f"Multiple CSRs found in {self.certificates_relation_name!r} relation. " "cert_handler is not ready to expect it." 
) @@ -569,14 +576,13 @@ def _on_certificate_invalidated(self, event: CertificateInvalidatedEvent) -> Non self.on.cert_changed.emit() # pyright: ignore def _on_all_certificates_invalidated(self, _: AllCertificatesInvalidatedEvent) -> None: - # Do what you want with this information, probably remove all certificates - # Note: assuming "limit: 1" in metadata - self._generate_csr(overwrite=True, clear_cert=True) - self.on.cert_changed.emit() # pyright: ignore - - def _on_certificates_relation_broken(self, _: RelationBrokenEvent) -> None: """Clear all secrets data when removing the relation.""" + # Note: assuming "limit: 1" in metadata + # The "certificates_relation_broken" event is converted to "all invalidated" custom + # event by the tls-certificates library. Per convention, we let the lib manage the + # relation and we do not observe "certificates_relation_broken" directly. self.vault.clear() + # We do not generate a CSR here because the relation is gone. self.on.cert_changed.emit() # pyright: ignore def _check_juju_supports_secrets(self) -> bool: diff --git a/lib/charms/tempo_k8s/v1/charm_tracing.py b/lib/charms/tempo_k8s/v1/charm_tracing.py index d7b37eb..eead868 100644 --- a/lib/charms/tempo_k8s/v1/charm_tracing.py +++ b/lib/charms/tempo_k8s/v1/charm_tracing.py @@ -14,8 +14,9 @@ `@trace_charm(tracing_endpoint="my_tracing_endpoint")` -2) add to your charm a "my_tracing_endpoint" (you can name this attribute whatever you like) **property** -that returns an otlp http/https endpoint url. If you are using the `TracingEndpointProvider` as +2) add to your charm a "my_tracing_endpoint" (you can name this attribute whatever you like) +**property**, **method** or **instance attribute** that returns an otlp http/https endpoint url. 
+If you are using the `TracingEndpointProvider` as `self.tracing = TracingEndpointProvider(self)`, the implementation could be: ``` @@ -122,6 +123,7 @@ def my_tracing_endpoint(self) -> Optional[str]: ) import opentelemetry +import ops from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter from opentelemetry.sdk.resources import Resource from opentelemetry.sdk.trace import Span, TracerProvider @@ -146,11 +148,15 @@ def my_tracing_endpoint(self) -> Optional[str]: # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 8 +LIBPATCH = 10 PYDEPS = ["opentelemetry-exporter-otlp-proto-http==1.21.0"] logger = logging.getLogger("tracing") +dev_logger = logging.getLogger("tracing-dev") + +# set this to 0 if you are debugging/developing this library source +dev_logger.setLevel(logging.CRITICAL) tracer: ContextVar[Tracer] = ContextVar("tracer") _GetterType = Union[Callable[[CharmBase], Optional[str]], property] @@ -232,60 +238,78 @@ class UntraceableObjectError(TracingError): """Raised when an object you're attempting to instrument cannot be autoinstrumented.""" -def _get_tracing_endpoint(tracing_endpoint_getter, self, charm): - if isinstance(tracing_endpoint_getter, property): - tracing_endpoint = tracing_endpoint_getter.__get__(self) - else: # method or callable - tracing_endpoint = tracing_endpoint_getter(self) +class TLSError(TracingError): + """Raised when the tracing endpoint is https but we don't have a cert yet.""" + + +def _get_tracing_endpoint( + tracing_endpoint_attr: str, + charm_instance: ops.CharmBase, + charm_type: Type[ops.CharmBase], +): + _tracing_endpoint = getattr(charm_instance, tracing_endpoint_attr) + if callable(_tracing_endpoint): + tracing_endpoint = _tracing_endpoint() + else: + tracing_endpoint = _tracing_endpoint if tracing_endpoint is None: - logger.debug( - f"{charm}.{tracing_endpoint_getter} returned None; quietly disabling " - 
f"charm_tracing for the run." - ) return + elif not isinstance(tracing_endpoint, str): raise TypeError( - f"{charm}.{tracing_endpoint_getter} should return a tempo endpoint (string); " + f"{charm_type.__name__}.{tracing_endpoint_attr} should resolve to a tempo endpoint (string); " f"got {tracing_endpoint} instead." ) - else: - logger.debug(f"Setting up span exporter to endpoint: {tracing_endpoint}/v1/traces") + + dev_logger.debug(f"Setting up span exporter to endpoint: {tracing_endpoint}/v1/traces") return f"{tracing_endpoint}/v1/traces" -def _get_server_cert(server_cert_getter, self, charm): - if isinstance(server_cert_getter, property): - server_cert = server_cert_getter.__get__(self) - else: # method or callable - server_cert = server_cert_getter(self) +def _get_server_cert( + server_cert_attr: str, + charm_instance: ops.CharmBase, + charm_type: Type[ops.CharmBase], +): + _server_cert = getattr(charm_instance, server_cert_attr) + if callable(_server_cert): + server_cert = _server_cert() + else: + server_cert = _server_cert if server_cert is None: logger.warning( - f"{charm}.{server_cert_getter} returned None; sending traces over INSECURE connection." + f"{charm_type}.{server_cert_attr} is None; sending traces over INSECURE connection." ) return elif not Path(server_cert).is_absolute(): raise ValueError( - f"{charm}.{server_cert_getter} should return a valid tls cert absolute path (string | Path)); " + f"{charm_type}.{server_cert_attr} should resolve to a valid tls cert absolute path (string | Path)); " f"got {server_cert} instead." 
) return server_cert def _setup_root_span_initializer( - charm: Type[CharmBase], - tracing_endpoint_getter: _GetterType, - server_cert_getter: Optional[_GetterType], + charm_type: Type[CharmBase], + tracing_endpoint_attr: str, + server_cert_attr: Optional[str], service_name: Optional[str] = None, ): """Patch the charm's initializer.""" - original_init = charm.__init__ + original_init = charm_type.__init__ @functools.wraps(original_init) def wrap_init(self: CharmBase, framework: Framework, *args, **kwargs): + # we're using 'self' here because this is charm init code, makes sense to read what's below + # from the perspective of the charm. Self.unit.name... + original_init(self, framework, *args, **kwargs) + # we call this from inside the init context instead of, say, _autoinstrument, because we want it to + # be checked on a per-charm-instantiation basis, not on a per-type-declaration one. if not is_enabled(): + # this will only happen during unittesting, hopefully, so it's fine to log a + # bit more verbosely logger.info("Tracing DISABLED: skipping root span initialization") return @@ -294,41 +318,41 @@ def wrap_init(self: CharmBase, framework: Framework, *args, **kwargs): # self.handle = Handle(None, self.handle_kind, None) original_event_context = framework._event_context + # default service name isn't just app name because it could conflict with the workload service name + _service_name = service_name or f"{self.app.name}-charm" - _service_name = service_name or self.app.name - + unit_name = self.unit.name resource = Resource.create( attributes={ "service.name": _service_name, "compose_service": _service_name, "charm_type": type(self).__name__, # juju topology - "juju_unit": self.unit.name, + "juju_unit": unit_name, "juju_application": self.app.name, "juju_model": self.model.name, "juju_model_uuid": self.model.uuid, } ) provider = TracerProvider(resource=resource) - try: - tracing_endpoint = _get_tracing_endpoint(tracing_endpoint_getter, self, charm) - except 
Exception: - # if anything goes wrong with retrieving the endpoint, we go on with tracing disabled. - # better than breaking the charm. - logger.exception( - f"exception retrieving the tracing " - f"endpoint from {charm}.{tracing_endpoint_getter}; " - f"proceeding with charm_tracing DISABLED. " - ) - return + + # if anything goes wrong with retrieving the endpoint, we let the exception bubble up. + tracing_endpoint = _get_tracing_endpoint(tracing_endpoint_attr, self, charm_type) if not tracing_endpoint: + # tracing is off if tracing_endpoint is None return server_cert: Optional[Union[str, Path]] = ( - _get_server_cert(server_cert_getter, self, charm) if server_cert_getter else None + _get_server_cert(server_cert_attr, self, charm_type) if server_cert_attr else None ) + if tracing_endpoint.startswith("https://") and not server_cert: + raise TLSError( + "Tracing endpoint is https, but no server_cert has been passed. " + "Please point @trace_charm to a `server_cert` attr." + ) + exporter = OTLPSpanExporter( endpoint=tracing_endpoint, certificate_file=str(Path(server_cert).absolute()) if server_cert else None, @@ -341,16 +365,18 @@ def wrap_init(self: CharmBase, framework: Framework, *args, **kwargs): _tracer = get_tracer(_service_name) # type: ignore _tracer_token = tracer.set(_tracer) - dispatch_path = os.getenv("JUJU_DISPATCH_PATH", "") + dispatch_path = os.getenv("JUJU_DISPATCH_PATH", "") # something like hooks/install + event_name = dispatch_path.split("/")[1] if "/" in dispatch_path else dispatch_path + root_span_name = f"{unit_name}: {event_name} event" + span = _tracer.start_span(root_span_name, attributes={"juju.dispatch_path": dispatch_path}) # all these shenanigans are to work around the fact that the opentelemetry tracing API is built # on the assumption that spans will be used as contextmanagers. # Since we don't (as we need to close the span on framework.commit), # we need to manually set the root span as current. 
- span = _tracer.start_span("charm exec", attributes={"juju.dispatch_path": dispatch_path}) ctx = set_span_in_context(span) - # log a trace id so we can look it up in tempo. + # log a trace id, so we can pick it up from the logs (and jhack) to look it up in tempo. root_trace_id = hex(span.get_span_context().trace_id)[2:] # strip 0x prefix logger.debug(f"Starting root trace with id={root_trace_id!r}.") @@ -358,6 +384,7 @@ def wrap_init(self: CharmBase, framework: Framework, *args, **kwargs): @contextmanager def wrap_event_context(event_name: str): + dev_logger.info(f"entering event context: {event_name}") # when the framework enters an event context, we create a span. with _span("event: " + event_name) as event_context_span: if event_context_span: @@ -371,6 +398,7 @@ def wrap_event_context(event_name: str): @functools.wraps(original_close) def wrap_close(): + dev_logger.info("tearing down tracer and flushing traces") span.end() opentelemetry.context.detach(span_token) # type: ignore tracer.reset(_tracer_token) @@ -382,7 +410,7 @@ def wrap_close(): framework.close = wrap_close return - charm.__init__ = wrap_init + charm_type.__init__ = wrap_init def trace_charm( @@ -433,8 +461,8 @@ def _decorator(charm_type: Type[CharmBase]): """Autoinstrument the wrapped charmbase type.""" _autoinstrument( charm_type, - tracing_endpoint_getter=getattr(charm_type, tracing_endpoint), - server_cert_getter=getattr(charm_type, server_cert) if server_cert else None, + tracing_endpoint_attr=tracing_endpoint, + server_cert_attr=server_cert, service_name=service_name, extra_types=extra_types, ) @@ -445,8 +473,8 @@ def _decorator(charm_type: Type[CharmBase]): def _autoinstrument( charm_type: Type[CharmBase], - tracing_endpoint_getter: _GetterType, - server_cert_getter: Optional[_GetterType] = None, + tracing_endpoint_attr: str, + server_cert_attr: Optional[str] = None, service_name: Optional[str] = None, extra_types: Sequence[type] = (), ) -> Type[CharmBase]: @@ -461,29 +489,29 @@ def 
_autoinstrument( >>> from ops.main import main >>> _autoinstrument( >>> MyCharm, - >>> tracing_endpoint_getter=MyCharm.tempo_otlp_http_endpoint, + >>> tracing_endpoint_attr="tempo_otlp_http_endpoint", >>> service_name="MyCharm", >>> extra_types=(Foo, Bar) >>> ) >>> main(MyCharm) :param charm_type: the CharmBase subclass to autoinstrument. - :param server_cert_getter: method or property on the charm type that returns an - optional absolute path to a tls certificate to be used when sending traces to a remote server. - This needs to be a valid path to a certificate. - :param tracing_endpoint_getter: method or property on the charm type that returns an - optional tempo url. If None, tracing will be effectively disabled. Else, traces will be - pushed to that endpoint. + :param server_cert_attr: name of an attribute, method or property on the charm type that + returns an optional absolute path to a tls certificate to be used when sending traces to + a remote server. This needs to be a valid path to a certificate. + :param tracing_endpoint_attr: name of an attribute, method or property on the charm type that + returns an optional tempo url. If None, tracing will be effectively disabled. Else, + traces will be pushed to that endpoint. :param service_name: service name tag to attach to all traces generated by this charm. Defaults to the juju application name this charm is deployed under. :param extra_types: pass any number of types that you also wish to autoinstrument. For example, charm libs, relation endpoint wrappers, workload abstractions, ... 
""" - logger.info(f"instrumenting {charm_type}") + dev_logger.info(f"instrumenting {charm_type}") _setup_root_span_initializer( charm_type, - tracing_endpoint_getter, - server_cert_getter=server_cert_getter, + tracing_endpoint_attr, + server_cert_attr=server_cert_attr, service_name=service_name, ) trace_type(charm_type) @@ -500,12 +528,12 @@ def trace_type(cls: _T) -> _T: It assumes that this class is only instantiated after a charm type decorated with `@trace_charm` has been instantiated. """ - logger.info(f"instrumenting {cls}") + dev_logger.info(f"instrumenting {cls}") for name, method in inspect.getmembers(cls, predicate=inspect.isfunction): - logger.info(f"discovered {method}") + dev_logger.info(f"discovered {method}") - if not method.__name__.isidentifier(): - logger.info(f"skipping {method}") + if method.__name__.startswith("__"): + dev_logger.info(f"skipping {method} (dunder)") continue new_method = trace_method(method) @@ -533,7 +561,7 @@ def trace_function(function: _F) -> _F: def _trace_callable(callable: _F, qualifier: str) -> _F: - logger.info(f"instrumenting {callable}") + dev_logger.info(f"instrumenting {callable}") # sig = inspect.signature(callable) @functools.wraps(callable) diff --git a/lib/charms/tempo_k8s/v2/tracing.py b/lib/charms/tempo_k8s/v2/tracing.py index b4e341c..8b9fb4f 100644 --- a/lib/charms/tempo_k8s/v2/tracing.py +++ b/lib/charms/tempo_k8s/v2/tracing.py @@ -72,6 +72,7 @@ def __init__(self, *args): import enum import json import logging +from pathlib import Path from typing import ( TYPE_CHECKING, Any, @@ -82,6 +83,7 @@ def __init__(self, *args): Optional, Sequence, Tuple, + Union, cast, ) @@ -105,7 +107,7 @@ def __init__(self, *args): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 6 +LIBPATCH = 7 PYDEPS = ["pydantic"] @@ -921,3 +923,68 @@ def get_endpoint( return None return endpoint + + +def charm_tracing_config( + endpoint_requirer: 
TracingEndpointRequirer, cert_path: Optional[Union[Path, str]] +) -> Tuple[Optional[str], Optional[str]]: + """Utility function to determine the charm_tracing config you will likely want. + + If no endpoint is provided: + disable charm tracing. + If https endpoint is provided but cert_path is not found on disk: + disable charm tracing. + If https endpoint is provided and cert_path is None: + ERROR + Else: + proceed with charm tracing (with or without tls, as appropriate) + + Usage: + If you are using charm_tracing >= v1.9: + >>> from lib.charms.tempo_k8s.v1.charm_tracing import trace_charm + >>> from lib.charms.tempo_k8s.v2.tracing import charm_tracing_config + >>> @trace_charm(tracing_endpoint="my_endpoint", server_cert="cert_path") + >>> class MyCharm(...): + >>> _cert_path = "/path/to/cert/on/charm/container.crt" + >>> def __init__(self, ...): + >>> self.tracing = TracingEndpointRequirer(...) + >>> self.my_endpoint, self.cert_path = charm_tracing_config( + ... self.tracing, self._cert_path) + + If you are using charm_tracing < v1.9: + >>> from lib.charms.tempo_k8s.v1.charm_tracing import trace_charm + >>> from lib.charms.tempo_k8s.v2.tracing import charm_tracing_config + >>> @trace_charm(tracing_endpoint="my_endpoint", server_cert="cert_path") + >>> class MyCharm(...): + >>> _cert_path = "/path/to/cert/on/charm/container.crt" + >>> def __init__(self, ...): + >>> self.tracing = TracingEndpointRequirer(...) + >>> self._my_endpoint, self._cert_path = charm_tracing_config( + ... 
self.tracing, self._cert_path) + >>> @property + >>> def my_endpoint(self): + >>> return self._my_endpoint + >>> @property + >>> def cert_path(self): + >>> return self._cert_path + + """ + if not endpoint_requirer.is_ready(): + return None, None + + endpoint = endpoint_requirer.get_endpoint("otlp_http") + if not endpoint: + return None, None + + is_https = endpoint.startswith("https://") + + if is_https: + if cert_path is None: + raise TracingError("Cannot send traces to an https endpoint without a certificate.") + elif not Path(cert_path).exists(): + # if endpoint is https BUT we don't have a server_cert yet: + # disable charm tracing until we do to prevent tls errors + return None, None + return endpoint, str(cert_path) + else: + return endpoint, None diff --git a/pyproject.toml b/pyproject.toml index acaf006..8eb7b54 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,10 +4,6 @@ name = "tempo-k8s" version = "0.1" # this is in fact irrelevant -[project.optional-dependencies] -lib_pydeps = [ -] - [tool.pyright] extraPaths = ["lib"] pythonVersion = "3.8" diff --git a/src/charm.py b/src/charm.py index c828048..b739d59 100755 --- a/src/charm.py +++ b/src/charm.py @@ -421,7 +421,7 @@ def tempo_otlp_http_endpoint(self) -> Optional[str]: """Endpoint at which the charm tracing information will be forwarded.""" # the charm container and the tempo workload container have apparently the same # IP, so we can talk to tempo at localhost. - if self.tempo.is_ready(): + if self.tempo.is_ready: return f"{self._internal_url}:{self.tempo.receiver_ports['otlp_http']}" return None diff --git a/src/tempo_cluster.py b/src/tempo_cluster.py index df4a93c..99d4f45 100644 --- a/src/tempo_cluster.py +++ b/src/tempo_cluster.py @@ -42,7 +42,7 @@ class TempoRole(str, Enum): """ # scalable-single-binary is a bit too long to type - all = "scalable-single-binary" # default, meta-role. + all = "all" # default, meta-role. gets remapped to scalable-single-binary by the worker. 
querier = "querier" query_frontend = "query-frontend" @@ -393,7 +393,7 @@ def gather_roles(self) -> Dict[TempoRole, int]: remote_app_databag ).role except DataValidationError as e: - log.info(f"invalid databag contents: {e}") + log.debug(f"invalid databag contents: {e}") continue # the number of units with each role is the number of remote units diff --git a/tests/catan/test_clustering.py b/tests/catan/test_clustering.py index 0c0acda..f91dd17 100644 --- a/tests/catan/test_clustering.py +++ b/tests/catan/test_clustering.py @@ -8,8 +8,6 @@ from catan import App, Catan from scenario import Container, State -from tempo_cluster import TempoClusterRequirerAppData, TempoRole - os.environ["CHARM_TRACING_ENABLED"] = "0" # everyone has their own @@ -104,6 +102,11 @@ def tempo_coordinator(): yield tempo +@pytest.fixture +def tempo_coordinator_state(tempo_peers): + return State(relations=[tempo_peers]) + + @pytest.fixture def tempo_worker(): tempo = App.from_path( @@ -113,6 +116,14 @@ def tempo_worker(): yield tempo +@pytest.fixture +def tempo_worker_state(): + return State( + config={"role": "all"}, + containers=[Container(name="tempo", can_connect=True)], + ) + + @pytest.fixture(scope="function") def s3_config(): return { @@ -132,31 +143,11 @@ def s3(s3_config): ) -@pytest.fixture(scope="function") -def all_worker(): - return scenario.Relation( - "tempo-cluster", - remote_app_data=TempoClusterRequirerAppData(role=TempoRole.all).dump(), - ) - - @pytest.fixture(scope="function") def tempo_peers(): return scenario.PeerRelation("peers") -@pytest.fixture -def tempo_coordinator_state(tempo_peers): - return State(relations=[tempo_peers]) - - -@pytest.fixture -def tempo_worker_state(): - return State( - containers=[Container(name="tempo", can_connect=True)], - ) - - @pytest.fixture def update_s3_facade_action(): return scenario.Action( diff --git a/tests/scenario/test_charm_tracing.py b/tests/scenario/test_charm_tracing.py deleted file mode 100644 index e67bba5..0000000 --- 
a/tests/scenario/test_charm_tracing.py +++ /dev/null @@ -1,558 +0,0 @@ -import logging -import os -from unittest.mock import patch - -import pytest -import scenario -from charms.tempo_k8s.v1.charm_tracing import CHARM_TRACING_ENABLED -from charms.tempo_k8s.v1.charm_tracing import _autoinstrument as autoinstrument -from charms.tempo_k8s.v2.tracing import ( - ProtocolNotRequestedError, - ProtocolType, - Receiver, - TracingEndpointRequirer, - TracingProviderAppData, - TracingRequirerAppData, -) -from ops import EventBase, EventSource, Framework -from ops.charm import CharmBase, CharmEvents -from scenario import Context, State - -from lib.charms.tempo_k8s.v1.charm_tracing import get_current_span, trace - -os.environ[CHARM_TRACING_ENABLED] = "1" - -logger = logging.getLogger(__name__) - - -@pytest.fixture(autouse=True) -def cleanup(): - # if any other test module disabled it... - os.environ[CHARM_TRACING_ENABLED] = "1" - - def patched_set_tracer_provider(tracer_provider, log): - import opentelemetry - - opentelemetry.trace._TRACER_PROVIDER = tracer_provider - - with patch("opentelemetry.trace._set_tracer_provider", new=patched_set_tracer_provider): - yield - - -class MyCharmSimple(CharmBase): - META = {"name": "frank"} - - @property - def tempo(self): - return "foo.bar:80" - - -autoinstrument(MyCharmSimple, MyCharmSimple.tempo) - - -def test_base_tracer_endpoint(caplog): - import opentelemetry - - with patch( - "opentelemetry.exporter.otlp.proto.http.trace_exporter.OTLPSpanExporter.export" - ) as f: - f.return_value = opentelemetry.sdk.trace.export.SpanExportResult.SUCCESS - ctx = Context(MyCharmSimple, meta=MyCharmSimple.META) - ctx.run("start", State()) - assert "Setting up span exporter to endpoint: foo.bar:80" in caplog.text - assert "Starting root trace with id=" in caplog.text - span = f.call_args_list[0].args[0][0] - assert span.resource.attributes["service.name"] == "frank" - assert span.resource.attributes["compose_service"] == "frank" - assert 
span.resource.attributes["charm_type"] == "MyCharmSimple" - - -class SubObject: - def foo(self): - return "bar" - - -class MyCharmSubObject(CharmBase): - META = {"name": "frank"} - - def __init__(self, framework: Framework): - super().__init__(framework) - self.subobj = SubObject() - framework.observe(self.on.start, self._on_start) - - def _on_start(self, _): - self.subobj.foo() - - @property - def tempo(self): - return "foo.bar:80" - - -autoinstrument(MyCharmSubObject, MyCharmSubObject.tempo, extra_types=[SubObject]) - - -def test_subobj_tracer_endpoint(caplog): - import opentelemetry - - with patch( - "opentelemetry.exporter.otlp.proto.http.trace_exporter.OTLPSpanExporter.export" - ) as f: - f.return_value = opentelemetry.sdk.trace.export.SpanExportResult.SUCCESS - ctx = Context(MyCharmSubObject, meta=MyCharmSubObject.META) - ctx.run("start", State()) - spans = f.call_args_list[0].args[0] - assert spans[0].name == "method call: SubObject.foo" - - -class MyCharmInitAttr(CharmBase): - META = {"name": "frank"} - - def __init__(self, framework: Framework): - super().__init__(framework) - self._tempo = "foo.bar:80" - - @property - def tempo(self): - return self._tempo - - -autoinstrument(MyCharmInitAttr, MyCharmInitAttr.tempo) - - -def test_init_attr(caplog): - import opentelemetry - - with patch( - "opentelemetry.exporter.otlp.proto.http.trace_exporter.OTLPSpanExporter.export" - ) as f: - f.return_value = opentelemetry.sdk.trace.export.SpanExportResult.SUCCESS - ctx = Context(MyCharmInitAttr, meta=MyCharmInitAttr.META) - ctx.run("start", State()) - assert "Setting up span exporter to endpoint: foo.bar:80" in caplog.text - span = f.call_args_list[0].args[0][0] - assert span.resource.attributes["service.name"] == "frank" - assert span.resource.attributes["compose_service"] == "frank" - assert span.resource.attributes["charm_type"] == "MyCharmInitAttr" - - -class MyCharmSimpleDisabled(CharmBase): - META = {"name": "frank"} - - @property - def tempo(self): - return None 
- - -autoinstrument(MyCharmSimpleDisabled, MyCharmSimpleDisabled.tempo) - - -def test_base_tracer_endpoint_disabled(caplog): - import opentelemetry - - with patch( - "opentelemetry.exporter.otlp.proto.http.trace_exporter.OTLPSpanExporter.export" - ) as f: - f.return_value = opentelemetry.sdk.trace.export.SpanExportResult.SUCCESS - ctx = Context(MyCharmSimpleDisabled, meta=MyCharmSimpleDisabled.META) - ctx.run("start", State()) - - assert "quietly disabling charm_tracing for the run." in caplog.text - assert not f.called - - -@trace -def _my_fn(foo): - return foo + 1 - - -class MyCharmSimpleEvent(CharmBase): - META = {"name": "frank"} - - def __init__(self, fw): - super().__init__(fw) - span = get_current_span() - assert span is None # can't do that in init. - fw.observe(self.on.start, self._on_start) - - def _on_start(self, _): - span = get_current_span() - span.add_event( - "log", - { - "foo": "bar", - "baz": "qux", - }, - ) - _my_fn(2) - - @property - def tempo(self): - return "foo.bar:80" - - -autoinstrument(MyCharmSimpleEvent, MyCharmSimpleEvent.tempo) - - -def test_base_tracer_endpoint_event(caplog): - import opentelemetry - - with patch( - "opentelemetry.exporter.otlp.proto.http.trace_exporter.OTLPSpanExporter.export" - ) as f: - f.return_value = opentelemetry.sdk.trace.export.SpanExportResult.SUCCESS - ctx = Context(MyCharmSimpleEvent, meta=MyCharmSimpleEvent.META) - ctx.run("start", State()) - - spans = f.call_args_list[0].args[0] - span0, span1, span2, span3 = spans - assert span0.name == "function call: _my_fn" - - assert span1.name == "method call: MyCharmSimpleEvent._on_start" - - assert span2.name == "event: start" - evt = span2.events[0] - assert evt.name == "start" - - assert span3.name == "charm exec" - - for span in spans: - assert span.resource.attributes["service.name"] == "frank" - - -def test_juju_topology_injection(caplog): - import opentelemetry - - with patch( - "opentelemetry.exporter.otlp.proto.http.trace_exporter.OTLPSpanExporter.export" 
- ) as f: - f.return_value = opentelemetry.sdk.trace.export.SpanExportResult.SUCCESS - ctx = Context(MyCharmSimpleEvent, meta=MyCharmSimpleEvent.META) - state = ctx.run("start", State()) - - spans = f.call_args_list[0].args[0] - - for span in spans: - # topology - assert span.resource.attributes["juju_unit"] == "frank/0" - assert span.resource.attributes["juju_application"] == "frank" - assert span.resource.attributes["juju_model"] == state.model.name - assert span.resource.attributes["juju_model_uuid"] == state.model.uuid - - -class MyCharmWithMethods(CharmBase): - META = {"name": "frank"} - - def __init__(self, fw): - super().__init__(fw) - fw.observe(self.on.start, self._on_start) - - def _on_start(self, _): - self.a() - self.b() - self.c() - - def a(self): - pass - - def b(self): - pass - - def c(self): - pass - - @property - def tempo(self): - return "foo.bar:80" - - -autoinstrument(MyCharmWithMethods, MyCharmWithMethods.tempo) - - -def test_base_tracer_endpoint_methods(caplog): - import opentelemetry - - with patch( - "opentelemetry.exporter.otlp.proto.http.trace_exporter.OTLPSpanExporter.export" - ) as f: - f.return_value = opentelemetry.sdk.trace.export.SpanExportResult.SUCCESS - ctx = Context(MyCharmWithMethods, meta=MyCharmWithMethods.META) - ctx.run("start", State()) - - spans = f.call_args_list[0].args[0] - span_names = [span.name for span in spans] - assert span_names == [ - "method call: MyCharmWithMethods.a", - "method call: MyCharmWithMethods.b", - "method call: MyCharmWithMethods.c", - "method call: MyCharmWithMethods._on_start", - "event: start", - "charm exec", - ] - - -class Foo(EventBase): - pass - - -class MyEvents(CharmEvents): - foo = EventSource(Foo) - - -class MyCharmWithCustomEvents(CharmBase): - on = MyEvents() - - META = {"name": "frank"} - - def __init__(self, fw): - super().__init__(fw) - fw.observe(self.on.start, self._on_start) - fw.observe(self.on.foo, self._on_foo) - - def _on_start(self, _): - self.on.foo.emit() - - def 
_on_foo(self, _): - pass - - @property - def tempo(self): - return "foo.bar:80" - - -autoinstrument(MyCharmWithCustomEvents, MyCharmWithCustomEvents.tempo) - - -def test_base_tracer_endpoint_custom_event(caplog): - import opentelemetry - - with patch( - "opentelemetry.exporter.otlp.proto.http.trace_exporter.OTLPSpanExporter.export" - ) as f: - f.return_value = opentelemetry.sdk.trace.export.SpanExportResult.SUCCESS - ctx = Context(MyCharmWithCustomEvents, meta=MyCharmWithCustomEvents.META) - ctx.run("start", State()) - - spans = f.call_args_list[0].args[0] - span_names = [span.name for span in spans] - assert span_names == [ - "method call: MyCharmWithCustomEvents._on_foo", - "event: foo", - "method call: MyCharmWithCustomEvents._on_start", - "event: start", - "charm exec", - ] - # only the charm exec span is a root - assert not spans[-1].parent - for span in spans[:-1]: - assert span.parent - assert span.parent.trace_id - assert len({(span.parent.trace_id if span.parent else 0) for span in spans}) == 2 - - -class MyRemoteCharm(CharmBase): - META = {"name": "charlie", "requires": {"tracing": {"interface": "tracing", "limit": 1}}} - _request = True - - def __init__(self, framework: Framework): - super().__init__(framework) - self.tracing = TracingEndpointRequirer( - self, "tracing", protocols=(["otlp_http"] if self._request else []) - ) - - def tempo(self): - return self.tracing.get_endpoint("otlp_http") - - -autoinstrument(MyRemoteCharm, MyRemoteCharm.tempo) - - -@pytest.mark.parametrize("leader", (True, False)) -def test_tracing_requirer_remote_charm_request_response(leader): - # IF the leader unit (whoever it is) did request the endpoint to be activated - MyRemoteCharm._request = True - ctx = Context(MyRemoteCharm, meta=MyRemoteCharm.META) - # WHEN you get any event AND the remote unit has already replied - tracing = scenario.Relation( - "tracing", - # if we're not leader, assume the leader did its part already - local_app_data=( - 
TracingRequirerAppData(receivers=["otlp_http"]).dump() if not leader else {} - ), - remote_app_data=TracingProviderAppData( - host="foo.com", - receivers=[ - Receiver( - url="http://foo.com:80", protocol=ProtocolType(name="otlp_http", type="http") - ) - ], - ).dump(), - ) - with ctx.manager("start", State(leader=leader, relations=[tracing])) as mgr: - # THEN you're good - assert mgr.charm.tempo() == "http://foo.com:80" - - -@pytest.mark.parametrize("leader", (True, False)) -def test_tracing_requirer_remote_charm_no_request_but_response(leader): - # IF the leader did NOT request the endpoint to be activated - MyRemoteCharm._request = False - ctx = Context(MyRemoteCharm, meta=MyRemoteCharm.META) - # WHEN you get any event AND the remote unit has already replied - tracing = scenario.Relation( - "tracing", - # empty local app data - remote_app_data=TracingProviderAppData( - # but the remote end has sent the data you need - receivers=[ - Receiver( - url="http://foo.com:80", protocol=ProtocolType(name="otlp_http", type="http") - ) - ], - ).dump(), - ) - with ctx.manager("start", State(leader=leader, relations=[tracing])) as mgr: - # THEN you're lucky, but you're good - assert mgr.charm.tempo() == "http://foo.com:80" - - -@pytest.mark.parametrize("relation", (True, False)) -@pytest.mark.parametrize("leader", (True, False)) -def test_tracing_requirer_remote_charm_no_request_no_response(leader, relation): - """Verify that the charm successfully executes (with charm_tracing disabled) if the tempo() call raises.""" - # IF the leader did NOT request the endpoint to be activated - MyRemoteCharm._request = False - ctx = Context(MyRemoteCharm, meta=MyRemoteCharm.META) - # WHEN you get any event - if relation: - # AND you have an empty relation - tracing = scenario.Relation( - "tracing", - # empty local and remote app data - ) - relations = [tracing] - else: - # OR no relation at all - relations = [] - - # THEN you're not totally good: self.tempo() will raise, but charm exec will 
still exit 0 - with ctx.manager("start", State(leader=leader, relations=relations)) as mgr: - with pytest.raises(ProtocolNotRequestedError): - assert mgr.charm.tempo() is None - - -class MyRemoteBorkyCharm(CharmBase): - META = {"name": "charlie", "requires": {"tracing": {"interface": "tracing", "limit": 1}}} - _borky_return_value = None - - def tempo(self): - return self._borky_return_value - - -autoinstrument(MyRemoteBorkyCharm, MyRemoteBorkyCharm.tempo) - - -@pytest.mark.parametrize("borky_return_value", (True, 42, object(), 0.2, [], (), {})) -def test_borky_tempo_return_value(borky_return_value, caplog): - """Verify that the charm exits 0 (with charm_tracing disabled) if the tempo() call returns bad values.""" - # IF the charm's tempo endpoint getter returns anything but None or str - MyRemoteBorkyCharm._borky_return_value = borky_return_value - ctx = Context(MyRemoteBorkyCharm, meta=MyRemoteBorkyCharm.META) - # WHEN you get any event - # THEN you're not totally good: self.tempo() will raise, but charm exec will still exit 0 - - ctx.run("start", State()) - # traceback from the TypeError raised by _get_tracing_endpoint - assert "should return a tempo endpoint" in caplog.text - # logger.exception in _setup_root_span_initializer - assert "exception retrieving the tracing endpoint from" in caplog.text - assert "proceeding with charm_tracing DISABLED." 
in caplog.text - - -class MyCharmStaticMethods(CharmBase): - META = {"name": "jolene"} - - def __init__(self, fw): - super().__init__(fw) - fw.observe(self.on.start, self._on_start) - fw.observe(self.on.update_status, self._on_update_status) - - def _on_start(self, _): - for o in (OtherObj(), OtherObj): - for meth in ("_staticmeth", "_staticmeth1", "_staticmeth2"): - assert getattr(o, meth)(1) == 2 - - def _on_update_status(self, _): - # super-ugly edge cases - OtherObj()._staticmeth3(OtherObj()) - OtherObj()._staticmeth4(OtherObj()) - OtherObj._staticmeth3(OtherObj()) - OtherObj._staticmeth4(OtherObj(), foo=2) - - @property - def tempo(self): - return "foo.bar:80" - - -class OtherObj: - @staticmethod - def _staticmeth(i: int, *args, **kwargs): - return 1 + i - - @staticmethod - def _staticmeth1(i: int): - return 1 + i - - @staticmethod - def _staticmeth2(i: int, foo="bar"): - return 1 + i - - @staticmethod - def _staticmeth3(abc: "OtherObj", foo="bar"): - return 1 + 1 - - @staticmethod - def _staticmeth4(abc: int, foo="bar"): - return 1 + 1 - - -autoinstrument(MyCharmStaticMethods, MyCharmStaticMethods.tempo, extra_types=[OtherObj]) - - -def test_trace_staticmethods(caplog): - import opentelemetry - - with patch( - "opentelemetry.exporter.otlp.proto.http.trace_exporter.OTLPSpanExporter.export" - ) as f: - f.return_value = opentelemetry.sdk.trace.export.SpanExportResult.SUCCESS - ctx = Context(MyCharmStaticMethods, meta=MyCharmStaticMethods.META) - ctx.run("start", State()) - - spans = f.call_args_list[0].args[0] - - span_names = [span.name for span in spans] - assert span_names == [ - "method call: OtherObj._staticmeth", - "method call: OtherObj._staticmeth1", - "method call: OtherObj._staticmeth2", - "method call: OtherObj._staticmeth", - "method call: OtherObj._staticmeth1", - "method call: OtherObj._staticmeth2", - "method call: MyCharmStaticMethods._on_start", - "event: start", - "charm exec", - ] - - for span in spans: - assert 
span.resource.attributes["service.name"] == "jolene" - - -def test_trace_staticmethods_bork(caplog): - import opentelemetry - - with patch( - "opentelemetry.exporter.otlp.proto.http.trace_exporter.OTLPSpanExporter.export" - ) as f: - f.return_value = opentelemetry.sdk.trace.export.SpanExportResult.SUCCESS - ctx = Context(MyCharmStaticMethods, meta=MyCharmStaticMethods.META) - ctx.run("update-status", State()) diff --git a/tox.ini b/tox.ini index 78e861d..04c422d 100644 --- a/tox.ini +++ b/tox.ini @@ -50,7 +50,6 @@ deps = pytest<8.2.0 # https://github.com/pytest-dev/pytest/issues/12263 coverage[toml] deepdiff - .[lib_pydeps] -r{toxinidir}/requirements.txt commands = coverage run --source={[vars]src_path} \ @@ -63,11 +62,22 @@ deps = pytest<8.2.0 # https://github.com/pytest-dev/pytest/issues/12263 coverage[toml] ops-scenario>=4.0.3 - .[lib_pydeps] -r{toxinidir}/requirements.txt commands = coverage run --source={[vars]src_path} \ -m pytest -v --tb native -s {posargs} {[vars]tst_path}scenario + coverage report[testenv:scenario] + +[testenv:catan] +description = Run catan integration tests +deps = + pytest<8.2.0 # https://github.com/pytest-dev/pytest/issues/12263 + /home/pietro/canonical/catan/dist/catan-0.1.tar.gz + coverage[toml] + -r{toxinidir}/requirements.txt +commands = + coverage run --source={[vars]src_path} \ + -m pytest -v --tb native -s {posargs} {[vars]tst_path}catan coverage report [testenv:integration] @@ -89,7 +99,6 @@ description = Run interface tests deps = pytest<8.2.0 # https://github.com/pytest-dev/pytest/issues/12263 -r{toxinidir}/requirements.txt - .[lib_pydeps] pytest-interface-tester # git+https://github.com/canonical/pytest-interface-tester commands = @@ -100,7 +109,6 @@ commands = description = Static code checking deps = # pydeps - .[lib_pydeps] pyright==1.1.340 -r{toxinidir}/requirements.txt commands =