From 3c48ce8c14d035d80bc104b47b2ac0344e20c0e5 Mon Sep 17 00:00:00 2001 From: shayancanonical <99665202+shayancanonical@users.noreply.github.com> Date: Tue, 16 Jul 2024 09:49:29 -0400 Subject: [PATCH] Attempt to stabilize exporter tests in light of bind-address in use issues (#154) Counterpart of https://github.com/canonical/mysql-router-k8s-operator/pull/254 ## Summary 1. Use `requests.get(stream=False)` to avoid open connections on the router exporter 2. Split the two exporter tests into different files so that they end up in different integration test groups 3. Use a much larger timeout (7mins) than the TIME_WAIT timeout (60s/1min) to see if the snap daemon (mysql-router-exporter) will restart successfully --- .../data_platform_libs/v0/data_interfaces.py | 25 +- lib/charms/tempo_k8s/v1/charm_tracing.py | 301 ++++++++++++------ lib/charms/tempo_k8s/v2/tracing.py | 69 +++- src/relations/cos.py | 4 +- src/snap.py | 6 +- tests/integration/test_exporter.py | 167 +--------- tests/integration/test_exporter_with_tls.py | 241 ++++++++++++++ 7 files changed, 554 insertions(+), 259 deletions(-) create mode 100644 tests/integration/test_exporter_with_tls.py diff --git a/lib/charms/data_platform_libs/v0/data_interfaces.py b/lib/charms/data_platform_libs/v0/data_interfaces.py index 59a97226..a2162aa0 100644 --- a/lib/charms/data_platform_libs/v0/data_interfaces.py +++ b/lib/charms/data_platform_libs/v0/data_interfaces.py @@ -331,7 +331,7 @@ def _on_topic_requested(self, event: TopicRequestedEvent): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 37 +LIBPATCH = 38 PYDEPS = ["ops>=2.0.0"] @@ -2606,6 +2606,14 @@ def set_version(self, relation_id: int, version: str) -> None: """ self.update_relation_data(relation_id, {"version": version}) + def set_subordinated(self, relation_id: int) -> None: + """Raises the subordinated flag in the application relation databag. 
+ + Args: + relation_id: the identifier for a particular relation. + """ + self.update_relation_data(relation_id, {"subordinated": "true"}) + class DatabaseProviderEventHandlers(EventHandlers): """Provider-side of the database relation handlers.""" @@ -2842,6 +2850,21 @@ def _on_relation_created_event(self, event: RelationCreatedEvent) -> None: def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: """Event emitted when the database relation has changed.""" + is_subordinate = False + remote_unit_data = None + for key in event.relation.data.keys(): + if isinstance(key, Unit) and not key.name.startswith(self.charm.app.name): + remote_unit_data = event.relation.data[key] + elif isinstance(key, Application) and key.name != self.charm.app.name: + is_subordinate = event.relation.data[key].get("subordinated") == "true" + + if is_subordinate: + if not remote_unit_data: + return + + if remote_unit_data.get("state") != "ready": + return + # Check which data has changed to emit customs events. diff = self._diff(event) diff --git a/lib/charms/tempo_k8s/v1/charm_tracing.py b/lib/charms/tempo_k8s/v1/charm_tracing.py index 000e0cb5..ebe022e0 100644 --- a/lib/charms/tempo_k8s/v1/charm_tracing.py +++ b/lib/charms/tempo_k8s/v1/charm_tracing.py @@ -9,21 +9,57 @@ This means that, if your charm is related to, for example, COS' Tempo charm, you will be able to inspect in real time from the Grafana dashboard the execution flow of your charm. 
-To start using this library, you need to do two things: +# Quickstart +Fetch the following charm libs (and ensure the minimum version/revision numbers are satisfied): + + charmcraft fetch-lib charms.tempo_k8s.v2.tracing # >= 1.10 + charmcraft fetch-lib charms.tempo_k8s.v1.charm_tracing # >= 2.7 + +Then edit your charm code to include: + +```python +# import the necessary charm libs +from charms.tempo_k8s.v2.tracing import TracingEndpointRequirer, charm_tracing_config +from charms.tempo_k8s.v1.charm_tracing import charm_tracing + +# decorate your charm class with charm_tracing: +@charm_tracing( + # forward-declare the instance attributes that the instrumentor will look up to obtain the + # tempo endpoint and server certificate + tracing_endpoint="tracing_endpoint", + server_cert="server_cert" +) +class MyCharm(CharmBase): + _path_to_cert = "/path/to/cert.crt" + # path to cert file **in the charm container**. Its presence will be used to determine whether + # the charm is ready to use tls for encrypting charm traces. If your charm does not support tls, + # you can ignore this and pass None to charm_tracing_config. + # If you do support TLS, you'll need to make sure that the server cert is copied to this location + # and kept up to date so the instrumentor can use it. + + def __init__(self, ...): + ... + self.tracing = TracingEndpointRequirer(self, ...) + self.tracing_endpoint, self.server_cert = charm_tracing_config(self.tracing, self._path_to_cert) +``` + +# Detailed usage +To use this library, you need to do two things: 1) decorate your charm class with `@trace_charm(tracing_endpoint="my_tracing_endpoint")` -2) add to your charm a "my_tracing_endpoint" (you can name this attribute whatever you like) **property** -that returns an otlp http/https endpoint url. 
If you are using the `TracingEndpointProvider` as -`self.tracing = TracingEndpointProvider(self)`, the implementation could be: +2) add to your charm a "my_tracing_endpoint" (you can name this attribute whatever you like) +**property**, **method** or **instance attribute** that returns an otlp http/https endpoint url. +If you are using the ``charms.tempo_k8s.v2.tracing.TracingEndpointRequirer`` as +``self.tracing = TracingEndpointRequirer(self)``, the implementation could be: ``` @property def my_tracing_endpoint(self) -> Optional[str]: '''Tempo endpoint for charm tracing''' if self.tracing.is_ready(): - return self.tracing.otlp_http_endpoint() + return self.tracing.get_endpoint("otlp_http") else: return None ``` @@ -33,19 +69,52 @@ def my_tracing_endpoint(self) -> Optional[str]: - every event as a span (including custom events) - every charm method call (except dunders) as a span -if you wish to add more fine-grained information to the trace, you can do so by getting a hold of the tracer like so: + +## TLS support +If your charm integrates with a TLS provider which is also trusted by the tracing provider (the Tempo charm), +you can configure ``charm_tracing`` to use TLS by passing a ``server_cert`` parameter to the decorator. + +If your charm is not trusting the same CA as the Tempo endpoint it is sending traces to, +you'll need to implement a cert-transfer relation to obtain the CA certificate from the same +CA that Tempo is using. + +For example: +``` +from charms.tempo_k8s.v1.charm_tracing import trace_charm +@trace_charm( + tracing_endpoint="my_tracing_endpoint", + server_cert="_server_cert" +) +class MyCharm(CharmBase): + self._server_cert = "/path/to/server.crt" + ... 
+ + def on_tls_changed(self, e) -> Optional[str]: + # update the server cert on the charm container for charm tracing + Path(self._server_cert).write_text(self.get_server_cert()) + + def on_tls_broken(self, e) -> Optional[str]: + # remove the server cert so charm_tracing won't try to use tls anymore + Path(self._server_cert).unlink() +``` + + +## More fine-grained manual instrumentation +if you wish to add more spans to the trace, you can do so by getting a hold of the tracer like so: ``` import opentelemetry ... - @property - def tracer(self) -> opentelemetry.trace.Tracer: - return opentelemetry.trace.get_tracer(type(self).__name__) +def get_tracer(self) -> opentelemetry.trace.Tracer: + return opentelemetry.trace.get_tracer(type(self).__name__) ``` By default, the tracer is named after the charm type. If you wish to override that, you can pass -a different `service_name` argument to `trace_charm`. +a different ``service_name`` argument to ``trace_charm``. + +See the official opentelemetry Python SDK documentation for usage: +https://opentelemetry-python.readthedocs.io/en/latest/ -*Upgrading from `v0`:* +## Upgrading from `v0` If you are upgrading from `charm_tracing` v0, you need to take the following steps (assuming you already have the newest version of the library in your charm): @@ -55,8 +124,9 @@ def tracer(self) -> opentelemetry.trace.Tracer: `opentelemetry-exporter-otlp-proto-http>=1.21.0`. -2) Update the charm method referenced to from `@trace` and `@trace_charm`, -to return from `TracingEndpointRequirer.otlp_http_endpoint()` instead of `grpc_http`. For example: +2) Update the charm method referenced to from ``@trace`` and ``@trace_charm``, +to return from ``TracingEndpointRequirer.get_endpoint("otlp_http")`` instead of ``grpc_http``. 
+For example: ``` from charms.tempo_k8s.v0.charm_tracing import trace_charm @@ -72,7 +142,7 @@ class MyCharm(CharmBase): def my_tracing_endpoint(self) -> Optional[str]: '''Tempo endpoint for charm tracing''' if self.tracing.is_ready(): - return self.tracing.otlp_grpc_endpoint() + return self.tracing.otlp_grpc_endpoint() # OLD API, DEPRECATED. else: return None ``` @@ -93,13 +163,13 @@ class MyCharm(CharmBase): def my_tracing_endpoint(self) -> Optional[str]: '''Tempo endpoint for charm tracing''' if self.tracing.is_ready(): - return self.tracing.otlp_http_endpoint() + return self.tracing.get_endpoint("otlp_http") # NEW API, use this. else: return None ``` -3) If you were passing a certificate using `server_cert`, you need to change it to provide an *absolute* path to -the certificate file. +3) If you were passing a certificate (str) using `server_cert`, you need to change it to +provide an *absolute* path to the certificate file instead. """ import functools @@ -122,19 +192,19 @@ def my_tracing_endpoint(self) -> Optional[str]: ) import opentelemetry +import ops from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter from opentelemetry.sdk.resources import Resource from opentelemetry.sdk.trace import Span, TracerProvider from opentelemetry.sdk.trace.export import BatchSpanProcessor +from opentelemetry.trace import INVALID_SPAN, Tracer +from opentelemetry.trace import get_current_span as otlp_get_current_span from opentelemetry.trace import ( - INVALID_SPAN, - Tracer, get_tracer, get_tracer_provider, set_span_in_context, set_tracer_provider, ) -from opentelemetry.trace import get_current_span as otlp_get_current_span from ops.charm import CharmBase from ops.framework import Framework @@ -147,14 +217,23 @@ def my_tracing_endpoint(self) -> Optional[str]: # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 8 +LIBPATCH = 11 PYDEPS = 
["opentelemetry-exporter-otlp-proto-http==1.21.0"] logger = logging.getLogger("tracing") +dev_logger = logging.getLogger("tracing-dev") + +# set this to 0 if you are debugging/developing this library source +dev_logger.setLevel(logging.CRITICAL) + +_CharmType = Type[CharmBase] # the type CharmBase and any subclass thereof +_C = TypeVar("_C", bound=_CharmType) +_T = TypeVar("_T", bound=type) +_F = TypeVar("_F", bound=Type[Callable]) tracer: ContextVar[Tracer] = ContextVar("tracer") -_GetterType = Union[Callable[[CharmBase], Optional[str]], property] +_GetterType = Union[Callable[[_CharmType], Optional[str]], property] CHARM_TRACING_ENABLED = "CHARM_TRACING_ENABLED" @@ -220,11 +299,6 @@ def _span(name: str) -> Generator[Optional[Span], Any, Any]: yield None -_C = TypeVar("_C", bound=Type[CharmBase]) -_T = TypeVar("_T", bound=type) -_F = TypeVar("_F", bound=Type[Callable]) - - class TracingError(RuntimeError): """Base class for errors raised by this module.""" @@ -233,60 +307,78 @@ class UntraceableObjectError(TracingError): """Raised when an object you're attempting to instrument cannot be autoinstrumented.""" -def _get_tracing_endpoint(tracing_endpoint_getter, self, charm): - if isinstance(tracing_endpoint_getter, property): - tracing_endpoint = tracing_endpoint_getter.__get__(self) - else: # method or callable - tracing_endpoint = tracing_endpoint_getter(self) +class TLSError(TracingError): + """Raised when the tracing endpoint is https but we don't have a cert yet.""" + + +def _get_tracing_endpoint( + tracing_endpoint_attr: str, + charm_instance: object, + charm_type: type, +): + _tracing_endpoint = getattr(charm_instance, tracing_endpoint_attr) + if callable(_tracing_endpoint): + tracing_endpoint = _tracing_endpoint() + else: + tracing_endpoint = _tracing_endpoint if tracing_endpoint is None: - logger.debug( - f"{charm}.{tracing_endpoint_getter} returned None; quietly disabling " - f"charm_tracing for the run." 
- ) return + elif not isinstance(tracing_endpoint, str): raise TypeError( - f"{charm}.{tracing_endpoint_getter} should return a tempo endpoint (string); " + f"{charm_type.__name__}.{tracing_endpoint_attr} should resolve to a tempo endpoint (string); " f"got {tracing_endpoint} instead." ) - else: - logger.debug(f"Setting up span exporter to endpoint: {tracing_endpoint}/v1/traces") + + dev_logger.debug(f"Setting up span exporter to endpoint: {tracing_endpoint}/v1/traces") return f"{tracing_endpoint}/v1/traces" -def _get_server_cert(server_cert_getter, self, charm): - if isinstance(server_cert_getter, property): - server_cert = server_cert_getter.__get__(self) - else: # method or callable - server_cert = server_cert_getter(self) +def _get_server_cert( + server_cert_attr: str, + charm_instance: ops.CharmBase, + charm_type: Type[ops.CharmBase], +): + _server_cert = getattr(charm_instance, server_cert_attr) + if callable(_server_cert): + server_cert = _server_cert() + else: + server_cert = _server_cert if server_cert is None: logger.warning( - f"{charm}.{server_cert_getter} returned None; sending traces over INSECURE connection." + f"{charm_type}.{server_cert_attr} is None; sending traces over INSECURE connection." ) return elif not Path(server_cert).is_absolute(): raise ValueError( - f"{charm}.{server_cert_getter} should return a valid tls cert absolute path (string | Path)); " + f"{charm_type}.{server_cert_attr} should resolve to a valid tls cert absolute path (string | Path)); " f"got {server_cert} instead." 
) return server_cert def _setup_root_span_initializer( - charm: Type[CharmBase], - tracing_endpoint_getter: _GetterType, - server_cert_getter: Optional[_GetterType], + charm_type: _CharmType, + tracing_endpoint_attr: str, + server_cert_attr: Optional[str], service_name: Optional[str] = None, ): """Patch the charm's initializer.""" - original_init = charm.__init__ + original_init = charm_type.__init__ @functools.wraps(original_init) def wrap_init(self: CharmBase, framework: Framework, *args, **kwargs): + # we're using 'self' here because this is charm init code, makes sense to read what's below + # from the perspective of the charm. Self.unit.name... + original_init(self, framework, *args, **kwargs) + # we call this from inside the init context instead of, say, _autoinstrument, because we want it to + # be checked on a per-charm-instantiation basis, not on a per-type-declaration one. if not is_enabled(): + # this will only happen during unittesting, hopefully, so it's fine to log a + # bit more verbosely logger.info("Tracing DISABLED: skipping root span initialization") return @@ -295,41 +387,41 @@ def wrap_init(self: CharmBase, framework: Framework, *args, **kwargs): # self.handle = Handle(None, self.handle_kind, None) original_event_context = framework._event_context + # default service name isn't just app name because it could conflict with the workload service name + _service_name = service_name or f"{self.app.name}-charm" - _service_name = service_name or self.app.name - + unit_name = self.unit.name resource = Resource.create( attributes={ "service.name": _service_name, "compose_service": _service_name, "charm_type": type(self).__name__, # juju topology - "juju_unit": self.unit.name, + "juju_unit": unit_name, "juju_application": self.app.name, "juju_model": self.model.name, "juju_model_uuid": self.model.uuid, } ) provider = TracerProvider(resource=resource) - try: - tracing_endpoint = _get_tracing_endpoint(tracing_endpoint_getter, self, charm) - except 
Exception: - # if anything goes wrong with retrieving the endpoint, we go on with tracing disabled. - # better than breaking the charm. - logger.exception( - f"exception retrieving the tracing " - f"endpoint from {charm}.{tracing_endpoint_getter}; " - f"proceeding with charm_tracing DISABLED. " - ) - return + + # if anything goes wrong with retrieving the endpoint, we let the exception bubble up. + tracing_endpoint = _get_tracing_endpoint(tracing_endpoint_attr, self, charm_type) if not tracing_endpoint: + # tracing is off if tracing_endpoint is None return server_cert: Optional[Union[str, Path]] = ( - _get_server_cert(server_cert_getter, self, charm) if server_cert_getter else None + _get_server_cert(server_cert_attr, self, charm_type) if server_cert_attr else None ) + if tracing_endpoint.startswith("https://") and not server_cert: + raise TLSError( + "Tracing endpoint is https, but no server_cert has been passed." + "Please point @trace_charm to a `server_cert` attr." + ) + exporter = OTLPSpanExporter( endpoint=tracing_endpoint, certificate_file=str(Path(server_cert).absolute()) if server_cert else None, @@ -342,16 +434,18 @@ def wrap_init(self: CharmBase, framework: Framework, *args, **kwargs): _tracer = get_tracer(_service_name) # type: ignore _tracer_token = tracer.set(_tracer) - dispatch_path = os.getenv("JUJU_DISPATCH_PATH", "") + dispatch_path = os.getenv("JUJU_DISPATCH_PATH", "") # something like hooks/install + event_name = dispatch_path.split("/")[1] if "/" in dispatch_path else dispatch_path + root_span_name = f"{unit_name}: {event_name} event" + span = _tracer.start_span(root_span_name, attributes={"juju.dispatch_path": dispatch_path}) # all these shenanigans are to work around the fact that the opentelemetry tracing API is built # on the assumption that spans will be used as contextmanagers. # Since we don't (as we need to close the span on framework.commit), # we need to manually set the root span as current. 
- span = _tracer.start_span("charm exec", attributes={"juju.dispatch_path": dispatch_path}) ctx = set_span_in_context(span) - # log a trace id so we can look it up in tempo. + # log a trace id, so we can pick it up from the logs (and jhack) to look it up in tempo. root_trace_id = hex(span.get_span_context().trace_id)[2:] # strip 0x prefix logger.debug(f"Starting root trace with id={root_trace_id!r}.") @@ -359,6 +453,7 @@ def wrap_init(self: CharmBase, framework: Framework, *args, **kwargs): @contextmanager def wrap_event_context(event_name: str): + dev_logger.info(f"entering event context: {event_name}") # when the framework enters an event context, we create a span. with _span("event: " + event_name) as event_context_span: if event_context_span: @@ -372,6 +467,7 @@ def wrap_event_context(event_name: str): @functools.wraps(original_close) def wrap_close(): + dev_logger.info("tearing down tracer and flushing traces") span.end() opentelemetry.context.detach(span_token) # type: ignore tracer.reset(_tracer_token) @@ -383,7 +479,7 @@ def wrap_close(): framework.close = wrap_close return - charm.__init__ = wrap_init + charm_type.__init__ = wrap_init # type: ignore def trace_charm( @@ -391,7 +487,7 @@ def trace_charm( server_cert: Optional[str] = None, service_name: Optional[str] = None, extra_types: Sequence[type] = (), -): +) -> Callable[[_T], _T]: """Autoinstrument the decorated charm with tracing telemetry. Use this function to get out-of-the-box traces for all events emitted on this charm and all @@ -399,7 +495,7 @@ def trace_charm( Usage: >>> from charms.tempo_k8s.v1.charm_tracing import trace_charm - >>> from charms.tempo_k8s.v1.tracing import TracingEndpointProvider + >>> from charms.tempo_k8s.v1.tracing import TracingEndpointRequirer >>> from ops import CharmBase >>> >>> @trace_charm( @@ -409,7 +505,7 @@ def trace_charm( >>> >>> def __init__(self, framework: Framework): >>> ... 
- >>> self.tracing = TracingEndpointProvider(self) + >>> self.tracing = TracingEndpointRequirer(self) >>> >>> @property >>> def tempo_otlp_http_endpoint(self) -> Optional[str]: @@ -418,24 +514,28 @@ def trace_charm( >>> else: >>> return None >>> - :param server_cert: method or property on the charm type that returns an - optional absolute path to a tls certificate to be used when sending traces to a remote server. - If it returns None, an _insecure_ connection will be used. - :param tracing_endpoint: name of a property on the charm type that returns an - optional (fully resolvable) tempo url. If None, tracing will be effectively disabled. Else, traces will be - pushed to that endpoint. + + :param tracing_endpoint: name of a method, property or attribute on the charm type that returns an + optional (fully resolvable) tempo url to which the charm traces will be pushed. + If None, tracing will be effectively disabled. + :param server_cert: name of a method, property or attribute on the charm type that returns an + optional absolute path to a CA certificate file to be used when sending traces to a remote server. + If it returns None, an _insecure_ connection will be used. To avoid errors in transient + situations where the endpoint is already https but there is no certificate on disk yet, it + is recommended to disable tracing (by returning None from the tracing_endpoint) altogether + until the cert has been written to disk. :param service_name: service name tag to attach to all traces generated by this charm. Defaults to the juju application name this charm is deployed under. :param extra_types: pass any number of types that you also wish to autoinstrument. For example, charm libs, relation endpoint wrappers, workload abstractions, ... 
""" - def _decorator(charm_type: Type[CharmBase]): + def _decorator(charm_type: _T) -> _T: """Autoinstrument the wrapped charmbase type.""" _autoinstrument( charm_type, - tracing_endpoint_getter=getattr(charm_type, tracing_endpoint), - server_cert_getter=getattr(charm_type, server_cert) if server_cert else None, + tracing_endpoint_attr=tracing_endpoint, + server_cert_attr=server_cert, service_name=service_name, extra_types=extra_types, ) @@ -445,12 +545,12 @@ def _decorator(charm_type: Type[CharmBase]): def _autoinstrument( - charm_type: Type[CharmBase], - tracing_endpoint_getter: _GetterType, - server_cert_getter: Optional[_GetterType] = None, + charm_type: _T, + tracing_endpoint_attr: str, + server_cert_attr: Optional[str] = None, service_name: Optional[str] = None, extra_types: Sequence[type] = (), -) -> Type[CharmBase]: +) -> _T: """Set up tracing on this charm class. Use this function to get out-of-the-box traces for all events emitted on this charm and all @@ -462,29 +562,32 @@ def _autoinstrument( >>> from ops.main import main >>> _autoinstrument( >>> MyCharm, - >>> tracing_endpoint_getter=MyCharm.tempo_otlp_http_endpoint, + >>> tracing_endpoint_attr="tempo_otlp_http_endpoint", >>> service_name="MyCharm", >>> extra_types=(Foo, Bar) >>> ) >>> main(MyCharm) :param charm_type: the CharmBase subclass to autoinstrument. - :param server_cert_getter: method or property on the charm type that returns an - optional absolute path to a tls certificate to be used when sending traces to a remote server. - This needs to be a valid path to a certificate. - :param tracing_endpoint_getter: method or property on the charm type that returns an - optional tempo url. If None, tracing will be effectively disabled. Else, traces will be - pushed to that endpoint. + :param tracing_endpoint_attr: name of a method, property or attribute on the charm type that returns an + optional (fully resolvable) tempo url to which the charm traces will be pushed. 
+ If None, tracing will be effectively disabled. + :param server_cert_attr: name of a method, property or attribute on the charm type that returns an + optional absolute path to a CA certificate file to be used when sending traces to a remote server. + If it returns None, an _insecure_ connection will be used. To avoid errors in transient + situations where the endpoint is already https but there is no certificate on disk yet, it + is recommended to disable tracing (by returning None from the tracing_endpoint) altogether + until the cert has been written to disk. :param service_name: service name tag to attach to all traces generated by this charm. Defaults to the juju application name this charm is deployed under. :param extra_types: pass any number of types that you also wish to autoinstrument. For example, charm libs, relation endpoint wrappers, workload abstractions, ... """ - logger.info(f"instrumenting {charm_type}") + dev_logger.info(f"instrumenting {charm_type}") _setup_root_span_initializer( charm_type, - tracing_endpoint_getter, - server_cert_getter=server_cert_getter, + tracing_endpoint_attr, + server_cert_attr=server_cert_attr, service_name=service_name, ) trace_type(charm_type) @@ -501,12 +604,12 @@ def trace_type(cls: _T) -> _T: It assumes that this class is only instantiated after a charm type decorated with `@trace_charm` has been instantiated. 
""" - logger.info(f"instrumenting {cls}") + dev_logger.info(f"instrumenting {cls}") for name, method in inspect.getmembers(cls, predicate=inspect.isfunction): - logger.info(f"discovered {method}") + dev_logger.info(f"discovered {method}") if method.__name__.startswith("__"): - logger.info(f"skipping {method} (dunder)") + dev_logger.info(f"skipping {method} (dunder)") continue new_method = trace_method(method) @@ -534,7 +637,7 @@ def trace_function(function: _F) -> _F: def _trace_callable(callable: _F, qualifier: str) -> _F: - logger.info(f"instrumenting {callable}") + dev_logger.info(f"instrumenting {callable}") # sig = inspect.signature(callable) @functools.wraps(callable) diff --git a/lib/charms/tempo_k8s/v2/tracing.py b/lib/charms/tempo_k8s/v2/tracing.py index b4e341c3..8b9fb4f3 100644 --- a/lib/charms/tempo_k8s/v2/tracing.py +++ b/lib/charms/tempo_k8s/v2/tracing.py @@ -72,6 +72,7 @@ def __init__(self, *args): import enum import json import logging +from pathlib import Path from typing import ( TYPE_CHECKING, Any, @@ -82,6 +83,7 @@ def __init__(self, *args): Optional, Sequence, Tuple, + Union, cast, ) @@ -105,7 +107,7 @@ def __init__(self, *args): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 6 +LIBPATCH = 7 PYDEPS = ["pydantic"] @@ -921,3 +923,68 @@ def get_endpoint( return None return endpoint + + +def charm_tracing_config( + endpoint_requirer: TracingEndpointRequirer, cert_path: Optional[Union[Path, str]] +) -> Tuple[Optional[str], Optional[str]]: + """Utility function to determine the charm_tracing config you will likely want. + + If no endpoint is provided: + disable charm tracing. + If https endpoint is provided but cert_path is not found on disk: + disable charm tracing. 
+ If https endpoint is provided and cert_path is None: + ERROR + Else: + proceed with charm tracing (with or without tls, as appropriate) + + Usage: + If you are using charm_tracing >= v1.9: + >>> from lib.charms.tempo_k8s.v1.charm_tracing import trace_charm + >>> from lib.charms.tempo_k8s.v2.tracing import charm_tracing_config + >>> @trace_charm(tracing_endpoint="my_endpoint", cert_path="cert_path") + >>> class MyCharm(...): + >>> _cert_path = "/path/to/cert/on/charm/container.crt" + >>> def __init__(self, ...): + >>> self.tracing = TracingEndpointRequirer(...) + >>> self.my_endpoint, self.cert_path = charm_tracing_config( + ... self.tracing, self._cert_path) + + If you are using charm_tracing < v1.9: + >>> from lib.charms.tempo_k8s.v1.charm_tracing import trace_charm + >>> from lib.charms.tempo_k8s.v2.tracing import charm_tracing_config + >>> @trace_charm(tracing_endpoint="my_endpoint", cert_path="cert_path") + >>> class MyCharm(...): + >>> _cert_path = "/path/to/cert/on/charm/container.crt" + >>> def __init__(self, ...): + >>> self.tracing = TracingEndpointRequirer(...) + >>> self._my_endpoint, self._cert_path = charm_tracing_config( + ... 
self.tracing, self._cert_path) + >>> @property + >>> def my_endpoint(self): + >>> return self._my_endpoint + >>> @property + >>> def cert_path(self): + >>> return self._cert_path + + """ + if not endpoint_requirer.is_ready(): + return None, None + + endpoint = endpoint_requirer.get_endpoint("otlp_http") + if not endpoint: + return None, None + + is_https = endpoint.startswith("https://") + + if is_https: + if cert_path is None: + raise TracingError("Cannot send traces to an https endpoint without a certificate.") + elif not Path(cert_path).exists(): + # if endpoint is https BUT we don't have a server_cert yet: + # disable charm tracing until we do to prevent tls errors + return None, None + return endpoint, str(cert_path) + else: + return endpoint, None diff --git a/src/relations/cos.py b/src/relations/cos.py index 2041fe7d..de4907cb 100644 --- a/src/relations/cos.py +++ b/src/relations/cos.py @@ -28,12 +28,13 @@ class ExporterConfig: url: str username: str password: str + listen_port: str class COSRelation: """Relation with the cos bundle.""" - _EXPORTER_PORT = "49152" + _EXPORTER_PORT = "9152" HTTP_SERVER_PORT = "8443" _NAME = "cos-agent" _PEER_RELATION_NAME = "cos" @@ -77,6 +78,7 @@ def exporter_user_config(self) -> ExporterConfig: url=f"https://127.0.0.1:{self.HTTP_SERVER_PORT}", username=self.MONITORING_USERNAME, password=self.get_monitoring_password(), + listen_port=self._EXPORTER_PORT, ) @property diff --git a/src/snap.py b/src/snap.py index 8670b7e5..2e5ddd96 100644 --- a/src/snap.py +++ b/src/snap.py @@ -25,8 +25,8 @@ _SNAP_NAME = "charmed-mysql" REVISIONS: typing.Dict[str, str] = { # Keep in sync with `workload_version` file - "x86_64": "106", - "aarch64": "107", + "x86_64": "109", + "aarch64": "110", } revision = REVISIONS[platform.machine()] _snap = snap_lib.SnapCache()[_SNAP_NAME] @@ -220,6 +220,7 @@ def update_mysql_router_exporter_service( if enabled: _snap.set( { + "mysqlrouter-exporter.listen-port": config.listen_port, "mysqlrouter-exporter.user": 
config.username, "mysqlrouter-exporter.password": config.password, "mysqlrouter-exporter.url": config.url, @@ -241,6 +242,7 @@ def update_mysql_router_exporter_service( _snap.start([self._EXPORTER_SERVICE_NAME], enable=True) else: _snap.stop([self._EXPORTER_SERVICE_NAME], disable=True) + _snap.unset("mysqlrouter-exporter.listen-port") _snap.unset("mysqlrouter-exporter.user") _snap.unset("mysqlrouter-exporter.password") _snap.unset("mysqlrouter-exporter.url") diff --git a/tests/integration/test_exporter.py b/tests/integration/test_exporter.py index 36c0b2af..9e0660fd 100644 --- a/tests/integration/test_exporter.py +++ b/tests/integration/test_exporter.py @@ -6,16 +6,14 @@ import logging import pytest +import requests import tenacity -import urllib3 from pytest_operator.plugin import OpsTest -from . import juju_ from .helpers import ( APPLICATION_DEFAULT_APP_NAME, MYSQL_DEFAULT_APP_NAME, MYSQL_ROUTER_DEFAULT_APP_NAME, - get_tls_certificate_issuer, ) logger = logging.getLogger(__name__) @@ -27,20 +25,11 @@ SLOW_TIMEOUT = 25 * 60 RETRY_TIMEOUT = 3 * 60 -if juju_.is_3_or_higher: - TLS_APP_NAME = "self-signed-certificates" - TLS_CONFIG = {"ca-common-name": "Test CA"} -else: - TLS_APP_NAME = "tls-certificates-operator" - TLS_CONFIG = {"generate-self-signed-certificates": "true", "ca-common-name": "Test CA"} - @pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_exporter_endpoint(ops_test: OpsTest, mysql_router_charm_series: str) -> None: """Test that exporter endpoint is functional.""" - http = urllib3.PoolManager() - # Build and deploy applications mysqlrouter_charm = await ops_test.build_charm(".") @@ -129,114 +118,12 @@ async def test_exporter_endpoint(ops_test: OpsTest, mysql_router_charm_series: s unit_address = await unit.get_public_address() try: - http.request("GET", f"http://{unit_address}:49152/metrics") - except urllib3.exceptions.MaxRetryError as e: - assert ( - "[Errno 111] Connection refused" in e.reason.args[0] - ), "❌ expected connection refused 
error" + requests.get(f"http://{unit_address}:9152/metrics", stream=False) + except requests.exceptions.ConnectionError as e: + assert "[Errno 111] Connection refused" in str(e), "❌ expected connection refused error" else: assert False, "❌ can connect to metrics endpoint without relation with cos" - # clear connection pool before relating cos-agent which starts mysql_router_exporter - http.clear() - - logger.info("Relating mysqlrouter with grafana agent") - await ops_test.model.relate( - f"{GRAFANA_AGENT_APP_NAME}:cos-agent", f"{MYSQL_ROUTER_APP_NAME}:cos-agent" - ) - - for attempt in tenacity.Retrying( - reraise=True, - stop=tenacity.stop_after_delay(RETRY_TIMEOUT), - wait=tenacity.wait_fixed(10), - ): - with attempt: - jmx_resp = http.request("GET", f"http://{unit_address}:49152/metrics") - assert ( - jmx_resp.status == 200 - ), "❌ cannot connect to metrics endpoint with relation with cos" - assert "mysqlrouter_route_health" in str( - jmx_resp.data - ), "❌ did not find expected metric in response" - - logger.info("Removing relation between mysqlrouter and grafana agent") - await mysql_router_app.remove_relation( - f"{GRAFANA_AGENT_APP_NAME}:cos-agent", f"{MYSQL_ROUTER_APP_NAME}:cos-agent" - ) - - for attempt in tenacity.Retrying( - reraise=True, - stop=tenacity.stop_after_delay(RETRY_TIMEOUT), - wait=tenacity.wait_fixed(10), - ): - with attempt: - try: - http.request("GET", f"http://{unit_address}:49152/metrics") - except urllib3.exceptions.MaxRetryError as e: - assert ( - "[Errno 111] Connection refused" in e.reason.args[0] - ), "❌ expected connection refused error" - else: - assert False, "❌ can connect to metrics endpoint without relation with cos" - - http.clear() - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_exporter_endpoint_with_tls(ops_test: OpsTest) -> None: - """Test that the exporter endpoint works when related with TLS""" - http = urllib3.PoolManager() - - mysql_router_app = ops_test.model.applications[MYSQL_ROUTER_APP_NAME] - 
mysql_router_unit = mysql_router_app.units[0] - - issuer = await get_tls_certificate_issuer( - ops_test, - mysql_router_unit.name, - socket="/var/snap/charmed-mysql/common/run/mysqlrouter/mysql.sock", - ) - assert ( - "Issuer: CN = MySQL_Router_Auto_Generated_CA_Certificate" in issuer - ), "Expected mysqlrouter autogenerated certificate" - - logger.info(f"Deploying {TLS_APP_NAME}") - await ops_test.model.deploy( - TLS_APP_NAME, - application_name=TLS_APP_NAME, - channel="stable", - config=TLS_CONFIG, - series="jammy", - ) - await ops_test.model.wait_for_idle([TLS_APP_NAME], status="active", timeout=SLOW_TIMEOUT) - - logger.info(f"Relating mysqlrouter with {TLS_APP_NAME}") - - await ops_test.model.relate( - f"{MYSQL_ROUTER_APP_NAME}:certificates", f"{TLS_APP_NAME}:certificates" - ) - - mysql_test_app = ops_test.model.applications[APPLICATION_APP_NAME] - unit_address = await mysql_test_app.units[0].get_public_address() - - for attempt in tenacity.Retrying( - reraise=True, - stop=tenacity.stop_after_delay(RETRY_TIMEOUT), - wait=tenacity.wait_fixed(10), - ): - with attempt: - try: - http.request("GET", f"http://{unit_address}:49152/metrics") - except urllib3.exceptions.MaxRetryError as e: - assert ( - "[Errno 111] Connection refused" in e.reason.args[0] - ), "❌ expected connection refused error" - else: - assert False, "❌ can connect to metrics endpoint without relation with cos" - - # clear connection pool before relating cos-agent which starts mysql_router_exporter - http.clear() - logger.info("Relating mysqlrouter with grafana agent") await ops_test.model.relate( f"{GRAFANA_AGENT_APP_NAME}:cos-agent", f"{MYSQL_ROUTER_APP_NAME}:cos-agent" @@ -248,20 +135,12 @@ async def test_exporter_endpoint_with_tls(ops_test: OpsTest) -> None: wait=tenacity.wait_fixed(10), ): with attempt: - jmx_resp = http.request("GET", f"http://{unit_address}:49152/metrics") + response = requests.get(f"http://{unit_address}:9152/metrics", stream=False) + response.raise_for_status() assert ( - 
jmx_resp.status == 200 - ), "❌ cannot connect to metrics endpoint with relation with cos" - assert "mysqlrouter_route_health" in str( - jmx_resp.data + "mysqlrouter_route_health" in response.text ), "❌ did not find expected metric in response" - - issuer = await get_tls_certificate_issuer( - ops_test, - mysql_router_unit.name, - socket="/var/snap/charmed-mysql/common/run/mysqlrouter/mysql.sock", - ) - assert "CN = Test CA" in issuer, f"Expected mysqlrouter certificate from {TLS_APP_NAME}" + response.close() logger.info("Removing relation between mysqlrouter and grafana agent") await mysql_router_app.remove_relation( @@ -275,32 +154,10 @@ async def test_exporter_endpoint_with_tls(ops_test: OpsTest) -> None: ): with attempt: try: - http.request("GET", f"http://{unit_address}:49152/metrics") - except urllib3.exceptions.MaxRetryError as e: - assert ( - "[Errno 111] Connection refused" in e.reason.args[0] + requests.get(f"http://{unit_address}:9152/metrics", stream=False) + except requests.exceptions.ConnectionError as e: + assert "[Errno 111] Connection refused" in str( + e ), "❌ expected connection refused error" else: assert False, "❌ can connect to metrics endpoint without relation with cos" - - logger.info(f"Removing relation between mysqlrouter and {TLS_APP_NAME}") - await mysql_router_app.remove_relation( - f"{MYSQL_ROUTER_APP_NAME}:certificates", f"{TLS_APP_NAME}:certificates" - ) - - for attempt in tenacity.Retrying( - reraise=True, - stop=tenacity.stop_after_delay(RETRY_TIMEOUT), - wait=tenacity.wait_fixed(10), - ): - with attempt: - issuer = await get_tls_certificate_issuer( - ops_test, - mysql_router_unit.name, - socket="/var/snap/charmed-mysql/common/run/mysqlrouter/mysql.sock", - ) - assert ( - "Issuer: CN = MySQL_Router_Auto_Generated_CA_Certificate" in issuer - ), "Expected mysqlrouter autogenerated certificate" - - http.clear() diff --git a/tests/integration/test_exporter_with_tls.py b/tests/integration/test_exporter_with_tls.py new file mode 100644 
index 00000000..cfe11fc6
--- /dev/null
+++ b/tests/integration/test_exporter_with_tls.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python3
+# Copyright 2022 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+import asyncio
+import logging
+
+import pytest
+import requests
+import tenacity
+from pytest_operator.plugin import OpsTest
+
+from . import juju_
+from .helpers import (
+    APPLICATION_DEFAULT_APP_NAME,
+    MYSQL_DEFAULT_APP_NAME,
+    MYSQL_ROUTER_DEFAULT_APP_NAME,
+    get_tls_certificate_issuer,
+)
+
+logger = logging.getLogger(__name__)
+
+MYSQL_APP_NAME = MYSQL_DEFAULT_APP_NAME
+MYSQL_ROUTER_APP_NAME = MYSQL_ROUTER_DEFAULT_APP_NAME
+APPLICATION_APP_NAME = APPLICATION_DEFAULT_APP_NAME
+GRAFANA_AGENT_APP_NAME = "grafana-agent"
+SLOW_TIMEOUT = 25 * 60
+RETRY_TIMEOUT = 3 * 60
+# Per-request timeout in seconds. tenacity only checks RETRY_TIMEOUT between attempts,
+# so a request without its own timeout could block the retry loop indefinitely
+REQUEST_TIMEOUT = 30
+
+if juju_.is_3_or_higher:
+    TLS_APP_NAME = "self-signed-certificates"
+    TLS_CONFIG = {"ca-common-name": "Test CA"}
+else:
+    TLS_APP_NAME = "tls-certificates-operator"
+    TLS_CONFIG = {"generate-self-signed-certificates": "true", "ca-common-name": "Test CA"}
+
+
+def _retrying() -> tenacity.Retrying:
+    """Return a fresh retry policy: retry every 10s until RETRY_TIMEOUT has elapsed."""
+    return tenacity.Retrying(
+        reraise=True,
+        stop=tenacity.stop_after_delay(RETRY_TIMEOUT),
+        wait=tenacity.wait_fixed(10),
+    )
+
+
+def _assert_metrics_endpoint_refuses_connections(metrics_url: str) -> None:
+    """Retry until a GET request to the exporter metrics endpoint is refused.
+
+    Args:
+        metrics_url: URL of the exporter metrics endpoint to probe
+    """
+    for attempt in _retrying():
+        with attempt:
+            try:
+                requests.get(metrics_url, stream=False, timeout=REQUEST_TIMEOUT)
+            except requests.exceptions.ConnectionError as e:
+                assert "[Errno 111] Connection refused" in str(
+                    e
+                ), "❌ expected connection refused error"
+            else:
+                assert False, "❌ can connect to metrics endpoint without relation with cos"
+
+
+@pytest.mark.group(1)
+@pytest.mark.abort_on_fail
+async def test_exporter_endpoint(ops_test: OpsTest, mysql_router_charm_series: str) -> None:
+    """Test that the exporter endpoint works when related with TLS.
+
+    The exporter must be unreachable before the cos-agent relation is created and
+    after it is removed, and must serve metrics while the relation exists. The
+    router certificate issuer is checked before, during and after the TLS relation.
+    """
+    # Build and deploy applications
+    mysqlrouter_charm = await ops_test.build_charm(".")
+
+    logger.info("Deploying all the applications")
+
+    # deploy mysqlrouter with num_units=0 since it's a subordinate charm
+    # and will be installed with the related consumer application
+    applications = await asyncio.gather(
+        ops_test.model.deploy(
+            MYSQL_APP_NAME,
+            channel="8.0/edge",
+            application_name=MYSQL_APP_NAME,
+            config={"profile": "testing"},
+            num_units=1,
+        ),
+        ops_test.model.deploy(
+            mysqlrouter_charm,
+            application_name=MYSQL_ROUTER_APP_NAME,
+            num_units=0,
+            series=mysql_router_charm_series,
+        ),
+        ops_test.model.deploy(
+            APPLICATION_APP_NAME,
+            application_name=APPLICATION_APP_NAME,
+            num_units=1,
+            # MySQL Router and Grafana agent are subordinate -
+            # they will use the series of the principal charm
+            series=mysql_router_charm_series,
+            channel="latest/edge",
+        ),
+        ops_test.model.deploy(
+            GRAFANA_AGENT_APP_NAME,
+            application_name=GRAFANA_AGENT_APP_NAME,
+            num_units=0,
+            channel="latest/stable",
+            series=mysql_router_charm_series,
+        ),
+    )
+
+    [mysql_app, mysql_router_app, mysql_test_app, grafana_agent_app] = applications
+
+    logger.info("Relating mysqlrouter and grafana-agent with mysql-test-app")
+
+    await ops_test.model.relate(
+        f"{MYSQL_ROUTER_APP_NAME}:database", f"{APPLICATION_APP_NAME}:database"
+    )
+
+    await ops_test.model.relate(
+        f"{APPLICATION_APP_NAME}:juju-info", f"{GRAFANA_AGENT_APP_NAME}:juju-info"
+    )
+
+    async with ops_test.fast_forward():
+        await asyncio.gather(
+            ops_test.model.block_until(lambda: mysql_app.status == "active", timeout=SLOW_TIMEOUT),
+            ops_test.model.block_until(
+                lambda: mysql_router_app.status == "blocked", timeout=SLOW_TIMEOUT
+            ),
+            ops_test.model.block_until(
+                lambda: mysql_test_app.status == "waiting", timeout=SLOW_TIMEOUT
+            ),
+            ops_test.model.block_until(
+                lambda: grafana_agent_app.status == "blocked", timeout=SLOW_TIMEOUT
+            ),
+        )
+
+    logger.info("Relating mysqlrouter with mysql")
+
+    await ops_test.model.relate(
+        f"{MYSQL_ROUTER_APP_NAME}:backend-database", f"{MYSQL_APP_NAME}:database"
+    )
+
+    await asyncio.gather(
+        ops_test.model.block_until(lambda: mysql_app.status == "active", timeout=SLOW_TIMEOUT),
+        ops_test.model.block_until(
+            lambda: mysql_router_app.status == "active", timeout=SLOW_TIMEOUT
+        ),
+        ops_test.model.block_until(
+            lambda: mysql_test_app.status == "active", timeout=SLOW_TIMEOUT
+        ),
+        ops_test.model.block_until(
+            lambda: grafana_agent_app.status == "blocked", timeout=SLOW_TIMEOUT
+        ),
+    )
+
+    mysql_router_unit = mysql_router_app.units[0]
+
+    issuer = await get_tls_certificate_issuer(
+        ops_test,
+        mysql_router_unit.name,
+        socket="/var/snap/charmed-mysql/common/run/mysqlrouter/mysql.sock",
+    )
+    assert (
+        "Issuer: CN = MySQL_Router_Auto_Generated_CA_Certificate" in issuer
+    ), "Expected mysqlrouter autogenerated certificate"
+
+    logger.info(f"Deploying {TLS_APP_NAME}")
+    await ops_test.model.deploy(
+        TLS_APP_NAME,
+        application_name=TLS_APP_NAME,
+        channel="stable",
+        config=TLS_CONFIG,
+        series="jammy",
+    )
+    await ops_test.model.wait_for_idle([TLS_APP_NAME], status="active", timeout=SLOW_TIMEOUT)
+
+    logger.info(f"Relating mysqlrouter with {TLS_APP_NAME}")
+
+    await ops_test.model.relate(
+        f"{MYSQL_ROUTER_APP_NAME}:certificates", f"{TLS_APP_NAME}:certificates"
+    )
+
+    unit_address = await mysql_test_app.units[0].get_public_address()
+    metrics_url = f"http://{unit_address}:9152/metrics"
+
+    # the metrics endpoint must refuse connections while there is no relation with cos
+    _assert_metrics_endpoint_refuses_connections(metrics_url)
+
+    logger.info("Relating mysqlrouter with grafana agent")
+    await ops_test.model.relate(
+        f"{GRAFANA_AGENT_APP_NAME}:cos-agent", f"{MYSQL_ROUTER_APP_NAME}:cos-agent"
+    )
+
+    for attempt in _retrying():
+        with attempt:
+            # the context manager closes the response even if an assertion fails
+            with requests.get(metrics_url, stream=False, timeout=REQUEST_TIMEOUT) as response:
+                response.raise_for_status()
+                assert (
+                    "mysqlrouter_route_health" in response.text
+                ), "❌ did not find expected metric in response"
+
+    for attempt in _retrying():
+        with attempt:
+            issuer = await get_tls_certificate_issuer(
+                ops_test,
+                mysql_router_unit.name,
+                socket="/var/snap/charmed-mysql/common/run/mysqlrouter/mysql.sock",
+            )
+            assert (
+                "CN = Test CA" in issuer
+            ), f"Expected mysqlrouter certificate from {TLS_APP_NAME}"
+
+    logger.info("Removing relation between mysqlrouter and grafana agent")
+    await mysql_router_app.remove_relation(
+        f"{GRAFANA_AGENT_APP_NAME}:cos-agent", f"{MYSQL_ROUTER_APP_NAME}:cos-agent"
+    )
+
+    # the metrics endpoint must refuse connections again once the relation is removed
+    _assert_metrics_endpoint_refuses_connections(metrics_url)
+
+    logger.info(f"Removing relation between mysqlrouter and {TLS_APP_NAME}")
+    await mysql_router_app.remove_relation(
+        f"{MYSQL_ROUTER_APP_NAME}:certificates", f"{TLS_APP_NAME}:certificates"
+    )
+
+    for attempt in _retrying():
+        with attempt:
+            issuer = await get_tls_certificate_issuer(
+                ops_test,
+                mysql_router_unit.name,
+                socket="/var/snap/charmed-mysql/common/run/mysqlrouter/mysql.sock",
+            )
+            assert (
+                "Issuer: CN = MySQL_Router_Auto_Generated_CA_Certificate" in issuer
+            ), "Expected mysqlrouter autogenerated certificate"