Skip to content

Commit

Permalink
Datasource exchange (#86)
Browse files Browse the repository at this point in the history
* precommit

* ds exchange implementation

* interface tests passing

* explicit CRI dep

* unpinned CRI branch

* git build dep

* added grafana uid

* removed ctx

* fixed ds test

* pr comments

* pr comments

* adjusted comment
  • Loading branch information
PietroPasotti authored Dec 12, 2024
1 parent 4633db8 commit fd2679a
Show file tree
Hide file tree
Showing 9 changed files with 301 additions and 27 deletions.
59 changes: 59 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
hooks:
- id: check-ast
- id: check-builtin-literals
- id: check-docstring-first
- id: check-merge-conflict
- id: check-yaml
- id: check-toml
- id: debug-statements
- id: trailing-whitespace
- repo: local
hooks:
- id: fmt
name: fmt
entry: tox -e fmt --
language: system
- repo: local
hooks:
- id: static-charm
name: static-charm
entry: tox -e static-charm --
language: system
- repo: local
hooks:
- id: static-lib
name: static-lib
entry: tox -e static-lib --
language: system
- repo: https://github.com/asottile/blacken-docs
rev: 1.13.0
hooks:
- id: blacken-docs
additional_dependencies: [black==23.3]
- repo: https://github.com/pre-commit/mirrors-prettier
rev: "v2.7.1"
hooks:
- id: prettier
additional_dependencies:
- [email protected]
- "@prettier/[email protected]"
args: ["--print-width=120", "--prose-wrap=always"]
- repo: https://github.com/igorshubovych/markdownlint-cli
rev: v0.33.0
hooks:
- id: markdownlint
- repo: local
hooks:
- id: changelogs-rst
name: Changelog filenames
language: fail
entry: "changelog files must be named ####.(feature|bugfix|doc|removal|misc).rst"
# exclude: ^docs/changelog/(\d+\.(feature|bugfix|doc|removal|misc).rst|template.jinja2)
files: ^docs/changelog/
- repo: meta
hooks:
- id: check-hooks-apply
- id: check-useless-excludes
42 changes: 27 additions & 15 deletions charmcraft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,17 +61,16 @@ provides:
description: |
Integration to offer other charms the possibility to send traces to Tempo.
requires:
self-charm-tracing:
interface: tracing
description: |
Integration to enable Tempo to send its own charm traces to another Tempo instance.
Integration to enable Tempo to send its own charm traces to another Tempo instance.
limit: 1
self-workload-tracing:
interface: tracing
description: |
Integration to enable Tempo to send its own workload traces to another Tempo instance.
Integration to enable Tempo to send its own workload traces to another Tempo instance.
limit: 1
s3:
interface: s3
Expand All @@ -92,12 +91,16 @@ requires:
interface: tls-certificates
limit: 1
description: |
Certificate and key files for securing Tempo internal and external
Certificate and key files for securing Tempo internal and external
communications with TLS.
send-remote-write:
interface: prometheus_remote_write
description: |
Prometheus-like remote write endpoints to push traces' metrics generated by the `metrics-generator` component.
receive-datasource:
interface: grafana_datasource_exchange
description: |
Integration to share with other COS components this charm's datasources, and receive theirs.
storage:
data:
Expand All @@ -115,7 +118,6 @@ peers:
description: |
peer relation for internal coordination
bases:
- build-on:
- name: "ubuntu"
Expand All @@ -126,9 +128,9 @@ bases:

parts:
charm:
# uncomment this if you add git+ dependencies in requirements.txt
# build-packages:
# - "git"
# uncomment this if you add git+ dependencies in requirements.txt:
#build-packages:
# - "git"
charm-binary-python-packages:
- "pydantic>=2"
- "cryptography"
Expand All @@ -139,28 +141,38 @@ config:
options:
retention-period:
description: |
Maximum trace retention period, in hours. This will be used to configure the compactor to clean up trace data after this time.
Maximum trace retention period, in hours. This will be used to configure the compactor to clean up trace data after this time.
Defaults to 720 hours, which is equivalent to 30 days. Per-stream retention limits are currently not supported.
type: int
default: 720
always_enable_zipkin:
description: Force-enable the receiver for the 'zipkin' protocol in Tempo, even if there is no integration currently requesting it.
description: |
Force-enable the receiver for the 'zipkin' protocol in Tempo, even if there is no integration currently
requesting it.
type: boolean
default: false
always_enable_otlp_grpc:
description: Force-enable the receiver for the 'otlp_grpc' protocol in Tempo, even if there is no integration currently requesting it.
description: |
Force-enable the receiver for the 'otlp_grpc' protocol in Tempo, even if there is no integration currently
requesting it.
type: boolean
default: false
always_enable_otlp_http:
description: Force-enable the receiver for the 'otlp_http' protocol in Tempo, even if there is no integration currently requesting it.
description: |
Force-enable the receiver for the 'otlp_http' protocol in Tempo, even if there is no integration currently
requesting it.
type: boolean
default: false
always_enable_jaeger_thrift_http:
description: Force-enable the receiver for the 'jaeger_thrift_http' protocol in Tempo, even if there is no integration currently requesting it.
description: |
Force-enable the receiver for the 'jaeger_thrift_http' protocol in Tempo, even if there is no integration
currently requesting it.
type: boolean
default: false
always_enable_jaeger_grpc:
description: Force-enable the receiver for the 'jaeger_grpc' protocol in Tempo, even if there is no integration currently requesting it.
description: |
Force-enable the receiver for the 'jaeger_grpc' protocol in Tempo, even if there is no integration currently
requesting it.
type: boolean
default: false
cpu_limit:
Expand All @@ -174,4 +186,4 @@ config:
K8s memory resource limit, e.g. "1Gi". Default is unset (no limit). This value is used
for the "limits" portion of the resource requirements.
See https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
type: string
type: string
23 changes: 20 additions & 3 deletions lib/charms/grafana_k8s/v0/grafana_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def __init__(self, *args):

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 22
LIBPATCH = 24

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -432,13 +432,22 @@ def update_source(self, source_url: Optional[str] = ""):
def get_source_uids(self) -> Dict[str, Dict[str, str]]:
"""Get the datasource UID(s) assigned by the remote end(s) to this datasource.
Returns a mapping from remote application names to unit names to datasource uids.
Returns a mapping from remote application UIDs to unit names to datasource uids.
"""
uids = {}
for rel in self._charm.model.relations.get(self._relation_name, []):
if not rel:
continue
uids[rel.app.name] = json.loads(rel.data[rel.app]["datasource_uids"])
app_databag = rel.data[rel.app]
grafana_uid = app_databag.get("grafana_uid")
if not grafana_uid:
logger.warning(
"remote end is using an old grafana_datasource interface: "
"`grafana_uid` field not found."
)
continue

uids[grafana_uid] = json.loads(app_databag.get("datasource_uids", "{}"))
return uids

def _set_sources_from_event(self, event: RelationJoinedEvent) -> None:
Expand Down Expand Up @@ -568,6 +577,14 @@ def _publish_source_uids(self, rel: Relation, uids: Dict[str, str]):
Assumes only leader unit will call this method
"""
unique_grafana_name = "juju_{}_{}_{}_{}".format(
self._charm.model.name,
self._charm.model.uuid,
self._charm.model.app.name,
self._charm.model.unit.name.split("/")[1], # type: ignore
)

rel.data[self._charm.app]["grafana_uid"] = unique_grafana_name
rel.data[self._charm.app]["datasource_uids"] = json.dumps(uids)

def _get_source_config(self, rel: Relation):
Expand Down
12 changes: 6 additions & 6 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
# pin importlib-metadata version else charmcraft pack will fail to resolve the dependencies for the pydeps-installed otlp library
importlib-metadata~=6.0.0
ops>=2.17
crossplane
jsonschema==4.17.0
lightkube>=0.15.4
lightkube-models>=1.24.1.4
tenacity==8.2.3
pydantic>=2
cosl>=0.0.46

# crossplane is a package from nginxinc to interact with the Nginx config
crossplane

Expand All @@ -16,7 +17,6 @@ opentelemetry-exporter-otlp-proto-http==1.21.0
# lib/charms/tls_certificates_interface/v2/tls_certificates.py
jsonschema
cryptography
# lib/charms/tempo_coordinator_k8s/v1/tracing.py
pydantic>=2
# lib/charms/prometheus_k8s/v0/prometheus_scrape.py
cosl>=0.0.43

# pin importlib-metadata version else charmcraft pack will fail to resolve the dependencies for the pydeps-installed otlp library
importlib-metadata~=6.0.0
53 changes: 52 additions & 1 deletion src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@
)
from charms.traefik_k8s.v0.traefik_route import TraefikRouteRequirer
from cosl.coordinated_workers.coordinator import ClusterRolesConfig, Coordinator
from cosl.coordinated_workers.interface import DatabagModel
from cosl.coordinated_workers.nginx import CA_CERT_PATH, CERT_PATH, KEY_PATH
from cosl.interfaces.datasource_exchange import DatasourceDict
from cosl.interfaces.utils import DatabagModel
from ops import CollectStatusEvent
from ops.charm import CharmBase

Expand Down Expand Up @@ -108,6 +109,8 @@ def __init__(self, *args):
"s3": "s3",
"charm-tracing": "self-charm-tracing",
"workload-tracing": "self-workload-tracing",
"send-datasource": None,
"receive-datasource": "receive-datasource",
},
nginx_config=NginxConfig(server_name=self.hostname).config,
workers_config=self.tempo.config,
Expand Down Expand Up @@ -443,13 +446,61 @@ def remote_write_endpoints(self):
"""Return remote-write endpoints."""
return self._remote_write.endpoints

def _update_source_exchange(self) -> None:
"""Update the grafana-datasource-exchange relations with what we receive from grafana-source."""

# leader operation only
if not self.unit.is_leader():
return

# Each grafana we're sending our data to gives us back a mapping from unit names to datasource UIDs.
# so if we have two tempo units, and we're related to two grafanas,
# the grafana-source databag will look something like this:
# {"grafana_uid": "1234-1234-1234-1234",
# "datasource_uids": {
# "grafana/0": {
# "tempo/0": "0000-0000-0000-0000",
# "tempo/1": "0000-0000-0000-0001"
# },
# "grafana/1": {
# "tempo/0": "0000-0000-0000-0000",
# "tempo/1": "0000-0000-0000-0001"
# },
# }}
# This is an implementation detail, but the UID is 'unique-per-grafana' and ATM it turns out to be
# deterministic given tempo's jujutopology, so if we are related to multiple grafanas,
# the UIDs they assign to our units will be the same.
# Either way, we assume for simplicity (also, why not) that we're sending the same data to
# different grafanas, and not somehow different subsets to different places. Therefore, it should not
# matter which grafana you talk to, to cross-reference the data this unit is presenting to the world.
# However, if at some point grafana's algorithm to generate source UIDs changes, we'd be in trouble since
# the unit we are sharing our data to might be talking to 'another grafana' which might be using a
# different UID convention!

# we might have multiple grafana-source relations, this method collects them all and returns a mapping from
# the `grafana_uid` to the contents of the `datasource_uids` field
grafana_uids_to_units_to_uids = self.grafana_source_provider.get_source_uids()
raw_datasources: List[DatasourceDict] = []

for grafana_uid, ds_uids in grafana_uids_to_units_to_uids.items():
# we don't need the grafana unit name
for _, ds_uid in ds_uids.items():
# we also don't care about which unit's server we're looking at, since hopefully the data is the same.
raw_datasources.append(
{"type": "tempo", "uid": ds_uid, "grafana_uid": grafana_uid}
)

# publish() already sorts the data for us, to prevent databag flapping and ensuing event storms
self.coordinator.datasource_exchange.publish(datasources=raw_datasources)

def _reconcile(self):
# This method contains unconditional update logic, i.e. logic that should be executed
# regardless of the event we are processing.
# reason is, if we miss these events because our coordinator cannot process events (inconsistent status),
# we need to 'remember' to run this logic as soon as we become ready, which is hard and error-prone
self._update_ingress_relation()
self._update_tracing_relations()
self._update_source_exchange()


if __name__ == "__main__": # pragma: nocover
Expand Down
29 changes: 27 additions & 2 deletions tests/interface/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Copyright 2022 Canonical Ltd.
# See LICENSE file for licensing details.
import json
import pathlib
from contextlib import ExitStack
from shutil import rmtree
from unittest.mock import MagicMock, patch

import pytest
Expand Down Expand Up @@ -63,6 +66,11 @@
},
)

grafana_source_relation = Relation(
"grafana-source",
remote_app_data={"datasources": json.dumps({"tempo/0": {"type": "tempo", "uid": "01234"}})},
)

peers = PeerRelation("peers", peers_data={1: {}})

k8s_resource_patch_ready = MagicMock(return_value=True)
Expand All @@ -82,9 +90,13 @@ def patch_all():
)
)
stack.enter_context(charm_tracing_disabled())

yield

# cleanup: some tests create a spurious src folder for alert rules in ./
src_root = pathlib.Path(__file__).parent / "src"
if src_root.exists():
rmtree(src_root)


# Interface tests are centrally hosted at https://github.com/canonical/charm-relation-interfaces.
# this fixture is used by the test runner of charm-relation-interfaces to test tempo's compliance
Expand Down Expand Up @@ -138,7 +150,20 @@ def grafana_datasource_tester(interface_tester: InterfaceTester):
state_template=State(
leader=True,
containers=[nginx_container, nginx_prometheus_exporter_container],
relations=[peers, cluster_relation],
relations=[peers, s3_relation, cluster_relation],
),
)
yield interface_tester


@pytest.fixture
def grafana_datasource_exchange_tester(interface_tester: InterfaceTester):
interface_tester.configure(
charm_type=TempoCoordinatorCharm,
state_template=State(
leader=True,
containers=[nginx_container, nginx_prometheus_exporter_container],
relations=[peers, s3_relation, cluster_relation, grafana_source_relation],
),
)
yield interface_tester
Loading

0 comments on commit fd2679a

Please sign in to comment.