Skip to content

Commit

Permalink
Implement grafana_datasource_exchange (#654)
Browse files Browse the repository at this point in the history
* add send-datasource endpoint

* fix UTs

* fetch lib

* remove juju top lib

* add raise_on_error=false

* increase idle period

* PR comments

* remove unit_name
  • Loading branch information
michaeldmitry authored Dec 13, 2024
1 parent cf05660 commit 8ab1816
Show file tree
Hide file tree
Showing 14 changed files with 236 additions and 56 deletions.
6 changes: 5 additions & 1 deletion metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,11 @@ provides:
interface: grafana_dashboard
receive-remote-write:
interface: prometheus_remote_write

send-datasource:
interface: grafana_datasource_exchange
description: |
Integration to share this charm's grafana datasources with other COS components, and receive theirs.
requires:
metrics-endpoint:
interface: prometheus_scrape
Expand Down
5 changes: 2 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
cosl
cosl>=0.0.46
cryptography
jsonschema
# pinned to 2.15 as 2.16 breaks our scenario tests and 2.17 breaks our unittests
ops == 2.15
ops
pyaml
requests
lightkube >= 0.11
Expand Down
42 changes: 41 additions & 1 deletion src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import socket
import subprocess
from pathlib import Path
from typing import Dict, Optional, Tuple, TypedDict, cast
from typing import Dict, List, Optional, Tuple, TypedDict, cast
from urllib.parse import urlparse

import yaml
Expand Down Expand Up @@ -43,6 +43,7 @@
IngressPerUnitRevokedForUnitEvent,
)
from cosl import JujuTopology
from cosl.interfaces.datasource_exchange import DatasourceDict, DatasourceExchange
from lightkube.core.client import Client
from lightkube.core.exceptions import ApiError as LightkubeApiError
from lightkube.resources.core_v1 import PersistentVolumeClaim, Pod
Expand Down Expand Up @@ -232,6 +233,11 @@ def __init__(self, *args):
self.charm_tracing_endpoint, self.server_cert = charm_tracing_config(
self.charm_tracing, self._ca_cert_path
)
self.datasource_exchange = DatasourceExchange(
self,
provider_endpoint="send-datasource",
requirer_endpoint=None,
)

self.framework.observe(self.on.prometheus_pebble_ready, self._on_pebble_ready)
self.framework.observe(self.on.config_changed, self._configure)
Expand All @@ -246,8 +252,23 @@ def __init__(self, *args):
self.framework.observe(self.alertmanager_consumer.on.cluster_changed, self._configure)
self.framework.observe(self.resources_patch.on.patch_failed, self._on_k8s_patch_failed)
self.framework.observe(self.on.validate_configuration_action, self._on_validate_config)
self.framework.observe(
self.on.send_datasource_relation_changed, self._on_grafana_source_changed
)
self.framework.observe(
self.on.send_datasource_relation_departed, self._on_grafana_source_changed
)
self.framework.observe(
self.on.grafana_source_relation_changed, self._on_grafana_source_changed
)
self.framework.observe(
self.on.grafana_source_relation_departed, self._on_grafana_source_changed
)
self.framework.observe(self.on.collect_unit_status, self._on_collect_unit_status)

def _on_grafana_source_changed(self, _):
    """Re-publish our datasource uids whenever a grafana-source or send-datasource relation changes."""
    self._update_datasource_exchange()

def _on_collect_unit_status(self, event: CollectStatusEvent):
# "Pull" statuses
retention_time = self.model.config.get("metrics_retention_time", "")
Expand Down Expand Up @@ -1106,6 +1127,25 @@ def _push(self, path, contents):
"""Push file to container, creating subdirs as necessary."""
self.container.push(path, contents, make_dirs=True, encoding="utf-8")

def _update_datasource_exchange(self) -> None:
"""Update the grafana-datasource-exchange relations."""
if not self.unit.is_leader():
return

# we might have multiple grafana-source relations, this method collects them all and returns a mapping from
# the `grafana_uid` to the contents of the `datasource_uids` field
# for simplicity, we assume that we're sending the same data to different grafanas.
# read more in https://discourse.charmhub.io/t/tempo-ha-docs-correlating-traces-metrics-logs/16116
grafana_uids_to_units_to_uids = self.grafana_source_provider.get_source_uids()
raw_datasources: List[DatasourceDict] = []

for grafana_uid, ds_uids in grafana_uids_to_units_to_uids.items():
for _, ds_uid in ds_uids.items():
raw_datasources.append(
{"type": "prometheus", "uid": ds_uid, "grafana_uid": grafana_uid}
)
self.datasource_exchange.publish(datasources=raw_datasources)

@property
def workload_tracing_endpoint(self) -> Optional[str]:
"""Tempo endpoint for workload tracing."""
Expand Down
6 changes: 5 additions & 1 deletion tests/integration/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,9 @@ async def deploy_and_configure_minio(ops_test: OpsTest) -> None:
"secret-key": "secretkey",
}
await ops_test.model.deploy("minio", channel="edge", trust=True, config=config)
await ops_test.model.wait_for_idle(apps=["minio"], status="active", timeout=2000)
await ops_test.model.wait_for_idle(
apps=["minio"], status="active", timeout=2000, idle_period=45
)
minio_addr = await unit_address(ops_test, "minio", 0)

mc_client = Minio(
Expand Down Expand Up @@ -350,6 +352,8 @@ async def deploy_tempo_cluster(ops_test: OpsTest):
status="active",
timeout=2000,
idle_period=30,
# TODO: remove when https://github.com/canonical/tempo-coordinator-k8s-operator/issues/90 is fixed
raise_on_error=False,
)


Expand Down
49 changes: 41 additions & 8 deletions tests/interface/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
# # See LICENSE file for licensing details.
# from unittest.mock import patch

import json
from unittest.mock import patch

import pytest
from charms.tempo_coordinator_k8s.v0.charm_tracing import charm_tracing_disabled
from interface_tester import InterfaceTester
from scenario import Container, ExecOutput, State
from scenario import Container, Exec, Relation, State

from charm import PrometheusCharm

Expand All @@ -27,16 +29,34 @@ def prometheus_charm():
_promtool_check_config=lambda *_: ("stdout", ""),
_prometheus_version="0.1.0",
):
yield PrometheusCharm
with charm_tracing_disabled():
yield PrometheusCharm


# Prometheus workload container with the `update-ca-certificates --fresh`
# exec mocked out (the charm shells out to it during startup).
prometheus_container = Container(
    name="prometheus",
    can_connect=True,
    execs={Exec(["update-ca-certificates", "--fresh"], return_code=0, stdout="")},
)

# A grafana-source relation as seen from this charm: the remote grafana
# advertises its own uid and the datasource uid it assigned to each of our units.
grafana_source_relation = Relation(
    "grafana-source",
    remote_app_data={
        "datasource_uids": json.dumps({"prometheus/0": "01234"}),
        "grafana_uid": "5678",
    },
)

# A send-datasource (grafana_datasource_exchange) relation whose remote peer
# advertises a single prometheus datasource.
grafana_datasource_exchange_relation = Relation(
    "send-datasource",
    remote_app_data={
        "datasources": json.dumps([{"type": "prometheus", "uid": "01234", "grafana_uid": "5678"}])
    },
)


def begin_with_initial_hooks_isolated() -> State:
container = Container(
"prometheus",
can_connect=True,
exec_mock={("update-ca-certificates", "--fresh"): ExecOutput(return_code=0, stdout="")},
)
state = State(containers=[container], leader=True)
state = State(containers=[prometheus_container], leader=True)
return state


Expand All @@ -47,3 +67,16 @@ def interface_tester(interface_tester: InterfaceTester, prometheus_charm):
state_template=begin_with_initial_hooks_isolated(),
)
yield interface_tester


@pytest.fixture
def grafana_datasource_exchange_tester(interface_tester: InterfaceTester, prometheus_charm):
    """Interface tester whose state template already has grafana-source and
    send-datasource relations in place.

    leader=True because only the leader unit publishes over the
    datasource-exchange relation.
    """
    interface_tester.configure(
        charm_type=prometheus_charm,
        state_template=State(
            leader=True,
            containers=[prometheus_container],
            relations=[grafana_source_relation, grafana_datasource_exchange_relation],
        ),
    )
    yield interface_tester
13 changes: 13 additions & 0 deletions tests/interface/test_grafana_datasource_exchange.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
from interface_tester import InterfaceTester


def test_grafana_datasource_exchange_v0_interface(
    grafana_datasource_exchange_tester: InterfaceTester,
):
    """Check this charm against v0 of the grafana_datasource_exchange interface spec.

    The fixture (see conftest) seeds the state with the relevant relations;
    the tester runs the spec's scenario-based contract tests against the charm.
    """
    grafana_datasource_exchange_tester.configure(
        interface_name="grafana_datasource_exchange",
        interface_version=0,
    )
    grafana_datasource_exchange_tester.run()
15 changes: 13 additions & 2 deletions tests/scenario/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
from unittest.mock import patch

import pytest
from scenario import Context
from charms.tempo_coordinator_k8s.v0.charm_tracing import charm_tracing_disabled
from scenario import Container, Context, Exec

from charm import PrometheusCharm

Expand All @@ -25,9 +26,19 @@ def prometheus_charm():
_promtool_check_config=lambda *_: ("stdout", ""),
_prometheus_version="0.1.0",
):
yield PrometheusCharm
with charm_tracing_disabled():
yield PrometheusCharm


@pytest.fixture(scope="function")
def context(prometheus_charm):
    """Fresh scenario Context per test, pinned to juju version 3.0.3."""
    return Context(charm_type=prometheus_charm, juju_version="3.0.3")


@pytest.fixture(scope="function")
def prometheus_container():
    """Connectable prometheus container with `update-ca-certificates --fresh` mocked."""
    return Container(
        "prometheus",
        can_connect=True,
        execs={Exec(["update-ca-certificates", "--fresh"], return_code=0, stdout="")},
    )
44 changes: 24 additions & 20 deletions tests/scenario/helpers.py
Original file line number Diff line number Diff line change
@@ -1,58 +1,62 @@
# Copyright 2023 Canonical Ltd.
# See LICENSE file for licensing details.

from scenario import Container, Context, ExecOutput, Network, PeerRelation, Relation, State
import dataclasses

from scenario import Container, Context, Exec, PeerRelation, Relation, State


def begin_with_initial_hooks_isolated(context: Context, *, leader: bool = True) -> State:
container = Container(
"prometheus",
can_connect=False,
exec_mock={("update-ca-certificates", "--fresh"): ExecOutput(return_code=0, stdout="")},
execs={Exec(["update-ca-certificates", "--fresh"], return_code=0, stdout="")},
)
state = State(containers=[container])
peer_rel = PeerRelation("prometheus-peers")

state = context.run("install", state)
state = context.run(context.on.install(), state)

state = state.replace(relations=[peer_rel])
state = context.run(peer_rel.created_event, state)
state = dataclasses.replace(state, relations=[peer_rel])
state = context.run(context.on.relation_created(peer_rel), state)

if leader:
state = state.replace(leader=True)
state = context.run("leader-elected", state)
state = dataclasses.replace(state, leader=True)
state = context.run(context.on.leader_elected(), state)
else:
state = state.replace(leader=False)
state = context.run("leader-settings-changed", state)
state = dataclasses.replace(state, leader=False)
state = context.run(context.on.leader_elected(), state)

state = context.run("config-changed", state)
state = context.run(context.on.config_changed(), state)

container = container.replace(can_connect=True)
state = state.replace(containers=[container])
state = context.run(container.pebble_ready_event, state)
container = dataclasses.replace(container, can_connect=True)
state = dataclasses.replace(state, containers=[container])
state = context.run(context.on.pebble_ready(container), state)

state = context.run("start", state)
state = context.run(context.on.start(), state)

return state


def add_relation_sequence(context: Context, state: State, relation: Relation):
"""Helper to simulate a relation-added sequence."""
# TODO consider adding to scenario.sequences
state_with_relation = state.replace(
relations=state.relations + [relation],
networks=state.networks + [Network.default(relation.endpoint)],
state_with_relation = dataclasses.replace(
state,
relations=state.relations.union([relation]),
)
state_after_relation_created = context.run(
context.on.relation_created(relation), state_with_relation
)
state_after_relation_created = context.run(relation.created_event, state_with_relation)

# relation is not mutated!
relation_1 = state_after_relation_created.get_relations(relation.endpoint)[0]
state_after_relation_joined = context.run(
relation_1.joined_event, state_after_relation_created
context.on.relation_joined(relation_1), state_after_relation_created
)

relation_2 = state_after_relation_joined.get_relations(relation.endpoint)[0]
state_after_relation_changed = context.run(
relation_2.changed_event, state_after_relation_joined
context.on.relation_changed(relation_2), state_after_relation_joined
)
return state_after_relation_changed
6 changes: 3 additions & 3 deletions tests/scenario/test_alert_expression_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import json

import yaml
from scenario import Container, ExecOutput, Relation, State
from scenario import Container, Exec, Relation, State


def test_alert_expression_labels(context):
Expand Down Expand Up @@ -42,10 +42,10 @@ def test_alert_expression_labels(context):
container = Container(
name="prometheus",
can_connect=True,
exec_mock={("update-ca-certificates", "--fresh"): ExecOutput(return_code=0, stdout="")},
execs={Exec(["update-ca-certificates", "--fresh"], return_code=0, stdout="")},
)
state = State(containers=[container], relations=[remote_write_relation])
context.run(event=remote_write_relation.changed_event, state=state)
context.run(context.on.relation_changed(remote_write_relation), state=state)
rules_file = (
container.get_filesystem(context)
/ "etc/prometheus/rules/juju_foobar-model_d07df316_remote-app.rules"
Expand Down
8 changes: 4 additions & 4 deletions tests/scenario/test_brute_isolated.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@

def test_startup_shutdown_sequence(context: Context):
state = begin_with_initial_hooks_isolated(context)
state = context.run("update-status", state)
state = context.run(context.on.update_status(), state)

for peer_rel in state.get_relations("replicas"):
state = context.run(peer_rel.departed_event, state)
state = context.run(context.on.relation_departed(peer_rel), state)

state = context.run("stop", state)
context.run("remove", state)
state = context.run(context.on.stop(), state)
context.run(context.on.remove(), state)
Loading

0 comments on commit 8ab1816

Please sign in to comment.