Skip to content

Commit

Permalink
enable service graphs
Browse files Browse the repository at this point in the history
  • Loading branch information
michaeldmitry committed Dec 11, 2024
1 parent 9f747dc commit 30171ae
Show file tree
Hide file tree
Showing 2 changed files with 379 additions and 48 deletions.
149 changes: 132 additions & 17 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
# See LICENSE file for licensing details.

"""Charmed Operator for Tempo; a lightweight object storage based tracing backend."""
import json
import logging
import re
import socket
from pathlib import Path
from subprocess import CalledProcessError, getoutput
from typing import Dict, List, Optional, Set, Tuple, cast, get_args
from typing import Any, Dict, List, Optional, Set, Tuple, cast, get_args

import ops
from charms.grafana_k8s.v0.grafana_source import GrafanaSourceProvider
Expand All @@ -25,8 +26,8 @@
from charms.traefik_k8s.v0.traefik_route import TraefikRouteRequirer
from cosl.coordinated_workers.coordinator import ClusterRolesConfig, Coordinator
from cosl.coordinated_workers.nginx import CA_CERT_PATH, CERT_PATH, KEY_PATH
from cosl.interfaces.datasource_exchange import DatasourceDict
from cosl.interfaces.utils import DatabagModel
from cosl.interfaces.datasource_exchange import DatasourceDict, DSExchangeAppData
from cosl.interfaces.utils import DatabagModel, DataValidationError
from ops import CollectStatusEvent
from ops.charm import CharmBase

Expand All @@ -36,6 +37,7 @@

logger = logging.getLogger(__name__)
PEERS_RELATION_ENDPOINT_NAME = "peers"
PROMETHEUS_DS_TYPE = "prometheus"


class TempoCoordinator(Coordinator):
Expand Down Expand Up @@ -132,20 +134,7 @@ def __init__(self, *args):
# or when ingress changes
self.ingress.on.ready,
],
extra_fields={
# https://grafana.com/docs/tempo/latest/metrics-generator/service_graphs/enable-service-graphs/
"httpMethod": "GET",
"serviceMap": {
"datasourceUid": "juju_svcgraph_61e32e2f-50ac-40e7-8ee8-1b7297a3e47f_prometheus_0"
},
# https://community.grafana.com/t/how-to-jump-from-traces-to-logs/72477/3
"tracesToLogs": {
"datasourceUid": "juju_svcgraph_61e32e2f-50ac-40e7-8ee8-1b7297a3e47f_loki_0"
},
"lokiSearch": {
"datasourceUid": "juju_svcgraph_61e32e2f-50ac-40e7-8ee8-1b7297a3e47f_loki_0"
},
},
extra_fields=self._grafana_source_extra_fields,
)

# peer
Expand All @@ -167,6 +156,25 @@ def __init__(self, *args):
######################
# UTILITY PROPERTIES #
######################
@property
def _grafana_source_extra_fields(self) -> Dict[str, Any]:
"""Extra fields needed for the grafana-source relation, like data correlation config."""
## https://grafana.com/docs/tempo/latest/metrics-generator/service_graphs/enable-service-graphs/
# "httpMethod": "GET",
# "serviceMap": {
# "datasourceUid": "juju_svcgraph_61e32e2f-50ac-40e7-8ee8-1b7297a3e47f_prometheus_0",
# },
# # https://community.grafana.com/t/how-to-jump-from-traces-to-logs/72477/3
# "tracesToLogs": {
# "datasourceUid": "juju_svcgraph_61e32e2f-50ac-40e7-8ee8-1b7297a3e47f_loki_0"
# },
# "lokiSearch": {
# "datasourceUid": "juju_svcgraph_61e32e2f-50ac-40e7-8ee8-1b7297a3e47f_loki_0"
# },

service_graph_config = self._build_service_graph_config()
return service_graph_config

@property
def peers(self):
"""Fetch the "peers" peer relation."""
Expand Down Expand Up @@ -512,6 +520,113 @@ def _reconcile(self):
self._update_ingress_relation()
self._update_tracing_relations()
self._update_source_exchange()
self._update_grafana_source()

def _get_grafana_source_uids(self) -> Dict[str, Dict[str, str]]:
"""Helper method to retrieve grafana source UIDs from remote databags using raw relations.
Duplicate implementation of GrafanaSourceProvider.get_source_uids() to use in the
situation where we want to access relation data when the GrafanaSourceProvider object
is not yet initialised.
"""
uids = {}
for rel in self.model.relations.get("grafana-source", []):
if not rel:
continue
app_databag = rel.data[rel.app]
grafana_uid = app_databag.get("grafana_uid")
if not grafana_uid:
logger.warning(
"remote end is using an old grafana_datasource interface: "
"`grafana_uid` field not found."
)
continue

uids[grafana_uid] = json.loads(app_databag.get("datasource_uids", "{}"))
return uids

def _build_service_graph_config(self) -> Dict[str, Any]:
    """Build the service graph config based on matching datasource UIDs.

    To enable service graphs, we need the datasource UID of the prometheus/mimir instance where:
    1- Tempo is connected to over "send-remote-write" relation.
    2- It is also connected, as a datasource, to the same grafana instance(s) Tempo is connected to.
    If there are multiple datasources that fit this description, we can assume that they are all
    equivalent and we can use any of them.

    Returns:
        The extra-fields dict to publish over grafana-source (``httpMethod`` +
        ``serviceMap.datasourceUid``), or ``{}`` when no suitable datasource exists.
    """

    # Map remote app name -> datasource-exchange relation.
    # NOTE(review): reaches into the private `_relations` attribute of the
    # datasource exchange endpoint wrapper — confirm there is no public accessor.
    dsx_relations = {
        relation.app.name: relation
        for relation in self.coordinator.datasource_exchange._relations
    }

    # Names of all apps we are remote-writing metrics to (prometheus/mimir).
    remote_write_apps = {
        relation.app.name
        for relation in self.model.relations["send-remote-write"]
        if relation.app and relation.data
    }

    # the list of datasource exchange relations whose remote we're also remote writing to.
    remote_write_dsx_relations = [
        dsx_relations[app_name]
        for app_name in set(dsx_relations).intersection(remote_write_apps)
    ]

    # grafana UIDs that are connected to this Tempo.
    grafana_uids = set(self._get_grafana_source_uids())

    # Parse the app databag of each candidate relation; skip any that fail
    # schema validation (e.g. remote hasn't published data yet).
    remote_write_dsx_databags = []
    for relation in remote_write_dsx_relations:
        try:
            datasource = DSExchangeAppData.load(relation.data[relation.app])
            remote_write_dsx_databags.append(datasource)
        except DataValidationError:
            # load() already logs
            continue

    # filter the remote_write_dsx_databags with those that are connected to the same grafana instances Tempo is connected to.
    matching_datasources = [
        datasource
        for databag in remote_write_dsx_databags
        for datasource in databag.datasources
        if datasource.grafana_uid in grafana_uids and datasource.type == PROMETHEUS_DS_TYPE
    ]

    if not matching_datasources:
        # Build a diagnostic message explaining exactly why the service graph
        # is disabled: first list missing relations, then describe which
        # cross-referencing step failed.
        msg = "service graph disabled."
        missing_rels = []
        if not remote_write_apps:
            missing_rels.append("send-remote-write")
        if not grafana_uids:
            missing_rels.append("grafana-source")
        if not dsx_relations:
            missing_rels.append("receive-datasource")

        if missing_rels:
            msg += f" Missing relations: {missing_rels}."

        if not remote_write_dsx_relations:
            msg += " There are no datasource_exchange relations with a Prometheus/Mimir that we're also remote writing to."
        else:
            msg += " There are no datasource_exchange relations to a Prometheus/Mimir that are datasources to the same grafana instances Tempo is connected to."

        logger.info(msg)
        return {}

    if len(matching_datasources) > 1:
        logger.info(
            "there are multiple datasources that could be used to create the service graph. We assume that all are equivalent."
        )

    # At this point, we can assume any datasource is a valid datasource to use for service graphs.
    matching_datasource = matching_datasources[0]
    return {
        "httpMethod": "GET",
        "serviceMap": {
            "datasourceUid": matching_datasource.uid,
        },
    }


if __name__ == "__main__": # pragma: nocover
Expand Down
Loading

0 comments on commit 30171ae

Please sign in to comment.