Skip to content

Commit

Permalink
Rework labeling so that everything is labeled with the charm it origi…
Browse files Browse the repository at this point in the history
…nated from
  • Loading branch information
dstathis committed Jan 17, 2024
1 parent 83fde6b commit f1f3e5b
Show file tree
Hide file tree
Showing 9 changed files with 83 additions and 284 deletions.
93 changes: 37 additions & 56 deletions lib/charms/grafana_agent/v0/cos_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,14 +211,14 @@ def __init__(self, *args):
from collections import namedtuple
from itertools import chain
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Dict, List, Optional, Set, Union
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Dict, List, Optional, Set, Tuple, Union

import pydantic
from cosl import GrafanaDashboard, JujuTopology
from cosl.rules import AlertRules
from ops.charm import RelationChangedEvent
from ops.framework import EventBase, EventSource, Object, ObjectEvents
from ops.model import Relation, Unit
from ops.model import Relation
from ops.testing import CharmType

if TYPE_CHECKING:
Expand Down Expand Up @@ -258,6 +258,8 @@ class CosAgentProviderUnitData(pydantic.BaseModel):
metrics_alert_rules: dict
log_alert_rules: dict
dashboards: List[GrafanaDashboard]
# subordinate is no longer used but we should keep it until we bump the library to ensure
# we don't break compatibility.
subordinate: Optional[bool]

# The following entries may vary across units of the same principal app.
Expand All @@ -277,9 +279,9 @@ class CosAgentPeersUnitData(pydantic.BaseModel):
# We need the principal unit name and relation metadata to be able to render identifiers
# (e.g. topology) on the leader side, after all the data moves into peer data (the grafana
# agent leader can only see its own principal, because it is a subordinate charm).
principal_unit_name: str
principal_relation_id: str
principal_relation_name: str
unit_name: str
relation_id: str
relation_name: str

# The only data that is forwarded to the leader is data that needs to go into the app databags
# of the outgoing o11y relations.
Expand All @@ -299,7 +301,7 @@ def app_name(self) -> str:
TODO: Switch to using `model_post_init` when pydantic v2 is released?
https://github.com/pydantic/pydantic/issues/1729#issuecomment-1300576214
"""
return self.principal_unit_name.split("/")[0]
return self.unit_name.split("/")[0]


class COSAgentProvider(Object):
Expand Down Expand Up @@ -375,7 +377,7 @@ def _on_refresh(self, event):
dashboards=self._dashboards,
metrics_scrape_jobs=self._scrape_jobs,
log_slots=self._log_slots,
subordinate=self._charm.meta.subordinate,
subordinate=None,
)
relation.data[self._charm.unit][data.KEY] = data.json()
except (
Expand Down Expand Up @@ -468,12 +470,6 @@ class COSAgentRequirerEvents(ObjectEvents):
validation_error = EventSource(COSAgentValidationError)


class MultiplePrincipalsError(Exception):
"""Custom exception for when there are multiple principal applications."""

pass


class COSAgentRequirer(Object):
"""Integration endpoint wrapper for the Requirer side of the cos_agent interface."""

Expand Down Expand Up @@ -559,13 +555,13 @@ def _on_relation_data_changed(self, event: RelationChangedEvent):
if not (provider_data := self._validated_provider_data(raw)):
return

# Copy data from the principal relation to the peer relation, so the leader could
# Copy data from the cos_agent relation to the peer relation, so the leader could
# follow up.
# Save the originating unit name, so it could be used for topology later on by the leader.
data = CosAgentPeersUnitData( # peer relation databag model
principal_unit_name=event.unit.name,
principal_relation_id=str(event.relation.id),
principal_relation_name=event.relation.name,
unit_name=event.unit.name,
relation_id=str(event.relation.id),
relation_name=event.relation.name,
metrics_alert_rules=provider_data.metrics_alert_rules,
log_alert_rules=provider_data.log_alert_rules,
dashboards=provider_data.dashboards,
Expand All @@ -592,39 +588,7 @@ def trigger_refresh(self, _):
self.on.data_changed.emit() # pyright: ignore

@property
def _principal_unit(self) -> Optional[Unit]:
"""Return the principal unit for a relation.
Assumes that the relation is of type subordinate.
Relies on the fact that, for subordinate relations, the only remote unit visible to
*this unit* is the principal unit that this unit is attached to.
"""
if relations := self._principal_relations:
# Technically it's a list, but for subordinates there can only be one relation
principal_relation = next(iter(relations))
if units := principal_relation.units:
# Technically it's a list, but for subordinates there can only be one
return next(iter(units))

return None

@property
def _principal_relations(self):
relations = []
for relation in self._charm.model.relations[self._relation_name]:
if not json.loads(relation.data[next(iter(relation.units))]["config"]).get(
["subordinate"], False
):
relations.append(relation)
if len(relations) > 1:
logger.error(
"Multiple applications claiming to be principal. Update the cos-agent library in the client application charms."
)
raise MultiplePrincipalsError("Multiple principal applications.")
return relations

@property
def _remote_data(self) -> List[CosAgentProviderUnitData]:
def _remote_data(self) -> List[Tuple[CosAgentProviderUnitData, JujuTopology]]:
"""Return a list of remote data from each of the related units.
Assumes that the relation is of type subordinate.
Expand All @@ -641,7 +605,16 @@ def _remote_data(self) -> List[CosAgentProviderUnitData]:
continue
if not (provider_data := self._validated_provider_data(raw)):
continue
all_data.append(provider_data)

# Apply topology
topology = JujuTopology(
model=self._charm.model.name,
model_uuid=self._charm.model.uuid,
application=unit.app.name,
unit=unit.name,
)

all_data.append((provider_data, topology))

return all_data

Expand Down Expand Up @@ -711,7 +684,7 @@ def metrics_alerts(self) -> Dict[str, Any]:
def metrics_jobs(self) -> List[Dict]:
"""Parse the relation data contents and extract the metrics jobs."""
scrape_jobs = []
for data in self._remote_data:
for data, topology in self._remote_data:
for job in data.metrics_scrape_jobs:
# In #220, relation schema changed from a simplified dict to the standard
# `scrape_configs`.
Expand All @@ -727,6 +700,14 @@ def metrics_jobs(self) -> List[Dict]:
"tls_config": {"insecure_skip_verify": True},
}

# Apply labels to the scrape jobs
for static_config in job.get("static_configs", []):
static_config["labels"] = {
# Be sure to keep labels from static_config
**static_config.get("labels", {}),
**topology.label_matcher_dict,
}

scrape_jobs.append(job)

return scrape_jobs
Expand All @@ -735,7 +716,7 @@ def metrics_jobs(self) -> List[Dict]:
def snap_log_endpoints(self) -> List[SnapEndpoint]:
"""Fetch logging endpoints exposed by related snaps."""
plugs = []
for data in self._remote_data:
for data, topology in self._remote_data:
targets = data.log_slots
if targets:
for target in targets:
Expand Down Expand Up @@ -775,7 +756,7 @@ def logs_alerts(self) -> Dict[str, Any]:
model=self._charm.model.name,
model_uuid=self._charm.model.uuid,
application=app_name,
# For the topology unit, we could use `data.principal_unit_name`, but that unit
# For the topology unit, we could use `data.unit_name`, but that unit
# name may not be very stable: `_gather_peer_data` de-duplicates by app name so
# the exact unit name that turns up first in the iterator may vary from time to
# time. So using the grafana-agent unit name instead.
Expand Down Expand Up @@ -808,9 +789,9 @@ def dashboards(self) -> List[Dict[str, str]]:

dashboards.append(
{
"relation_id": data.principal_relation_id,
"relation_id": data.relation_id,
# We have the remote charm name - use it for the identifier
"charm": f"{data.principal_relation_name}-{app_name}",
"charm": f"{data.relation_name}-{app_name}",
"content": content,
"title": title,
}
Expand Down
Loading

0 comments on commit f1f3e5b

Please sign in to comment.