From 8ab90801c508a5f5e9df7ad85edfa08f27729996 Mon Sep 17 00:00:00 2001 From: Ryan Barry Date: Tue, 14 Jun 2022 13:10:10 -0400 Subject: [PATCH] Use `cos-tool` for transformation and validation, validate alert rules (#297) * Migrate to cos-tool for transformations * Add an event, add tests for validation, update docs and comments * Add juju_topology lib to the tester * Fix a silly typo from the merge * Review comments * Emi invalid alert events from remote_write, too * Tweak wording in comments * Use the right name in charmcraft.yaml * Add a comma back if it was present --- .gitignore | 3 +- .jujuignore | 2 +- charmcraft.yaml | 6 +- .../observability_libs/v0/juju_topology.py | 288 +++++++++++++ .../v0/prometheus_remote_write.py | 323 +++++---------- .../prometheus_k8s/v0/prometheus_scrape.py | 384 +++++++----------- .../observability_libs/v0/juju_topology.py | 288 +++++++++++++ tests/unit/test_charm.py | 1 + tests/unit/test_endpoint_aggregator.py | 17 +- tests/unit/test_endpoint_consumer.py | 5 +- tests/unit/test_endpoint_provider.py | 92 +++-- tests/unit/test_prometheus_rules_provider.py | 5 + tests/unit/test_remote_write.py | 9 + tests/unit/test_transform.py | 105 +++-- tox.ini | 2 +- 15 files changed, 1010 insertions(+), 520 deletions(-) create mode 100644 lib/charms/observability_libs/v0/juju_topology.py create mode 100644 tests/integration/prometheus-tester/lib/charms/observability_libs/v0/juju_topology.py diff --git a/.gitignore b/.gitignore index cc68e0a3..bec476d7 100644 --- a/.gitignore +++ b/.gitignore @@ -9,5 +9,4 @@ venv .idea .vscode tests/integration/prometheus-tester/lib/charms/prometheus_k8s/v0/prometheus_scrape.py -promql-transform -promql-transform* +cos-tool* diff --git a/.jujuignore b/.jujuignore index f6ecb4f6..b31bd67a 100644 --- a/.jujuignore +++ b/.jujuignore @@ -7,4 +7,4 @@ tox.ini CONTRIBUTING.md INTEGRATING.md RELEASE.md -promql-transform-* +cos-tool* diff --git a/charmcraft.yaml b/charmcraft.yaml index 912cadd1..b556e8ca 100644 --- a/charmcraft.yaml +++ b/charmcraft.yaml @@ -12,11 +12,11 @@ parts: charm: build-packages: - git - promql-transform: + cos-tool: plugin: dump source: . build-packages: - curl override-pull: | - curl -L -O https://github.com/canonical/promql-transform/releases/latest/download/promql-transform-${CRAFT_TARGET_ARCH} - chmod +x promql-transform-* + curl -L -O https://github.com/canonical/cos-tool/releases/latest/download/cos-tool-${CRAFT_TARGET_ARCH} + chmod +x cos-tool-* diff --git a/lib/charms/observability_libs/v0/juju_topology.py b/lib/charms/observability_libs/v0/juju_topology.py new file mode 100644 index 00000000..a065dd53 --- /dev/null +++ b/lib/charms/observability_libs/v0/juju_topology.py @@ -0,0 +1,288 @@ +# Copyright 2022 Canonical Ltd. +# See LICENSE file for licensing details. +"""## Overview. + +This document explains how to use the `JujuTopology` class to +create and consume topology information from Juju in a consistent manner. + +The goal of the Juju topology is to uniquely identify a piece +of software running across any of your Juju-managed deployments. +This is achieved by combining the following four elements: + +- Model name +- Model UUID +- Application name +- Unit identifier + + +For a more in-depth description of the concept, as well as a +walk-through of it's use-case in observability, see +[this blog post](https://juju.is/blog/model-driven-observability-part-2-juju-topology-metrics) +on the Juju blog. + +## Library Usage + +This library may be used to create and consume `JujuTopology` objects. +The `JujuTopology` class provides three ways to create instances: + +### Using the `from_charm` method + +Enables instantiation by supplying the charm as an argument. When +creating topology objects for the current charm, this is the recommended +approach. + +```python +topology = JujuTopology.from_charm(self) +``` + +### Using the `from_dict` method + +Allows for instantion using a dictionary of relation data, like the +`scrape_metadata` from Prometheus or the labels of an alert rule. When +creating topology objects for remote charms, this is the recommended +approach. + +```python +scrape_metadata = json.loads(relation.data[relation.app].get("scrape_metadata", "{}")) +topology = JujuTopology.from_dict(scrape_metadata) +``` + +### Using the class constructor + +Enables instantiation using whatever values you want. While this +is useful in some very specific cases, this is almost certainly not +what you are looking for as setting these values manually may +result in observability metrics which do not uniquely identify a +charm in order to provide accurate usage reporting, alerting, +horizontal scaling, or other use cases. + +```python +topology = JujuTopology( + model="some-juju-model", + model_uuid="00000000-0000-0000-0000-000000000001", + application="fancy-juju-application", + unit="fancy-juju-application/0", + charm_name="fancy-juju-application-k8s", +) +``` + +""" + +import re +from collections import OrderedDict +from typing import Dict, List, Optional + +# The unique Charmhub library identifier, never change it +LIBID = "bced1658f20f49d28b88f61f83c2d232" + +LIBAPI = 0 +LIBPATCH = 1 + + +class InvalidUUIDError(Exception): + """Invalid UUID was provided.""" + + def __init__(self, uuid: str): + self.message = "'{}' is not a valid UUID.".format(uuid) + super().__init__(self.message) + + +class JujuTopology: + """JujuTopology is used for storing, generating and formatting juju topology information.""" + + def __init__( + self, + model: str, + model_uuid: str, + application: str, + unit: str = None, + charm_name: str = None, + ): + """Build a JujuTopology object. + + A `JujuTopology` object is used for storing and transforming + Juju topology information. This information is used to + annotate Prometheus scrape jobs and alert rules. Such + annotation when applied to scrape jobs helps in identifying + the source of the scrapped metrics. On the other hand when + applied to alert rules topology information ensures that + evaluation of alert expressions is restricted to the source + (charm) from which the alert rules were obtained. + + Args: + model: a string name of the Juju model + model_uuid: a globally unique string identifier for the Juju model + application: an application name as a string + unit: a unit name as a string + charm_name: name of charm as a string + """ + if not self.is_valid_uuid(model_uuid): + raise InvalidUUIDError(model_uuid) + + self._model = model + self._model_uuid = model_uuid + self._application = application + self._charm_name = charm_name + self._unit = unit + + def is_valid_uuid(self, uuid): + """Validates the supplied UUID against the Juju Model UUID pattern.""" + regex = re.compile( + "^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}$" + ) + return bool(regex.match(uuid)) + + @classmethod + def from_charm(cls, charm): + """Creates a JujuTopology instance by using the model data available on a charm object. + + Args: + charm: a `CharmBase` object for which the `JujuTopology` will be constructed + Returns: + a `JujuTopology` object. + """ + return cls( + model=charm.model.name, + model_uuid=charm.model.uuid, + application=charm.model.app.name, + unit=charm.model.unit.name, + charm_name=charm.meta.name, + ) + + @classmethod + def from_dict(cls, data: dict): + """Factory method for creating `JujuTopology` children from a dictionary. + + Args: + data: a dictionary with five keys providing topology information. The keys are + - "model" + - "model_uuid" + - "application" + - "unit" + - "charm_name" + `unit` and `charm_name` may be empty, but will result in more limited + labels. However, this allows us to support charms without workloads. + + Returns: + a `JujuTopology` object. + """ + return cls( + model=data["model"], + model_uuid=data["model_uuid"], + application=data["application"], + unit=data.get("unit", ""), + charm_name=data.get("charm_name", ""), + ) + + def as_dict( + self, *, remapped_keys: Dict[str, str] = None, excluded_keys: List[str] = None + ) -> OrderedDict: + """Format the topology information into an ordered dict. + + Keeping the dictionary ordered is important to be able to + compare dicts without having to resort to deep comparisons. + + Args: + remapped_keys: A dictionary mapping old key names to new key names, + which will be substituted when invoked. + excluded_keys: A list of key names to exclude from the returned dict. + uuid_length: The length to crop the UUID to. + """ + ret = OrderedDict( + [ + ("model", self.model), + ("model_uuid", self.model_uuid), + ("application", self.application), + ("unit", self.unit), + ("charm_name", self.charm_name), + ] + ) + if excluded_keys: + ret = OrderedDict({k: v for k, v in ret.items() if k not in excluded_keys}) + + if remapped_keys: + ret = OrderedDict( + (remapped_keys.get(k), v) if remapped_keys.get(k) else (k, v) for k, v in ret.items() # type: ignore + ) + + return ret + + @property + def identifier(self) -> str: + """Format the topology information into a terse string. + + This crops the model UUID, making it unsuitable for comparisons against + anything but other identifiers. Mainly to be used as a display name or file + name where long strings might become an issue. + + >>> JujuTopology( \ + model = "a-model", \ + model_uuid = "00000000-0000-4000-8000-000000000000", \ + application = "some-app", \ + unit = "some-app/1" \ + ).identifier + 'a-model_00000000_some-app' + """ + parts = self.as_dict( + excluded_keys=["unit", "charm_name"], + ) + + parts["model_uuid"] = self.model_uuid_short + values = parts.values() + + return "_".join([str(val) for val in values]).replace("/", "_") + + @property + def label_matcher_dict(self) -> Dict[str, str]: + """Format the topology information into a dict with keys having 'juju_' as prefix. + + Relabelled topology never includes the unit as it would then only match + the leader unit (ie. the unit that produced the dict). + """ + items = self.as_dict( + remapped_keys={"charm_name": "charm"}, + excluded_keys=["unit"], + ).items() + + return {"juju_{}".format(key): value for key, value in items if value} + + @property + def label_matchers(self) -> str: + """Format the topology information into a promql/logql label matcher string. + + Topology label matchers should never include the unit as it + would then only match the leader unit (ie. the unit that + produced the matchers). + """ + items = self.label_matcher_dict.items() + return ", ".join(['{}="{}"'.format(key, value) for key, value in items if value]) + + @property + def model(self) -> str: + """Getter for the juju model value.""" + return self._model + + @property + def model_uuid(self) -> str: + """Getter for the juju model uuid value.""" + return self._model_uuid + + @property + def model_uuid_short(self) -> str: + """Getter for the juju model value, truncated to the first eight letters.""" + return self._model_uuid[:8] + + @property + def application(self) -> str: + """Getter for the juju application value.""" + return self._application + + @property + def charm_name(self) -> Optional[str]: + """Getter for the juju charm name value.""" + return self._charm_name + + @property + def unit(self) -> Optional[str]: + """Getter for the juju unit value.""" + return self._unit diff --git a/lib/charms/prometheus_k8s/v0/prometheus_remote_write.py b/lib/charms/prometheus_k8s/v0/prometheus_remote_write.py index a44dc792..0cbfef36 100644 --- a/lib/charms/prometheus_k8s/v0/prometheus_remote_write.py +++ b/lib/charms/prometheus_k8s/v0/prometheus_remote_write.py @@ -17,11 +17,13 @@ import re import socket import subprocess -from collections import OrderedDict +import tempfile +import uuid from pathlib import Path -from typing import Dict, List, Optional, Union +from typing import Dict, List, Optional, Tuple, Union import yaml +from charms.observability_libs.v0.juju_topology import JujuTopology from ops.charm import CharmBase, HookEvent, RelationEvent, RelationMeta, RelationRole from ops.framework import EventBase, EventSource, Object, ObjectEvents from ops.model import Relation @@ -97,204 +99,28 @@ def __init__( super().__init__(self.message) -class JujuTopology: - """Class for storing and formatting juju topology information.""" +class InvalidAlertRuleEvent(EventBase): + """Event emitted when alert rule files are not parsable. - STUB = "%%juju_topology%%" - - def __new__(cls, *args, **kwargs): - """Reject instantiation of a base JujuTopology class. Children only.""" - if cls is JujuTopology: - raise TypeError("only children of '{}' may be instantiated".format(cls.__name__)) - return object.__new__(cls) - - def __init__( - self, - model: str, - model_uuid: str, - application: str, - unit: Optional[str] = "", - charm_name: Optional[str] = "", - ): - """Build a JujuTopology object. - - A `JujuTopology` object is used for storing and transforming - Juju Topology information. This information is used to - annotate Prometheus scrape jobs and alert rules. Such - annotation when applied to scrape jobs helps in identifying - the source of the scrapped metrics. On the other hand when - applied to alert rules topology information ensures that - evaluation of alert expressions is restricted to the source - (charm) from which the alert rules were obtained. - - Args: - model: a string name of the Juju model - model_uuid: a globally unique string identifier for the Juju model - application: an application name as a string - unit: a unit name as a string - charm_name: name of charm as a string - - Note: - `JujuTopology` should not be constructed directly by charm code. Please - use `ProviderTopology` or `AggregatorTopology`. - """ - self.model = model - self.model_uuid = model_uuid - self.application = application - self.charm_name = charm_name - self.unit = unit - - @classmethod - def from_charm(cls, charm): - """Factory method for creating `JujuTopology` children from a given charm. - - Args: - charm: a `CharmBase` object for which the `JujuTopology` has to be constructed - - Returns: - a `JujuTopology` object. - """ - return cls( - model=charm.model.name, - model_uuid=charm.model.uuid, - application=charm.model.app.name, - unit=charm.model.unit.name, - charm_name=charm.meta.name, - ) - - @classmethod - def from_relation_data(cls, data: dict): - """Factory method for creating `JujuTopology` children from a dictionary. - - Args: - data: a dictionary with four keys providing topology information. The keys are - - "model" - - "model_uuid" - - "application" - - "unit" - - "charm_name" - - `unit` and `charm_name` may be empty, but will result in more limited - labels. However, this allows us to support payload-only charms. - - Returns: - a `JujuTopology` object. - """ - return cls( - model=data["model"], - model_uuid=data["model_uuid"], - application=data["application"], - unit=data.get("unit", ""), - charm_name=data.get("charm_name", ""), - ) - - @property - def identifier(self) -> str: - """Format the topology information into a terse string.""" - # This is odd, but may have `None` as a model key - return "_".join([str(val) for val in self.as_promql_label_dict().values()]).replace( - "/", "_" - ) - - @property - def promql_labels(self) -> str: - """Format the topology information into a verbose string.""" - return ", ".join( - ['{}="{}"'.format(key, value) for key, value in self.as_promql_label_dict().items()] - ) - - def as_dict(self, rename_keys: Optional[Dict[str, str]] = None) -> OrderedDict: - """Format the topology information into a dict. - - Use an OrderedDict so we can rely on the insertion order on Python 3.5 (and 3.6, - which still does not guarantee it). - - Args: - rename_keys: A dictionary mapping old key names to new key names, which will - be substituted when invoked. - """ - ret = OrderedDict( - [ - ("model", self.model), - ("model_uuid", self.model_uuid), - ("application", self.application), - ("unit", self.unit), - ("charm_name", self.charm_name), - ] - ) - - ret["unit"] or ret.pop("unit") - ret["charm_name"] or ret.pop("charm_name") - - # If a key exists in `rename_keys`, replace the value - if rename_keys: - ret = OrderedDict( - (rename_keys.get(k), v) if rename_keys.get(k) else (k, v) for k, v in ret.items() # type: ignore - ) - - return ret + Enables us to set a clear status on the provider. + """ - def as_promql_label_dict(self): - """Format the topology information into a dict with keys having 'juju_' as prefix.""" - vals = { - "juju_{}".format(key): val - for key, val in self.as_dict(rename_keys={"charm_name": "charm"}).items() + def __init__(self, handle, errors: str = "", valid: bool = False): + super().__init__(handle) + self.errors = errors + self.valid = valid + + def snapshot(self) -> Dict: + """Save alert rule information.""" + return { + "valid": self.valid, + "errors": self.errors, } - # The leader is the only unit that sets alert rules, if "juju_unit" is present, - # then the rules will only be evaluated for that unit - if "juju_unit" in vals: - vals.pop("juju_unit") - - return vals - - def render(self, template: str): - """Render a juju-topology template string with topology info.""" - return template.replace(JujuTopology.STUB, self.promql_labels) - - -class AggregatorTopology(JujuTopology): - """Class for initializing topology information for MetricsEndpointAggregator.""" - - @classmethod - def create(cls, model: str, model_uuid: str, application: str, unit: str): - """Factory method for creating the `AggregatorTopology` dataclass from a given charm. - - Args: - model: a string representing the model - model_uuid: the model UUID as a string - application: the application name - unit: the unit name - Returns: - a `AggregatorTopology` object. - """ - return cls( - model=model, - model_uuid=model_uuid, - application=application, - unit=unit, - ) - - def as_promql_label_dict(self): - """Format the topology information into a dict with keys having 'juju_' as prefix.""" - vals = {"juju_{}".format(key): val for key, val in self.as_dict().items()} - - # FIXME: Why is this different? I have no idea. The uuid length should be the same - vals["juju_model_uuid"] = vals["juju_model_uuid"][:7] - - return vals - - -class ProviderTopology(JujuTopology): - """Class for initializing topology information for MetricsEndpointProvider.""" - @property - def scrape_identifier(self): - """Format the topology information into a scrape identifier.""" - # This is used only by Metrics[Consumer|Provider] and does not need a - # unit name, so only check for the charm name - return "juju_{}_prometheus_scrape".format( - "_".join([self.model, self.model_uuid[:7], self.application, self.charm_name]) # type: ignore - ) + def restore(self, snapshot): + """Restore alert rule information.""" + self.valid = snapshot["valid"] + self.errors = snapshot["errors"] def _is_official_alert_rule_format(rules_dict: dict) -> bool: @@ -367,6 +193,7 @@ def __init__(self, topology: Optional[JujuTopology] = None): topology: an optional `JujuTopology` instance that is used to annotate all alert rules. """ self.topology = topology + self.tool = CosTool(None) self.alert_groups = [] # type: List[dict] def _from_file(self, root_path: Path, file_path: Path) -> List[dict]: @@ -417,11 +244,14 @@ def _from_file(self, root_path: Path, file_path: Path) -> List[dict]: if self.topology: # only insert labels that do not already exist - for label, val in self.topology.as_promql_label_dict().items(): + for label, val in self.topology.label_matcher_dict.items(): if label not in alert_rule["labels"]: alert_rule["labels"][label] = val # insert juju topology filters into a prometheus alert rule - alert_rule["expr"] = self.topology.render(alert_rule["expr"]) + alert_rule["expr"] = self.tool.inject_label_matchers( + re.sub(r"%%juju_topology%%,?", "", alert_rule["expr"]), + self.topology.label_matcher_dict, + ) return alert_groups @@ -650,6 +480,7 @@ class PrometheusRemoteWriteConsumerEvents(ObjectEvents): """Event descriptor for events raised by `PrometheusRemoteWriteConsumer`.""" endpoints_changed = EventSource(PrometheusRemoteWriteEndpointsChangedEvent) + alert_rule_status_changed = EventSource(InvalidAlertRuleEvent) class PrometheusRemoteWriteConsumer(Object): @@ -792,7 +623,7 @@ def __init__( self._relation_name = relation_name self._alert_rules_path = alert_rules_path - self.topology = ProviderTopology.from_charm(charm) + self.topology = JujuTopology.from_charm(charm) on_relation = self._charm.on[self._relation_name] @@ -809,6 +640,18 @@ def __init__( ) def _handle_endpoints_changed(self, event: RelationEvent) -> None: + if self._charm.unit.is_leader(): + ev = json.loads(event.relation.data[event.app].get("event", "{}")) + + if ev: + valid = bool(ev.get("valid", True)) + errors = ev.get("errors", "") + + if valid and not errors: + self.on.alert_rule_status_changed.emit(valid=valid) + else: + self.on.alert_rule_status_changed.emit(valid=valid, errors=errors) + self.on.endpoints_changed.emit(relation_id=event.relation.id) def _push_alerts_on_relation_joined(self, event: RelationEvent) -> None: @@ -943,7 +786,7 @@ def __init__( super().__init__(charm, relation_name) self._charm = charm - self._transformer = PromqlTransformer(self._charm) + self.tool = CosTool(self._charm) self._relation_name = relation_name self._endpoint_schema = endpoint_schema self._endpoint_address = endpoint_address @@ -1048,6 +891,7 @@ def alerts(self) -> dict: logger.debug("No alert groups were found in relation data") continue # Construct an ID based on what's in the alert rules + error_messages = [] for group in alert_rules["groups"]: try: labels = group["rules"][0]["labels"] @@ -1056,6 +900,12 @@ def alerts(self) -> dict: labels["juju_model_uuid"], labels["juju_application"], ) + + _, errmsg = self.tool.validate_alert_rules({"groups": [group]}) + + if errmsg: + error_messages.append(errmsg) + continue if identifier not in alerts: alerts[identifier] = {"groups": [group]} else: @@ -1063,32 +913,38 @@ def alerts(self) -> dict: except KeyError: logger.error("Alert rules were found but no usable labels were present") + if error_messages: + relation.data[self._charm.app]["event"] = json.dumps( + {"errors": "; ".join(error_messages)} + ) + continue + return alerts # Copy/pasted from prometheus_scrape.py -class PromqlTransformer: - """Uses promql-transform to inject label matchers into alert rule expressions.""" +class CosTool: + """Uses cos-tool to inject label matchers into alert rule expressions and validate rules.""" _path = None _disabled = False + def __init__(self, charm): + self._charm = charm + @property def path(self): - """Lazy lookup of the path of promql-transform.""" + """Lazy lookup of the path of cos-tool.""" if self._disabled: return None if not self._path: - self._path = self._get_transformer_path() + self._path = self._get_tool_path() if not self._path: logger.debug("Skipping injection of juju topology as label matchers") self._disabled = True return self._path - def __init__(self, charm): - self._charm = charm - - def apply_label_matchers(self, rules): + def apply_label_matchers(self, rules) -> dict: """Will apply label matchers to the expression of all alerts in all supplied groups.""" if not self.path: return rules @@ -1108,18 +964,51 @@ def apply_label_matchers(self, rules): if label in rule["labels"]: topology[label] = rule["labels"][label] - rule["expr"] = self._apply_label_matcher(rule["expr"], topology) + rule["expr"] = self.inject_label_matchers(rule["expr"], topology) return rules - def _apply_label_matcher(self, expression, topology): + def validate_alert_rules(self, rules: dict) -> Tuple[bool, str]: + """Will validate correctness alert rules, returning a boolean and any errors.""" + if not self.path: + logger.debug("`cos-tool` unavailable. Not validating alert correctness.") + return True, "" + + with tempfile.TemporaryDirectory() as tmpdir: + rule_path = Path(tmpdir + "/validate_rule.yaml") + + # Smash "our" rules format into what upstream actually uses, which is more like: + # + # groups: + # - name: foo + # rules: + # - alert: SomeAlert + # expr: up + # - alert: OtherAlert + # expr: up + transformed_rules = {"groups": []} # type: ignore + for rule in rules["groups"]: + transformed = {"name": str(uuid.uuid4()), "rules": [rule]} + transformed_rules["groups"].append(transformed) + + rule_path.write_text(yaml.dump(rules)) + + args = [str(self.path), "validate", str(rule_path)] + # noinspection PyBroadException + try: + self._exec(args) + return True, "" + except subprocess.CalledProcessError as e: + logger.debug("Validating the rules failed: %s", e.output) + return False, ", ".join([line for line in e.output if "error validating" in line]) + + def inject_label_matchers(self, expression, topology): + """Add label matchers to an expression.""" if not topology: return expression if not self.path: - logger.debug( - "`promql-transform` unavailable. leaving expression unchanged: %s", expression - ) + logger.debug("`cos-tool` unavailable. Leaving expression unchanged: %s", expression) return expression - args = [str(self.path)] + args = [str(self.path), "transform"] args.extend( ["--label-matcher={}={}".format(key, value) for key, value in topology.items()] ) @@ -1132,10 +1021,10 @@ def _apply_label_matcher(self, expression, topology): logger.debug('Applying the expression failed: "%s", falling back to the original', e) return expression - def _get_transformer_path(self) -> Optional[Path]: + def _get_tool_path(self) -> Optional[Path]: arch = platform.processor() arch = "amd64" if arch == "x86_64" else arch - res = "promql-transform-{}".format(arch) + res = "cos-tool-{}".format(arch) try: path = Path(res).resolve() path.chmod(0o777) @@ -1143,7 +1032,7 @@ def _get_transformer_path(self) -> Optional[Path]: except NotImplementedError: logger.debug("System lacks support for chmod") except FileNotFoundError: - logger.debug('Could not locate promql transform at: "{}"'.format(res)) + logger.debug('Could not locate cos-tool at: "{}"'.format(res)) return None def _exec(self, cmd): diff --git a/lib/charms/prometheus_k8s/v0/prometheus_scrape.py b/lib/charms/prometheus_k8s/v0/prometheus_scrape.py index 9ac4d21a..0f476ab4 100644 --- a/lib/charms/prometheus_k8s/v0/prometheus_scrape.py +++ b/lib/charms/prometheus_k8s/v0/prometheus_scrape.py @@ -316,13 +316,16 @@ def _on_scrape_targets_changed(self, event): import logging import os import platform +import re import socket import subprocess -from collections import OrderedDict +import tempfile +import uuid from pathlib import Path -from typing import Dict, List, Optional, Union +from typing import Dict, List, Optional, Tuple, Union import yaml +from charms.observability_libs.v0.juju_topology import JujuTopology from ops.charm import CharmBase, RelationRole from ops.framework import BoundEvent, EventBase, EventSource, Object, ObjectEvents @@ -415,6 +418,36 @@ def __init__( super().__init__(self.message) +class InvalidAlertRuleEvent(EventBase): + """Event emitted when alert rule files are not parsable. + + Enables us to set a clear status on the provider. + """ + + def __init__(self, handle, errors: str = "", valid: bool = False): + super().__init__(handle) + self.errors = errors + self.valid = valid + + def snapshot(self) -> Dict: + """Save alert rule information.""" + return { + "valid": self.valid, + "errors": self.errors, + } + + def restore(self, snapshot): + """Restore alert rule information.""" + self.valid = snapshot["valid"] + self.errors = snapshot["errors"] + + +class MetricsEndpointProviderEvents(ObjectEvents): + """Events raised by :class:`InvalidAlertRuleEvent`s.""" + + alert_rule_status_changed = EventSource(InvalidAlertRuleEvent) + + def _validate_relation_by_interface_and_direction( charm: CharmBase, relation_name: str, @@ -496,205 +529,6 @@ def _sanitize_scrape_configuration(job) -> dict: return sanitized_job -class JujuTopology: - """Class for storing and formatting juju topology information.""" - - STUB = "%%juju_topology%%" - - def __new__(cls, *args, **kwargs): - """Reject instantiation of a base JujuTopology class. Children only.""" - if cls is JujuTopology: - raise TypeError("only children of '{}' may be instantiated".format(cls.__name__)) - return object.__new__(cls) - - def __init__( - self, - model: str, - model_uuid: str, - application: str, - unit: Optional[str] = "", - charm_name: Optional[str] = "", - ): - """Build a JujuTopology object. - - A `JujuTopology` object is used for storing and transforming - Juju Topology information. This information is used to - annotate Prometheus scrape jobs and alert rules. Such - annotation when applied to scrape jobs helps in identifying - the source of the scrapped metrics. On the other hand when - applied to alert rules topology information ensures that - evaluation of alert expressions is restricted to the source - (charm) from which the alert rules were obtained. - - Args: - model: a string name of the Juju model - model_uuid: a globally unique string identifier for the Juju model - application: an application name as a string - unit: a unit name as a string - charm_name: name of charm as a string - - Note: - `JujuTopology` should not be constructed directly by charm code. Please - use `ProviderTopology` or `AggregatorTopology`. - """ - self.model = model - self.model_uuid = model_uuid - self.application = application - self.charm_name = charm_name - self.unit = unit - - @classmethod - def from_charm(cls, charm): - """Factory method for creating `JujuTopology` children from a given charm. - - Args: - charm: a `CharmBase` object for which the `JujuTopology` has to be constructed - - Returns: - a `JujuTopology` object. - """ - return cls( - model=charm.model.name, - model_uuid=charm.model.uuid, - application=charm.model.app.name, - unit=charm.model.unit.name, - charm_name=charm.meta.name, - ) - - @classmethod - def from_relation_data(cls, data: dict): - """Factory method for creating `JujuTopology` children from a dictionary. - - Args: - data: a dictionary with four keys providing topology information. The keys are - - "model" - - "model_uuid" - - "application" - - "unit" - - "charm_name" - - `unit` and `charm_name` may be empty, but will result in more limited - labels. However, this allows us to support payload-only charms. - - Returns: - a `JujuTopology` object. - """ - return cls( - model=data["model"], - model_uuid=data["model_uuid"], - application=data["application"], - unit=data.get("unit", ""), - charm_name=data.get("charm_name", ""), - ) - - @property - def identifier(self) -> str: - """Format the topology information into a terse string.""" - # This is odd, but may have `None` as a model key - return "_".join([str(val) for val in self.as_promql_label_dict().values()]).replace( - "/", "_" - ) - - @property - def promql_labels(self) -> str: - """Format the topology information into a verbose string.""" - return ", ".join( - ['{}="{}"'.format(key, value) for key, value in self.as_promql_label_dict().items()] - ) - - def as_dict(self, rename_keys: Optional[Dict[str, str]] = None) -> OrderedDict: - """Format the topology information into a dict. - - Use an OrderedDict so we can rely on the insertion order on Python 3.5 (and 3.6, - which still does not guarantee it). - - Args: - rename_keys: A dictionary mapping old key names to new key names, which will - be substituted when invoked. - """ - ret = OrderedDict( - [ - ("model", self.model), - ("model_uuid", self.model_uuid), - ("application", self.application), - ("unit", self.unit), - ("charm_name", self.charm_name), - ] - ) - - ret["unit"] or ret.pop("unit") - ret["charm_name"] or ret.pop("charm_name") - - # If a key exists in `rename_keys`, replace the value - if rename_keys: - ret = OrderedDict( - (rename_keys.get(k), v) if rename_keys.get(k) else (k, v) for k, v in ret.items() # type: ignore - ) - - return ret - - def as_promql_label_dict(self): - """Format the topology information into a dict with keys having 'juju_' as prefix.""" - vals = { - "juju_{}".format(key): val - for key, val in self.as_dict(rename_keys={"charm_name": "charm"}).items() - } - # The leader is the only unit that sets alert rules, if "juju_unit" is present, - # then the rules will only be evaluated for that unit - if "juju_unit" in vals: - vals.pop("juju_unit") - - return vals - - def render(self, template: str): - """Render a juju-topology template string with topology info.""" - return template.replace(JujuTopology.STUB, self.promql_labels) - - -class AggregatorTopology(JujuTopology): - """Class for initializing topology information for MetricsEndpointAggregator.""" - - @classmethod - def create(cls, model: str, model_uuid: str, application: str, unit: str): - """Factory method for creating the `AggregatorTopology` dataclass from a given charm. - - Args: - model: a string representing the model - model_uuid: the model UUID as a string - application: the application name - unit: the unit name - - Returns: - a `AggregatorTopology` object. - """ - return cls( - model=model, - model_uuid=model_uuid, - application=application, - unit=unit, - ) - - def as_promql_label_dict(self): - """Format the topology information into a dict with keys having 'juju_' as prefix.""" - vals = {"juju_{}".format(key): val for key, val in self.as_dict().items()} - - # FIXME: Why is this different? I have no idea. The uuid length should be the same - vals["juju_model_uuid"] = vals["juju_model_uuid"][:7] - - return vals - - -class ProviderTopology(JujuTopology): - """Class for initializing topology information for MetricsEndpointProvider.""" - - @property - def scrape_identifier(self): - """Format the topology information into a scrape identifier.""" - # This is used only by Metrics[Consumer|Provider] and does not need a - # unit name, so only check for the charm name - return "juju_{}_prometheus_scrape".format(self.identifier) - - class InvalidAlertRulePathError(Exception): """Raised if the alert rules folder cannot be found or is otherwise invalid.""" @@ -780,6 +614,7 @@ def __init__(self, topology: Optional[JujuTopology] = None): topology: an optional `JujuTopology` instance that is used to annotate all alert rules. """ self.topology = topology + self.tool = CosTool(None) self.alert_groups = [] # type: List[dict] def _from_file(self, root_path: Path, file_path: Path) -> List[dict]: @@ -828,9 +663,12 @@ def _from_file(self, root_path: Path, file_path: Path) -> List[dict]: alert_rule["labels"] = {} if self.topology: - alert_rule["labels"].update(self.topology.as_promql_label_dict()) + alert_rule["labels"].update(self.topology.label_matcher_dict) # insert juju topology filters into a prometheus alert rule - alert_rule["expr"] = self.topology.render(alert_rule["expr"]) + alert_rule["expr"] = self.tool.inject_label_matchers( + re.sub(r"%%juju_topology%%(,?)", r"\1", alert_rule["expr"]), + self.topology.label_matcher_dict, + ) return alert_groups @@ -990,7 +828,7 @@ def __init__(self, charm: CharmBase, relation_name: str = DEFAULT_RELATION_NAME) super().__init__(charm, relation_name) self._charm = charm self._relation_name = relation_name - self._transformer = PromqlTransformer(self._charm) + self._tool = CosTool(self._charm) events = self._charm.on[relation_name] self.framework.observe(events.relation_changed, self._on_metrics_provider_relation_changed) self.framework.observe( @@ -1098,8 +936,8 @@ def alerts(self) -> dict: identifier = None try: scrape_metadata = json.loads(relation.data[relation.app]["scrape_metadata"]) - identifier = ProviderTopology.from_relation_data(scrape_metadata).identifier - alerts[identifier] = self._transformer.apply_label_matchers(alert_rules) + identifier = JujuTopology.from_dict(scrape_metadata).identifier + alerts[identifier] = self._tool.apply_label_matchers(alert_rules) except KeyError as e: logger.debug( @@ -1114,8 +952,16 @@ def alerts(self) -> dict: "Alert rules were found but no usable group or identifier was present" ) continue + alerts[identifier] = alert_rules + _, errmsg = self._tool.validate_alert_rules(alert_rules) + if errmsg: + if alerts[identifier]: + del alerts[identifier] + relation.data[self._charm.app]["event"] = json.dumps({"errors": errmsg}) + continue + return alerts def _get_identifier_by_alert_rules(self, rules: dict) -> Union[str, None]: @@ -1186,8 +1032,9 @@ def _static_scrape_config(self, relation) -> list: if not scrape_metadata: return scrape_jobs - job_name_prefix = ProviderTopology.from_relation_data(scrape_metadata).scrape_identifier - + job_name_prefix = "juju_{}_prometheus_scrape".format( + JujuTopology.from_dict(scrape_metadata).identifier + ) hosts = self._relation_hosts(relation) labeled_job_configs = [] @@ -1319,9 +1166,7 @@ def _set_juju_labels(self, labels, scrape_metadata) -> dict: topology information with the exception of unit name. """ juju_labels = labels.copy() # deep copy not needed - juju_labels.update( - ProviderTopology.from_relation_data(scrape_metadata).as_promql_label_dict() - ) + juju_labels.update(JujuTopology.from_dict(scrape_metadata).label_matcher_dict) return juju_labels @@ -1424,6 +1269,8 @@ def _resolve_dir_against_charm_path(charm: CharmBase, *path_elements: str) -> st class MetricsEndpointProvider(Object): """A metrics endpoint for Prometheus.""" + on = MetricsEndpointProviderEvents() + def __init__( self, charm, @@ -1505,6 +1352,15 @@ def __init__( expr: up{juju_model=, juju_model_uuid=, juju_application=} < 1 for: 0m + An attempt will be made to validate alert rules prior to loading them into Prometheus. + If they are invalid, an event will be emitted from this object which charms can respond + to in order to set a meaningful status for administrators. + + This can be observed via `consumer.on.alert_rule_status_changed` which contains: + - The error(s) encountered when validating as `errors` + - A `valid` attribute, which can be used to reset the state of charms if alert rules + are updated via another mechanism (e.g. `cos-config`) and refreshed. + Args: charm: a `CharmBase` object that manages this `MetricsEndpointProvider` object. Typically this is @@ -1551,7 +1407,7 @@ def __init__( ) super().__init__(charm, relation_name) - self.topology = ProviderTopology.from_charm(charm) + self.topology = JujuTopology.from_charm(charm) self._charm = charm self._alert_rules_path = alert_rules_path @@ -1562,7 +1418,7 @@ def __init__( events = self._charm.on[self._relation_name] self.framework.observe(events.relation_joined, self._set_scrape_job_spec) - self.framework.observe(events.relation_changed, self._set_scrape_job_spec) + self.framework.observe(events.relation_changed, self._on_relation_changed) if not refresh_event: if len(self._charm.meta.containers) == 1: @@ -1594,6 +1450,22 @@ def __init__( # If there is no leader during relation_joined we will still need to set alert rules. self.framework.observe(self._charm.on.leader_elected, self._set_scrape_job_spec) + def _on_relation_changed(self, event): + """Check for alert rule messages in the relation data before moving on.""" + if self._charm.unit.is_leader(): + ev = json.loads(event.relation.data[event.app].get("event", "{}")) + + if ev: + valid = bool(ev.get("valid", True)) + errors = ev.get("errors", "") + + if valid and not errors: + self.on.alert_rule_status_changed.emit(valid=valid) + else: + self.on.alert_rule_status_changed.emit(valid=valid, errors=errors) + + self._set_scrape_job_spec(event) + def _set_scrape_job_spec(self, event): """Ensure scrape target information is made available to prometheus. @@ -2138,11 +2010,14 @@ def _label_alert_rules(self, unit_rules, appname) -> list: labeled_rules = [] for unit_name, rules in unit_rules.items(): for rule in rules: - rule["labels"].update( - AggregatorTopology.create( - self.model.name, self.model.uuid, appname, unit_name - ).as_promql_label_dict() - ) + # the new JujuTopology removed this, so build it up by hand + matchers = { + "juju_{}".format(k): v + for k, v in JujuTopology(self.model.name, self.model.uuid, appname, unit_name) + .as_dict(excluded_keys=["charm_name"]) + .items() + } + rule["labels"].update(matchers.items()) labeled_rules.append(rule) return labeled_rules @@ -2218,28 +2093,28 @@ def _relabel_configs(self) -> list: ) -class PromqlTransformer: - """Uses promql-transform to inject label matchers into alert rule expressions.""" +class CosTool: + """Uses cos-tool to inject label matchers into alert rule expressions and validate rules.""" _path = None _disabled = False + def __init__(self, charm): + self._charm = charm + @property def path(self): - """Lazy lookup of the path of promql-transform.""" + """Lazy lookup of the path of cos-tool.""" if self._disabled: return None if not self._path: - self._path = self._get_transformer_path() + self._path = self._get_tool_path() if not self._path: logger.debug("Skipping injection of juju topology as label matchers") self._disabled = True return self._path - def __init__(self, charm): - self._charm = charm - - def apply_label_matchers(self, rules): + def apply_label_matchers(self, rules) -> dict: """Will apply label matchers to the expression of all alerts in all supplied groups.""" if not self.path: return rules @@ -2259,18 +2134,51 @@ def apply_label_matchers(self, rules): if label in rule["labels"]: topology[label] = rule["labels"][label] - rule["expr"] = self._apply_label_matcher(rule["expr"], topology) + rule["expr"] = self.inject_label_matchers(rule["expr"], topology) return rules - def _apply_label_matcher(self, expression, topology): + def validate_alert_rules(self, rules: dict) -> Tuple[bool, str]: + """Will validate correctness of alert rules, returning a boolean and any errors.""" + if not self.path: + logger.debug("`cos-tool` unavailable. Not validating alert correctness.") + return True, "" + + with tempfile.TemporaryDirectory() as tmpdir: + rule_path = Path(tmpdir + "/validate_rule.yaml") + + # Smash "our" rules format into what upstream actually uses, which is more like: + # + # groups: + # - name: foo + # rules: + # - alert: SomeAlert + # expr: up + # - alert: OtherAlert + # expr: up + transformed_rules = {"groups": []} # type: ignore + for rule in rules["groups"]: + transformed = {"name": str(uuid.uuid4()), "rules": [rule]} + transformed_rules["groups"].append(transformed) + + rule_path.write_text(yaml.dump(transformed_rules)) + + args = [str(self.path), "validate", str(rule_path)] + # noinspection PyBroadException + try: + self._exec(args) + return True, "" + except subprocess.CalledProcessError as e: + logger.debug("Validating the rules failed: %s", e.output) + return False, ", ".join([line for line in e.output if "error validating" in line]) + + def inject_label_matchers(self, expression, topology) -> str: + """Add label matchers to an expression.""" if not topology: return expression if not self.path: - logger.debug( - "`promql-transform` unavailable. leaving expression unchanged: %s", expression - ) + logger.debug("`cos-tool` unavailable. Leaving expression unchanged: %s", expression) return expression - args = [str(self.path)] + args = [str(self.path), "transform"] args.extend( ["--label-matcher={}={}".format(key, value) for key, value in topology.items()] ) @@ -2279,14 +2187,14 @@ def _apply_label_matcher(self, expression, topology): # noinspection PyBroadException try: return self._exec(args) - except Exception as e: + except subprocess.CalledProcessError as e: logger.debug('Applying the expression failed: "%s", falling back to the original', e) return expression - def _get_transformer_path(self) -> Optional[Path]: + def _get_tool_path(self) -> Optional[Path]: arch = platform.processor() arch = "amd64" if arch == "x86_64" else arch - res = "promql-transform-{}".format(arch) + res = "cos-tool-{}".format(arch) try: path = Path(res).resolve() path.chmod(0o777) @@ -2294,10 +2202,10 @@ def _get_transformer_path(self) -> Optional[Path]: except NotImplementedError: logger.debug("System lacks support for chmod") except FileNotFoundError: - logger.debug('Could not locate promql transform at: "{}"'.format(res)) + logger.debug('Could not locate cos-tool at: "{}"'.format(res)) return None - def _exec(self, cmd): - result = subprocess.run(cmd, check=False, stdout=subprocess.PIPE) + def _exec(self, cmd) -> str: + result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE) output = result.stdout.decode("utf-8").strip() return output diff --git a/tests/integration/prometheus-tester/lib/charms/observability_libs/v0/juju_topology.py b/tests/integration/prometheus-tester/lib/charms/observability_libs/v0/juju_topology.py new file mode 100644 index 00000000..a065dd53 --- /dev/null +++ b/tests/integration/prometheus-tester/lib/charms/observability_libs/v0/juju_topology.py @@ -0,0 +1,288 @@ +# Copyright 2022 Canonical Ltd. +# See LICENSE file for licensing details. +"""## Overview. + +This document explains how to use the `JujuTopology` class to +create and consume topology information from Juju in a consistent manner. + +The goal of the Juju topology is to uniquely identify a piece +of software running across any of your Juju-managed deployments. +This is achieved by combining the following four elements: + +- Model name +- Model UUID +- Application name +- Unit identifier + + +For a more in-depth description of the concept, as well as a +walk-through of it's use-case in observability, see +[this blog post](https://juju.is/blog/model-driven-observability-part-2-juju-topology-metrics) +on the Juju blog. + +## Library Usage + +This library may be used to create and consume `JujuTopology` objects. +The `JujuTopology` class provides three ways to create instances: + +### Using the `from_charm` method + +Enables instantiation by supplying the charm as an argument. When +creating topology objects for the current charm, this is the recommended +approach. + +```python +topology = JujuTopology.from_charm(self) +``` + +### Using the `from_dict` method + +Allows for instantion using a dictionary of relation data, like the +`scrape_metadata` from Prometheus or the labels of an alert rule. When +creating topology objects for remote charms, this is the recommended +approach. + +```python +scrape_metadata = json.loads(relation.data[relation.app].get("scrape_metadata", "{}")) +topology = JujuTopology.from_dict(scrape_metadata) +``` + +### Using the class constructor + +Enables instantiation using whatever values you want. While this +is useful in some very specific cases, this is almost certainly not +what you are looking for as setting these values manually may +result in observability metrics which do not uniquely identify a +charm in order to provide accurate usage reporting, alerting, +horizontal scaling, or other use cases. + +```python +topology = JujuTopology( + model="some-juju-model", + model_uuid="00000000-0000-0000-0000-000000000001", + application="fancy-juju-application", + unit="fancy-juju-application/0", + charm_name="fancy-juju-application-k8s", +) +``` + +""" + +import re +from collections import OrderedDict +from typing import Dict, List, Optional + +# The unique Charmhub library identifier, never change it +LIBID = "bced1658f20f49d28b88f61f83c2d232" + +LIBAPI = 0 +LIBPATCH = 1 + + +class InvalidUUIDError(Exception): + """Invalid UUID was provided.""" + + def __init__(self, uuid: str): + self.message = "'{}' is not a valid UUID.".format(uuid) + super().__init__(self.message) + + +class JujuTopology: + """JujuTopology is used for storing, generating and formatting juju topology information.""" + + def __init__( + self, + model: str, + model_uuid: str, + application: str, + unit: str = None, + charm_name: str = None, + ): + """Build a JujuTopology object. + + A `JujuTopology` object is used for storing and transforming + Juju topology information. This information is used to + annotate Prometheus scrape jobs and alert rules. Such + annotation when applied to scrape jobs helps in identifying + the source of the scrapped metrics. On the other hand when + applied to alert rules topology information ensures that + evaluation of alert expressions is restricted to the source + (charm) from which the alert rules were obtained. + + Args: + model: a string name of the Juju model + model_uuid: a globally unique string identifier for the Juju model + application: an application name as a string + unit: a unit name as a string + charm_name: name of charm as a string + """ + if not self.is_valid_uuid(model_uuid): + raise InvalidUUIDError(model_uuid) + + self._model = model + self._model_uuid = model_uuid + self._application = application + self._charm_name = charm_name + self._unit = unit + + def is_valid_uuid(self, uuid): + """Validates the supplied UUID against the Juju Model UUID pattern.""" + regex = re.compile( + "^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}$" + ) + return bool(regex.match(uuid)) + + @classmethod + def from_charm(cls, charm): + """Creates a JujuTopology instance by using the model data available on a charm object. + + Args: + charm: a `CharmBase` object for which the `JujuTopology` will be constructed + Returns: + a `JujuTopology` object. + """ + return cls( + model=charm.model.name, + model_uuid=charm.model.uuid, + application=charm.model.app.name, + unit=charm.model.unit.name, + charm_name=charm.meta.name, + ) + + @classmethod + def from_dict(cls, data: dict): + """Factory method for creating `JujuTopology` children from a dictionary. + + Args: + data: a dictionary with five keys providing topology information. The keys are + - "model" + - "model_uuid" + - "application" + - "unit" + - "charm_name" + `unit` and `charm_name` may be empty, but will result in more limited + labels. However, this allows us to support charms without workloads. + + Returns: + a `JujuTopology` object. + """ + return cls( + model=data["model"], + model_uuid=data["model_uuid"], + application=data["application"], + unit=data.get("unit", ""), + charm_name=data.get("charm_name", ""), + ) + + def as_dict( + self, *, remapped_keys: Dict[str, str] = None, excluded_keys: List[str] = None + ) -> OrderedDict: + """Format the topology information into an ordered dict. + + Keeping the dictionary ordered is important to be able to + compare dicts without having to resort to deep comparisons. + + Args: + remapped_keys: A dictionary mapping old key names to new key names, + which will be substituted when invoked. + excluded_keys: A list of key names to exclude from the returned dict. + uuid_length: The length to crop the UUID to. + """ + ret = OrderedDict( + [ + ("model", self.model), + ("model_uuid", self.model_uuid), + ("application", self.application), + ("unit", self.unit), + ("charm_name", self.charm_name), + ] + ) + if excluded_keys: + ret = OrderedDict({k: v for k, v in ret.items() if k not in excluded_keys}) + + if remapped_keys: + ret = OrderedDict( + (remapped_keys.get(k), v) if remapped_keys.get(k) else (k, v) for k, v in ret.items() # type: ignore + ) + + return ret + + @property + def identifier(self) -> str: + """Format the topology information into a terse string. + + This crops the model UUID, making it unsuitable for comparisons against + anything but other identifiers. Mainly to be used as a display name or file + name where long strings might become an issue. + + >>> JujuTopology( \ + model = "a-model", \ + model_uuid = "00000000-0000-4000-8000-000000000000", \ + application = "some-app", \ + unit = "some-app/1" \ + ).identifier + 'a-model_00000000_some-app' + """ + parts = self.as_dict( + excluded_keys=["unit", "charm_name"], + ) + + parts["model_uuid"] = self.model_uuid_short + values = parts.values() + + return "_".join([str(val) for val in values]).replace("/", "_") + + @property + def label_matcher_dict(self) -> Dict[str, str]: + """Format the topology information into a dict with keys having 'juju_' as prefix. + + Relabelled topology never includes the unit as it would then only match + the leader unit (ie. the unit that produced the dict). + """ + items = self.as_dict( + remapped_keys={"charm_name": "charm"}, + excluded_keys=["unit"], + ).items() + + return {"juju_{}".format(key): value for key, value in items if value} + + @property + def label_matchers(self) -> str: + """Format the topology information into a promql/logql label matcher string. + + Topology label matchers should never include the unit as it + would then only match the leader unit (ie. the unit that + produced the matchers). + """ + items = self.label_matcher_dict.items() + return ", ".join(['{}="{}"'.format(key, value) for key, value in items if value]) + + @property + def model(self) -> str: + """Getter for the juju model value.""" + return self._model + + @property + def model_uuid(self) -> str: + """Getter for the juju model uuid value.""" + return self._model_uuid + + @property + def model_uuid_short(self) -> str: + """Getter for the juju model value, truncated to the first eight letters.""" + return self._model_uuid[:8] + + @property + def application(self) -> str: + """Getter for the juju application value.""" + return self._application + + @property + def charm_name(self) -> Optional[str]: + """Getter for the juju charm name value.""" + return self._charm_name + + @property + def unit(self) -> Optional[str]: + """Getter for the juju unit value.""" + return self._unit diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index 0c23f1f5..748637bc 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -22,6 +22,7 @@ } +@patch("charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True) class TestCharm(unittest.TestCase): @patch("charm.KubernetesServicePatch", lambda x, y: None) @patch_network_get() diff --git a/tests/unit/test_endpoint_aggregator.py b/tests/unit/test_endpoint_aggregator.py index e8d64cf1..991c6b73 100644 --- a/tests/unit/test_endpoint_aggregator.py +++ b/tests/unit/test_endpoint_aggregator.py @@ -3,6 +3,7 @@ import json import unittest +from unittest.mock import patch from charms.prometheus_k8s.v0.prometheus_scrape import MetricsEndpointAggregator from ops.charm import CharmBase @@ -77,10 +78,11 @@ def __init__(self, *args, **kwargs): ) +@patch("charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True) class TestEndpointAggregator(unittest.TestCase): def setUp(self): self.harness = Harness(EndpointAggregatorCharm, meta=AGGREGATOR_META) - self.harness.set_model_info(name="testmodel", uuid="1234567890") + self.harness.set_model_info(name="testmodel", uuid="1234567") self.addCleanup(self.harness.cleanup) self.harness.set_leader(True) self.harness.begin_with_initial_hooks() @@ -115,7 +117,7 @@ def test_adding_prometheus_then_target_forwards_a_labeled_scrape_job(self): "targets": ["scrape_target_0:1234"], "labels": { "juju_model": "testmodel", - "juju_model_uuid": "1234567890", + "juju_model_uuid": "1234567", "juju_application": "target-app", "juju_unit": "target-app/0", "host": "scrape_target_0", @@ -169,6 +171,7 @@ def test_adding_prometheus_then_target_forwards_a_labeled_alert_rule(self): } ], } + self.maxDiff = None self.assertDictEqual(group, expected_group) def test_adding_target_then_prometheus_forwards_a_labeled_scrape_job(self): @@ -201,7 +204,7 @@ def test_adding_target_then_prometheus_forwards_a_labeled_scrape_job(self): "targets": ["scrape_target_0:1234"], "labels": { "juju_model": "testmodel", - "juju_model_uuid": "1234567890", + "juju_model_uuid": "1234567", "juju_application": "target-app", "juju_unit": "target-app/0", "host": "scrape_target_0", @@ -297,7 +300,7 @@ def test_scrape_jobs_from_multiple_target_applications_are_forwarded(self): "targets": ["scrape_target_0:1234"], "labels": { "juju_model": "testmodel", - "juju_model_uuid": "1234567890", + "juju_model_uuid": "1234567", "juju_application": "target-app-1", "juju_unit": "target-app-1/0", "host": "scrape_target_0", @@ -313,7 +316,7 @@ def test_scrape_jobs_from_multiple_target_applications_are_forwarded(self): "targets": ["scrape_target_1:5678"], "labels": { "juju_model": "testmodel", - "juju_model_uuid": "1234567890", + "juju_model_uuid": "1234567", "juju_application": "target-app-2", "juju_unit": "target-app-2/0", "host": "scrape_target_1", @@ -445,7 +448,7 @@ def test_scrape_job_removal_differentiates_between_applications(self): "targets": ["scrape_target_0:1234"], "labels": { "juju_model": "testmodel", - "juju_model_uuid": "1234567890", + "juju_model_uuid": "1234567", "juju_application": "target-app-1", "juju_unit": "target-app-1/0", "host": "scrape_target_0", @@ -565,7 +568,7 @@ def test_removing_scrape_jobs_differentiates_between_units(self): "targets": ["scrape_target_0:1234"], "labels": { "juju_model": "testmodel", - "juju_model_uuid": "1234567890", + "juju_model_uuid": "1234567", "juju_application": "target-app", "juju_unit": "target-app/0", "host": "scrape_target_0", diff --git a/tests/unit/test_endpoint_consumer.py b/tests/unit/test_endpoint_consumer.py index 07165db8..e2d914dd 100644 --- a/tests/unit/test_endpoint_consumer.py +++ b/tests/unit/test_endpoint_consumer.py @@ -3,6 +3,7 @@ import json import unittest +from unittest.mock import patch from charms.prometheus_k8s.v0.prometheus_scrape import ( ALLOWED_KEYS, @@ -154,6 +155,7 @@ def version(self): return "1.0.0" +@patch("charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True) class TestEndpointConsumer(unittest.TestCase): def setUp(self): metadata_file = open("metadata.yaml") @@ -489,11 +491,10 @@ def job_name_suffix(job_name, labels, rel_id): Returns: string name of job as set by provider (if any) """ - name_prefix = "juju_{}_{}_{}_{}_prometheus_{}_scrape_".format( + name_prefix = "juju_{}_{}_{}_prometheus_{}_scrape_".format( labels["juju_model"], labels["juju_model_uuid"][:7], labels["juju_application"], - labels["juju_charm"], rel_id, ) return job_name[len(name_prefix) :] diff --git a/tests/unit/test_endpoint_provider.py b/tests/unit/test_endpoint_provider.py index 3e681b3c..b67d95db 100644 --- a/tests/unit/test_endpoint_provider.py +++ b/tests/unit/test_endpoint_provider.py @@ -10,11 +10,11 @@ from unittest.mock import patch import yaml +from charms.observability_libs.v0.juju_topology import JujuTopology from charms.prometheus_k8s.v0.prometheus_scrape import ( ALLOWED_KEYS, AlertRules, MetricsEndpointProvider, - ProviderTopology, RelationInterfaceMismatchError, RelationNotFoundError, RelationRoleMismatchError, @@ -87,13 +87,21 @@ def __init__(self, *args, **kwargs): ) +@patch("charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True) class TestEndpointProvider(unittest.TestCase): + @patch( + "charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True + ) def setUp(self): self.harness = Harness(EndpointProviderCharm, meta=PROVIDER_META) + self.harness.set_model_name("MyUUID") self.addCleanup(self.harness.cleanup) self.harness.set_leader(True) self.harness.begin() + @patch( + "charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True + ) def test_provider_default_scrape_relations_not_in_meta(self): """Tests that the Provider raises exception when no promethes_scrape in meta.""" harness = Harness( @@ -112,6 +120,9 @@ def test_provider_default_scrape_relations_not_in_meta(self): ) self.assertRaises(RelationNotFoundError, harness.begin) + @patch( + "charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True + ) def test_provider_default_scrape_relation_wrong_interface(self): """Tests that Provider raises exception if the default relation has the wrong interface.""" harness = Harness( @@ -130,6 +141,9 @@ def test_provider_default_scrape_relation_wrong_interface(self): ) self.assertRaises(RelationInterfaceMismatchError, harness.begin) + @patch( + "charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True + ) def test_provider_default_scrape_relation_wrong_role(self): """Tests that Provider raises exception if the default relation has the wrong role.""" harness = Harness( @@ -172,6 +186,9 @@ def test_provider_selects_correct_refresh_event_for_sidecar(self, mock_set_unit_ "charms.prometheus_k8s.v0.prometheus_scrape.MetricsEndpointProvider._set_unit_ip", autospec=True, ) + @patch( + "charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True + ) def test_provider_selects_correct_refresh_event_for_podspec(self, mock_set_unit_ip): """Tests that Provider raises exception if the default relation has the wrong role.""" harness = Harness( @@ -196,6 +213,9 @@ def test_provider_selects_correct_refresh_event_for_podspec(self, mock_set_unit_ "charms.prometheus_k8s.v0.prometheus_scrape.MetricsEndpointProvider._set_unit_ip", autospec=True, ) + @patch( + "charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True + ) def test_provider_can_refresh_on_multiple_events(self, mock_set_unit_ip): harness = Harness( EndpointProviderCharmWithMultipleEvents, @@ -211,6 +231,7 @@ def test_provider_can_refresh_on_multiple_events(self, mock_set_unit_ip): - kubernetes """, ) + harness.set_model_name("MyUUID") harness.begin() harness.add_relation(RELATION_NAME, "provider") @@ -327,12 +348,14 @@ class CustomizedEndpointProvider(CustomizableEndpointProviderCharm): return CustomizedEndpointProvider +@patch("charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True) class TestNonStandardProviders(unittest.TestCase): def setup(self, **kwargs): bad_provider_charm = customize_endpoint_provider( alert_rules_path=kwargs["alert_rules_path"] ) self.harness = Harness(bad_provider_charm, meta=PROVIDER_META) + self.harness.set_model_name("MyUUID") self.addCleanup(self.harness.cleanup) self.harness.set_leader(True) self.harness.begin() @@ -358,6 +381,11 @@ def test_a_bad_alert_rules_logs_an_error(self): self.assertIn("Failed to read alert rules from bad_yaml.rule", messages[0]) +def sorted_matchers(matchers) -> str: + parts = [m.strip() for m in matchers.split(",")] + return ",".join(sorted(parts)) + + def expression_labels(expr): """Extract labels from an alert rule expression. @@ -376,7 +404,11 @@ def expression_labels(expr): yield labels +@patch("charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True) class TestAlertRulesWithOneRulePerFile(unittest.TestCase): + @patch( + "charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True + ) def setUp(self) -> None: free_standing_rule = { "alert": "free_standing", @@ -397,7 +429,7 @@ def setUp(self) -> None: ("rules/prom/prom_format/standard_rule.rule", yaml.safe_dump(rules_file_dict)), ) - self.topology = ProviderTopology("MyModel", "MyUUID", "MyApp", "MyUnit", "MyCharm") + self.topology = JujuTopology("MyModel", "MyUUID", "MyApp", "MyUnit", "MyCharm") def test_non_recursive_is_default(self): rules = AlertRules(topology=self.topology) @@ -413,13 +445,13 @@ def test_non_recursive_lma_format_loading_from_root_dir(self): expected_freestanding_rule = { "alert": "free_standing", "expr": "avg(some_vector[5m]) > 5", - "labels": self.topology.as_promql_label_dict(), + "labels": self.topology.label_matcher_dict, } expected_rules_file = { "groups": [ { - "name": f"{self.topology.identifier}_free_standing_rule_alerts", + "name": f"{sorted_matchers(self.topology.identifier)}_free_standing_rule_alerts", "rules": [expected_freestanding_rule], }, ] @@ -434,8 +466,8 @@ def test_non_recursive_official_format_loading_from_root_dir(self): expected_alert_rule = { "alert": "CPUOverUse", - "expr": f"process_cpu_seconds_total{{{self.topology.promql_labels}}} > 0.12", - "labels": self.topology.as_promql_label_dict(), + "expr": f"process_cpu_seconds_total{{{sorted_matchers(self.topology.label_matchers)}}} > 0.12", + "labels": self.topology.label_matcher_dict, } expected_rules_file = { @@ -446,7 +478,6 @@ def test_non_recursive_official_format_loading_from_root_dir(self): }, ] } - self.assertEqual(expected_rules_file, rules_file_dict) def test_alerts_in_both_formats_are_recursively_aggregated(self): @@ -462,14 +493,14 @@ def test_alerts_in_both_formats_are_recursively_aggregated(self): expected_alert_rule = { "alert": "CPUOverUse", - "expr": f"process_cpu_seconds_total{{{self.topology.promql_labels}}} > 0.12", - "labels": self.topology.as_promql_label_dict(), + "expr": f"process_cpu_seconds_total{{{sorted_matchers(self.topology.label_matchers)}}} > 0.12", + "labels": self.topology.label_matcher_dict, } expected_freestanding_rule = { "alert": "free_standing", "expr": "avg(some_vector[5m]) > 5", - "labels": self.topology.as_promql_label_dict(), + "labels": self.topology.label_matcher_dict, } expected_rules_file = { @@ -504,14 +535,18 @@ def test_unit_not_in_alert_labels(self): self.assertTrue("juju_unit" not in rule["labels"]) +@patch("charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True) class TestAlertRulesWithMultipleRulesPerFile(unittest.TestCase): + @patch( + "charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True + ) def setUp(self) -> None: - self.topology = ProviderTopology("MyModel", "MyUUID", "MyApp", "MyCharm") + self.topology = JujuTopology("MyModel", "MyUUID", "MyApp", "MyCharm") def gen_rule(self, name, **extra): return { "alert": f"CPUOverUse_{name}", - "expr": "process_cpu_seconds_total > 0.12", + "expr": f"process_cpu_seconds_total{{{sorted_matchers(self.topology.label_matchers)}}} > 0.12", **extra, } @@ -533,15 +568,15 @@ def test_load_multiple_rules_per_file(self): { "name": f"{self.topology.identifier}_group_1_alerts", "rules": [ - self.gen_rule(1, labels=self.topology.as_promql_label_dict()), - self.gen_rule(2, labels=self.topology.as_promql_label_dict()), + self.gen_rule(1, labels=self.topology.label_matcher_dict), + self.gen_rule(2, labels=self.topology.label_matcher_dict), ], }, { "name": f"{self.topology.identifier}_group_2_alerts", "rules": [ - self.gen_rule(1, labels=self.topology.as_promql_label_dict()), - self.gen_rule(2, labels=self.topology.as_promql_label_dict()), + self.gen_rule(1, labels=self.topology.label_matcher_dict), + self.gen_rule(2, labels=self.topology.label_matcher_dict), ], }, ] @@ -570,8 +605,8 @@ def test_duplicated_alert_names_within_alert_rules_list_are_silently_accepted(se { "name": f"{self.topology.identifier}_my_group_alerts", "rules": [ - self.gen_rule("same", labels=self.topology.as_promql_label_dict()), - self.gen_rule("same", labels=self.topology.as_promql_label_dict()), + self.gen_rule("same", labels=self.topology.label_matcher_dict), + self.gen_rule("same", labels=self.topology.label_matcher_dict), ], }, ] @@ -592,15 +627,15 @@ def test_duplicated_group_names_within_a_file_are_silently_accepted(self): { "name": f"{self.topology.identifier}_group_same_alerts", "rules": [ - self.gen_rule(1, labels=self.topology.as_promql_label_dict()), - self.gen_rule(2, labels=self.topology.as_promql_label_dict()), + self.gen_rule(1, labels=self.topology.label_matcher_dict), + self.gen_rule(2, labels=self.topology.label_matcher_dict), ], }, { "name": f"{self.topology.identifier}_group_same_alerts", "rules": [ - self.gen_rule(1, labels=self.topology.as_promql_label_dict()), - self.gen_rule(2, labels=self.topology.as_promql_label_dict()), + self.gen_rule(1, labels=self.topology.label_matcher_dict), + self.gen_rule(2, labels=self.topology.label_matcher_dict), ], }, ] @@ -623,21 +658,22 @@ def test_deeply_nested(self): "groups": [ { "name": f"{self.topology.identifier}_file_alerts", - "rules": [self.gen_rule(0, labels=self.topology.as_promql_label_dict())], + "rules": [self.gen_rule(0, labels=self.topology.label_matcher_dict)], }, { "name": f"{self.topology.identifier}_a_file_alerts", - "rules": [self.gen_rule(1, labels=self.topology.as_promql_label_dict())], + "rules": [self.gen_rule(1, labels=self.topology.label_matcher_dict)], }, { "name": f"{self.topology.identifier}_a_b_file_alerts", - "rules": [self.gen_rule(2, labels=self.topology.as_promql_label_dict())], + "rules": [self.gen_rule(2, labels=self.topology.label_matcher_dict)], }, ] } self.assertDictEqual(expected_rules_file, rules_file_dict_read) +@patch("charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True) class TestAlertRulesContainingUnitTopology(unittest.TestCase): """Tests that check MetricsEndpointProvider does not remove unit topology. @@ -652,6 +688,7 @@ def setup(self, **kwargs): alert_rules_path=kwargs["alert_rules_path"] ) self.harness = Harness(bad_provider_charm, meta=PROVIDER_META) + self.harness.set_model_name("MyUUID") self.addCleanup(self.harness.cleanup) self.harness.set_leader(True) self.harness.begin() @@ -670,11 +707,16 @@ def test_unit_label_is_retained_if_hard_coded(self): self.assertIn("juju_unit=", rule["expr"]) +@patch("charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True) class TestNoLeader(unittest.TestCase): """Tests the case where leader is not set immediately.""" + @patch( + "charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True + ) def setUp(self): self.harness = Harness(EndpointProviderCharm, meta=PROVIDER_META) + self.harness.set_model_name("MyUUID") self.addCleanup(self.harness.cleanup) self.harness.set_leader(False) self.harness.begin_with_initial_hooks() diff --git a/tests/unit/test_prometheus_rules_provider.py b/tests/unit/test_prometheus_rules_provider.py index 8547b452..910330e9 100644 --- a/tests/unit/test_prometheus_rules_provider.py +++ b/tests/unit/test_prometheus_rules_provider.py @@ -5,6 +5,7 @@ import os import textwrap import unittest +from unittest.mock import patch import yaml from charms.prometheus_k8s.v0.prometheus_scrape import PrometheusRulesProvider @@ -13,6 +14,7 @@ from ops.testing import Harness +@patch("charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True) class TestReloadAlertRules(unittest.TestCase): """Feature: Provider charm can manually invoke reloading of alerts. @@ -26,6 +28,9 @@ class TestReloadAlertRules(unittest.TestCase): # use a short-form free-standing alert, for brevity ALERT = yaml.safe_dump({"alert": "free_standing", "expr": "avg(some_vector[5m]) > 5"}) + @patch( + "charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True + ) def setUp(self): self.sandbox = TempFolderSandbox() alert_rules_path = os.path.join(self.sandbox.root, "alerts") diff --git a/tests/unit/test_remote_write.py b/tests/unit/test_remote_write.py index 4c0cc9a0..f7430135 100644 --- a/tests/unit/test_remote_write.py +++ b/tests/unit/test_remote_write.py @@ -102,7 +102,11 @@ def _handle_endpoints_changed(self, _): pass +@patch("charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True) class TestRemoteWriteConsumer(unittest.TestCase): + @patch( + "charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True + ) def setUp(self): self.harness = Harness(RemoteWriteConsumerCharm, meta=METADATA) self.addCleanup(self.harness.cleanup) @@ -113,6 +117,7 @@ def setUp(self): self.addCleanup(patcher.stop) self.mock_capacity.return_value = "1Gi" + self.harness.set_model_name("test") self.harness.begin_with_initial_hooks() def test_address_is_set(self): @@ -210,7 +215,11 @@ def test_alert_rule_has_correct_labels_with_unit(self): assert False # Could not find the correct alert rule to check +@patch("charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True) class TestRemoteWriteProvider(unittest.TestCase): + @patch( + "charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True + ) def setUp(self, *unused): self.harness = Harness(PrometheusCharm) self.harness.set_model_info("lma", "123456") diff --git a/tests/unit/test_transform.py b/tests/unit/test_transform.py index b2ba5416..c7abd368 100644 --- a/tests/unit/test_transform.py +++ b/tests/unit/test_transform.py @@ -4,27 +4,29 @@ import subprocess import unittest from pathlib import PosixPath +from unittest.mock import patch -from charms.prometheus_k8s.v0.prometheus_scrape import PromqlTransformer +from charms.prometheus_k8s.v0.prometheus_scrape import CosTool from ops.charm import CharmBase from ops.testing import Harness # noqa: E302 # pylint: disable=too-few-public-methods -class TransformProviderCharm(CharmBase): +class ToolProviderCharm(CharmBase): """Container charm for running the integration test.""" def __init__(self, *args): super().__init__(*args) - self.transformer = PromqlTransformer(self) + self.tool = CosTool(self) +@patch("charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True) class TestTransform(unittest.TestCase): - """Test that the promql-transform implementation works.""" + """Test that the cos-tool implementation works.""" def setUp(self): - self.harness = Harness(TransformProviderCharm) + self.harness = Harness(ToolProviderCharm) self.harness.set_model_name("transform") self.addCleanup(self.harness.cleanup) self.harness.begin() @@ -32,36 +34,36 @@ def setUp(self): # pylint: disable=protected-access @unittest.mock.patch("platform.processor", lambda: "teakettle") def test_disable_on_invalid_arch(self): - transform = self.harness.charm.transformer - self.assertIsNone(transform.path) - self.assertTrue(transform._disabled) + tool = self.harness.charm.tool + self.assertIsNone(tool.path) + self.assertTrue(tool._disabled) # pylint: disable=protected-access @unittest.mock.patch("platform.processor", lambda: "x86_64") def test_gives_path_on_valid_arch(self): """When given a valid arch, it should return the binary path.""" - transformer = self.harness.charm.transformer + transformer = self.harness.charm.tool self.assertIsInstance(transformer.path, PosixPath) @unittest.mock.patch("platform.processor", lambda: "x86_64") def test_setup_transformer(self): """When setup it should know the path to the binary.""" - transform = self.harness.charm.transformer + tool = self.harness.charm.tool - self.assertIsInstance(transform.path, PosixPath) + self.assertIsInstance(tool.path, PosixPath) - p = str(transform.path) - self.assertTrue(p.endswith("promql-transform-amd64")) + p = str(tool.path) + self.assertTrue(p.endswith("cos-tool-amd64")) @unittest.mock.patch("platform.processor", lambda: "x86_64") @unittest.mock.patch("subprocess.run") def test_returns_original_expression_when_subprocess_call_errors(self, mocked_run): mocked_run.side_effect = subprocess.CalledProcessError( - returncode=10, cmd="promql-transform", stderr="" + returncode=10, cmd="cos-tool", stderr="" ) - transform = self.harness.charm.transformer - output = transform.apply_label_matchers( + tool = self.harness.charm.tool + output = tool.apply_label_matchers( { "groups": [ { @@ -87,8 +89,8 @@ def test_returns_original_expression_when_subprocess_call_errors(self, mocked_ru @unittest.mock.patch("platform.processor", lambda: "invalid") def test_uses_original_expression_when_binary_missing(self): - transform = self.harness.charm.transformer - output = transform.apply_label_matchers( + tool = self.harness.charm.tool + output = tool.apply_label_matchers( { "groups": [ { @@ -114,21 +116,21 @@ def test_uses_original_expression_when_binary_missing(self): @unittest.mock.patch("platform.processor", lambda: "x86_64") def test_fetches_the_correct_expression(self): - transform = self.harness.charm.transformer + tool = self.harness.charm.tool - output = transform._apply_label_matcher("up", {"juju_model": "some_juju_model"}) + output = tool.inject_label_matchers("up", {"juju_model": "some_juju_model"}) assert output == 'up{juju_model="some_juju_model"}' @unittest.mock.patch("platform.processor", lambda: "x86_64") def test_handles_comparisons(self): - transform = self.harness.charm.transformer - output = transform._apply_label_matcher("up > 1", {"juju_model": "some_juju_model"}) + tool = self.harness.charm.tool + output = tool.inject_label_matchers("up > 1", {"juju_model": "some_juju_model"}) assert output == 'up{juju_model="some_juju_model"} > 1' @unittest.mock.patch("platform.processor", lambda: "x86_64") def test_handles_multiple_labels(self): - transform = self.harness.charm.transformer - output = transform._apply_label_matcher( + tool = self.harness.charm.tool + output = tool.inject_label_matchers( "up > 1", { "juju_model": "some_juju_model", @@ -141,3 +143,58 @@ def test_handles_multiple_labels(self): output == 'up{juju_application="some_application",juju_model="some_juju_model"' ',juju_model_uuid="123ABC",juju_unit="some_application/1"} > 1' ) + + +@patch("charms.observability_libs.v0.juju_topology.JujuTopology.is_valid_uuid", lambda *args: True) +class TestValidateAlerts(unittest.TestCase): + """Test that the cos-tool validation works.""" + + def setUp(self): + self.harness = Harness(ToolProviderCharm) + self.harness.set_model_name("validate") + self.addCleanup(self.harness.cleanup) + self.harness.begin() + + @unittest.mock.patch("platform.processor", lambda: "x86_64") + def test_returns_errors_on_bad_rule_file(self): + tool = self.harness.charm.tool + valid, errs = tool.validate_alert_rules( + { + "groups": [ + { + "alert": "BadSyntax", + "expr": "process_cpu_seconds_total{) > 0.12", + } + ] + } + ) + self.assertEqual(valid, False) + self.assertIn(errs, "error validating:") + + @unittest.mock.patch("platform.processor", lambda: "x86_64") + def test_successfully_validates_good_alert_rules(self): + tool = self.harness.charm.tool + valid, errs = tool.validate_alert_rules( + { + "groups": [ + { + "alert": "CPUOverUse", + "expr": "process_cpu_seconds_total > 0.12", + "for": "0m", + "labels": { + "severity": "Low", + "juju_model": "None", + "juju_model_uuid": "f2c1b2a6-e006-11eb-ba80-0242ac130004", + "juju_application": "consumer-tester", + }, + "annotations": { + "summary": "Instance {{ $labels.instance }} CPU over use", + "description": "{{ $labels.instance }} of job " + "{{ $labels.job }} has used too much CPU.", + }, + } + ] + } + ) + self.assertEqual(errs, "") + self.assertEqual(valid, True) diff --git a/tox.ini b/tox.ini index 75f7dbd0..91e0c09a 100644 --- a/tox.ini +++ b/tox.ini @@ -84,7 +84,7 @@ deps = deepdiff httpcore==0.14.7 commands = - /usr/bin/env sh -c 'stat promql-transform-amd64 > /dev/null 2>&1 || curl -L -O https://github.com/canonical/promql-transform/releases/latest/download/promql-transform-amd64' + /usr/bin/env sh -c 'stat cos-tool-amd64 > /dev/null 2>&1 || curl -L -O https://github.com/canonical/cos-tool/releases/latest/download/cos-tool-amd64' coverage run \ --source={[vars]src_path},{[vars]lib_path} \ -m pytest -v --tb native --log-cli-level=INFO -s {posargs} {[vars]tst_path}/unit