diff --git a/lib/charms/prometheus_k8s/v0/prometheus_scrape.py b/lib/charms/prometheus_k8s/v0/prometheus_scrape.py index e5ebb619..2f475dc6 100644 --- a/lib/charms/prometheus_k8s/v0/prometheus_scrape.py +++ b/lib/charms/prometheus_k8s/v0/prometheus_scrape.py @@ -333,7 +333,6 @@ def _on_scrape_targets_changed(self, event): import socket import subprocess import tempfile -import textwrap from collections import defaultdict from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Tuple, Union @@ -399,38 +398,39 @@ def _on_scrape_targets_changed(self, event): DEFAULT_ALERT_RULES_RELATIVE_PATH = "./src/prometheus_alert_rules" -GENERIC_ALERT_RULES_GROUP = yaml.safe_load( - textwrap.dedent( - """ - groups: - - name: HostHealth - rules: - - alert: HostDown - expr: up < 1 - for: 5m - labels: - severity: critical - annotations: - summary: Host '{{ $labels.instance }}' is down. - description: >- - Host '{{ $labels.instance }}' is down, failed to scrape. - VALUE = {{ $value }} - LABELS = {{ $labels }} - - alert: HostMetricsMissing - # This alert is applicable only when the provider is linked via an aggregator (such as grafana agent) - expr: absent(up) - for: 5m - labels: - severity: critical - annotations: - summary: Metrics not received from host '{{ $labels.instance }}', failed to remote write. - description: >- - Metrics not received from host '{{ $labels.instance }}', failed to remote write. - VALUE = {{ $value }} - LABELS = {{ $labels }} - """ - ) -) +GENERIC_ALERT_RULES_GROUP = { + "groups": [ + { + "name": "HostHealth", + "rules": [ + { + "alert": "HostDown", + "expr": "up < 1", + "for": "5m", + "labels": {"severity": "critical"}, + "annotations": { + "summary": "Host '{{ $labels.instance }}' is down.", + "description": """Host '{{ $labels.instance }}' is down, failed to scrape. + VALUE = {{ $value }} + LABELS = {{ $labels }}""", + }, + }, + { + "alert": "HostMetricsMissing", + "expr": "absent(up)", + "for": "5m", + "labels": {"severity": "critical"}, + "annotations": { + "summary": "Metrics not received from host '{{ $labels.instance }}', failed to remote write.", + "description": """Metrics not received from host '{{ $labels.instance }}', failed to remote write. + VALUE = {{ $value }} + LABELS = {{ $labels }}""", + }, + }, + ], + } + ] +} class PrometheusConfig: diff --git a/lib/charms/prometheus_k8s/v1/prometheus_remote_write.py b/lib/charms/prometheus_k8s/v1/prometheus_remote_write.py index 94dd78af..d7b0b9fa 100644 --- a/lib/charms/prometheus_k8s/v1/prometheus_remote_write.py +++ b/lib/charms/prometheus_k8s/v1/prometheus_remote_write.py @@ -21,7 +21,6 @@ import socket import subprocess import tempfile -import textwrap from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Tuple, Union @@ -47,7 +46,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 4 +LIBPATCH = 5 PYDEPS = ["git+https://github.com/canonical/cos-lib.git@feature/generic-alerts#egg=cosl"] @@ -61,26 +60,27 @@ DEFAULT_ALERT_RULES_RELATIVE_PATH = "./src/prometheus_alert_rules" -GENERIC_ALERT_RULES_GROUP = yaml.safe_load( - textwrap.dedent( - """ - groups: - - name: AggregatorHostHealth - rules: - - alert: HostMetricsMissing - expr: absent(up) - for: 5m - labels: - severity: critical - annotations: - summary: Metrics not received from host '{{ $labels.instance }}', failed to remote write. - description: >- - Metrics not received from host '{{ $labels.instance }}', failed to remote write. - VALUE = {{ $value }} - LABELS = {{ $labels }} - """ - ) -) +GENERIC_ALERT_RULES_GROUP = { + "groups": [ + { + "name": "AggregatorHostHealth", + "rules": [ + { + "alert": "HostMetricsMissing", + "expr": "absent(up)", + "for": "5m", + "labels": {"severity": "critical"}, + "annotations": { + "summary": "Metrics not received from host '{{ $labels.instance }}', failed to remote write.", + "description": """Metrics not received from host '{{ $labels.instance }}', failed to remote write. + VALUE = {{ $value }} + LABELS = {{ $labels }}""", + }, + } + ], + } + ] +} class RelationNotFoundError(Exception): diff --git a/tests/unit/test_endpoint_provider.py b/tests/unit/test_endpoint_provider.py index f4d69d4f..ee2ad12e 100644 --- a/tests/unit/test_endpoint_provider.py +++ b/tests/unit/test_endpoint_provider.py @@ -753,11 +753,11 @@ def test_unit_label_is_retained_if_hard_coded(self): # check unit topology is present in labels and in alert rule expression relation = self.harness.charm.model.get_relation("metrics-endpoint") alert_rules = json.loads(relation.data[self.harness.charm.app].get("alert_rules")) - from pprint import pprint - pprint(alert_rules) for group in alert_rules["groups"]: for rule in group["rules"]: - if "_HostHealth_alerts" not in group["name"]: # _HostHealth_alerts are injected alerts without juju_unit labels + if ( + "_HostHealth_alerts" not in group["name"] + ): # _HostHealth_alerts are injected alerts without juju_unit labels self.assertIn("juju_unit", rule["labels"]) self.assertIn("juju_unit=", rule["expr"]) diff --git a/tox.ini b/tox.ini index b1c393ee..57b76083 100644 --- a/tox.ini +++ b/tox.ini @@ -45,7 +45,7 @@ commands = [testenv:static-{charm,lib,unit,integration}] description = Run static analysis checks deps = - cosl + git+https://github.com/canonical/cos-lib.git@feature/generic-alerts#egg=cosl pyright charm: -r{toxinidir}/requirements.txt lib: ops