From 387b734b07c831b258acc638acec4b7f052cb26d Mon Sep 17 00:00:00 2001
From: Adam Fabian
Date: Tue, 3 Dec 2024 12:43:24 -0600
Subject: [PATCH] feat: alert for MariaDB backup failures

JIRA:OSPC-550
---
Reviewer note (this section is dropped by `git am`):
thresholds are in seconds (21600 = 6 h, 43200 = 12 h), and `for: 1h` means
each condition must hold for a further hour before the alert fires.
YAML nesting below is reconstructed; confirm it against the target file.

 .../prometheus/alerting_rules.yaml | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/base-helm-configs/prometheus/alerting_rules.yaml b/base-helm-configs/prometheus/alerting_rules.yaml
index f63aad63..6117bd8a 100644
--- a/base-helm-configs/prometheus/alerting_rules.yaml
+++ b/base-helm-configs/prometheus/alerting_rules.yaml
@@ -123,3 +123,21 @@ additionalPrometheusRulesMap:
             annotations:
               summary: OVN backup volume >= 90% disk usage
               description: "OVN backup volume >= 90% disk usage"
+      - name: MariaDB backup alerts
+        rules:
+          - alert: mariadbBackupWarning
+            expr: time() - kube_cronjob_status_last_successful_time{cronjob="mariadb-backup"} > 21600
+            for: 1h
+            labels:
+              severity: warning
+            annotations:
+              summary: Last MariaDB backup not successful within 1 hour of scheduled run
+              description: "Last MariaDB backup not successful within 1 hour of scheduled run"
+          - alert: mariadbBackupCritical
+            expr: time() - kube_cronjob_status_last_successful_time{cronjob="mariadb-backup"} > 43200
+            for: 1h
+            labels:
+              severity: critical
+            annotations:
+              summary: Second successive MariaDB backup not successful within 1 hour of scheduled run
+              description: "Second successive MariaDB backup not successful within 1 hour of scheduled run"