From cf3b2ea49ed2eeae0cbb4b0370d4513ede2c2c03 Mon Sep 17 00:00:00 2001 From: Orfeas Kourkakis Date: Wed, 6 Nov 2024 16:13:13 +0200 Subject: [PATCH] fix: Comment out firing alert rule (#412) This comments out an alert rule that's always `Firing` due to kubeflow/notebooks#62 Ref #409 --- .../model_errors.archived_rule | 12 ++++++++++++ .../src/prometheus_alert_rules/model_errors.rule | 10 ---------- .../jupyter-controller/tests/unit/test_operator.py | 8 +++++--- 3 files changed, 17 insertions(+), 13 deletions(-) create mode 100644 charms/jupyter-controller/src/prometheus_alert_rules/model_errors.archived_rule delete mode 100644 charms/jupyter-controller/src/prometheus_alert_rules/model_errors.rule diff --git a/charms/jupyter-controller/src/prometheus_alert_rules/model_errors.archived_rule b/charms/jupyter-controller/src/prometheus_alert_rules/model_errors.archived_rule new file mode 100644 index 00000000..18db6b24 --- /dev/null +++ b/charms/jupyter-controller/src/prometheus_alert_rules/model_errors.archived_rule @@ -0,0 +1,12 @@ +# Uncomment the alert below and modify the file's extension to be `.rule` +# once https://github.com/canonical/notebook-operators/issues/409 is fixed +# alert: JupyterControllerRuntimeReconciliationErrorsExceedThreshold +# expr: rate(controller_runtime_reconcile_errors_total[5m]) > 0 +# for: 0m +# labels: +# severity: critical +# annotations: +# summary: Total number of reconciliation errors per controller +# description: > +# Total number of reconciliation errors per controller +# LABELS = {{ $labels }} diff --git a/charms/jupyter-controller/src/prometheus_alert_rules/model_errors.rule b/charms/jupyter-controller/src/prometheus_alert_rules/model_errors.rule deleted file mode 100644 index 90d3a65c..00000000 --- a/charms/jupyter-controller/src/prometheus_alert_rules/model_errors.rule +++ /dev/null @@ -1,10 +0,0 @@ -alert: JupyterControllerRuntimeReconciliationErrorsExceedThreshold -expr: rate(controller_runtime_reconcile_errors_total[5m]) > 0 -for: 0m -labels: - severity: critical -annotations: - summary: Total number of reconciliation errors per controller - description: > - Total number of reconciliation errors per controller - LABELS = {{ $labels }} diff --git a/charms/jupyter-controller/tests/unit/test_operator.py b/charms/jupyter-controller/tests/unit/test_operator.py index 7ebf2770..91cb0688 100644 --- a/charms/jupyter-controller/tests/unit/test_operator.py +++ b/charms/jupyter-controller/tests/unit/test_operator.py @@ -112,9 +112,11 @@ def test_prometheus_data_set(self, harness: Harness, mocker): # there 2 alert rules in host_resources.rules for rule in file_alert["groups"][0]["rules"]: test_alerts.append(rule["alert"]) - with open("src/prometheus_alert_rules/model_errors.rule") as f: - file_alert = yaml.safe_load(f.read()) - test_alerts.append(file_alert["alert"]) + # Uncomment once https://github.com/canonical/notebook-operators/issues/409 + # is fixed. + # with open("src/prometheus_alert_rules/model_errors.rule") as f: + # file_alert = yaml.safe_load(f.read()) + # test_alerts.append(file_alert["alert"]) with open("src/prometheus_alert_rules/KubeflowJupyterControllerServices.rules") as f: file_alert = yaml.safe_load(f.read()) # there 2 alert rules in host_resources.rules