Skip to content

Commit

Permalink
Fix template variable usage (#199)
Browse files Browse the repository at this point in the history
* Update host_health.rules

Signed-off-by: Leon <[email protected]>

* Move individual alerts under group

* Rename ext to match contents

---------

Signed-off-by: Leon <[email protected]>
  • Loading branch information
sed-i authored Nov 20, 2024
1 parent a5e2f57 commit 2eecc8f
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 18 deletions.
8 changes: 0 additions & 8 deletions src/prometheus_alert_rules/high_cpu_iowait.rule

This file was deleted.

11 changes: 11 additions & 0 deletions src/prometheus_alert_rules/high_cpu_iowait.rules
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Prometheus alert rules for host CPU health.
groups:
  - name: HostCpu
    rules:
      # Fires when the per-instance average iowait CPU rate over a 5m window,
      # expressed as a percentage, is above 10.
      - alert: HostCpuHighIowait
        expr: avg by (instance) (rate(node_cpu_seconds_total{mode="iowait"}[5m])) * 100 > 10
        # Zero pending duration: no waiting period before the alert fires.
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Host CPU high iowait (instance {{ $labels.instance }})
          # Double-quoted so the \n escapes render as line breaks.
          description: "CPU iowait > 10%. A high iowait means that you are disk or network bound.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
4 changes: 2 additions & 2 deletions src/prometheus_alert_rules/host_health.rules
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ groups:
labels:
severity: critical
annotations:
summary: Host '{{labels.instance}}' is down.
summary: Host '{{ $labels.instance }}' is down.
description: >-
Host '{{ $labels.instance }}' is down.
VALUE = {{ $value }}
Expand All @@ -18,7 +18,7 @@ groups:
labels:
severity: critical
annotations:
summary: Metrics not received from host '{{labels.instance}}'.
summary: Metrics not received from host '{{ $labels.instance }}'.
description: >-
The metrics endpoint for host '{{ $labels.instance }}' is unreachable.
VALUE = {{ $value }}
Expand Down
8 changes: 0 additions & 8 deletions src/prometheus_alert_rules/oomkill.rule

This file was deleted.

11 changes: 11 additions & 0 deletions src/prometheus_alert_rules/oomkill.rules
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Prometheus alert rules for host memory health.
groups:
  - name: HostMemory
    rules:
      # Fires when the node_vmstat_oom_kill counter increased during the
      # last hour.
      - alert: HostOomKillDetected
        expr: increase(node_vmstat_oom_kill[1h]) > 0
        # Zero pending duration: no waiting period before the alert fires.
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Host OOM kill detected (instance {{ $labels.instance }})
          # Double-quoted so the \n escapes render as line breaks.
          description: "OOM kill detected\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

0 comments on commit 2eecc8f

Please sign in to comment.