From e1ddbc7495a70c31bc7e75eac0e464a0d76d65db Mon Sep 17 00:00:00 2001
From: Jonathan Gazeley
Date: Wed, 28 Aug 2024 23:32:20 +0100
Subject: [PATCH 1/3] Add serviceMonitor

---
Review notes (commentary area, ignored by git-am):
 * The ServiceMonitor is rendered only when serviceMonitor.enabled is true;
   values.yaml ships it as false.
 * It scrapes the port named "http" every 60s and removes the pod, namespace,
   instance and job labels through labeldrop relabelings.
 * Leading whitespace in the hunks below was rebuilt from a
   whitespace-collapsed copy of this series. The values.yaml context lines in
   particular are assumed to sit four spaces deep -- TODO confirm against the
   tree before applying (or apply with --ignore-whitespace).

 .../templates/servicemonitor.yaml             | 28 +++++++++++++++++++
 dist/charts/ping-exporter/values.yaml         |  4 +++
 2 files changed, 32 insertions(+)
 create mode 100644 dist/charts/ping-exporter/templates/servicemonitor.yaml

diff --git a/dist/charts/ping-exporter/templates/servicemonitor.yaml b/dist/charts/ping-exporter/templates/servicemonitor.yaml
new file mode 100644
index 0000000..86ae438
--- /dev/null
+++ b/dist/charts/ping-exporter/templates/servicemonitor.yaml
@@ -0,0 +1,28 @@
+{{- if .Values.serviceMonitor.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "ping_exporter.fullname" . }}
+  labels:
+    {{- include "ping_exporter.labels" . | nindent 4 }}
+spec:
+  selector:
+    matchLabels:
+      {{- include "ping_exporter.selectorLabels" . | nindent 6 }}
+  endpoints:
+    - port: http
+      interval: 60s
+      relabelings:
+        - action: labeldrop
+          regex: pod
+          sourceLabels: []
+        - action: labeldrop
+          regex: namespace
+          sourceLabels: []
+        - action: labeldrop
+          regex: instance
+          sourceLabels: []
+        - action: labeldrop
+          regex: job
+          sourceLabels: []
+{{- end }}
diff --git a/dist/charts/ping-exporter/values.yaml b/dist/charts/ping-exporter/values.yaml
index 3429316..f4e4e58 100644
--- a/dist/charts/ping-exporter/values.yaml
+++ b/dist/charts/ping-exporter/values.yaml
@@ -101,3 +101,7 @@ config:
     timeout: 3s
     history-size: 42
     payload-size: 120
+
+# Create a serviceMonitor resource to be consumed by Prometheus Operator
+serviceMonitor:
+  enabled: false

From d641e66515a6cd1059a6c7f38bb131108c7b5a23 Mon Sep 17 00:00:00 2001
From: Jonathan Gazeley
Date: Thu, 29 Aug 2024 20:31:51 +0100
Subject: [PATCH 2/3] Add Prometheus alerting rules

---
Review notes (commentary area, ignored by git-am):
 * The PrometheusRule is rendered only when prometheusRules.enabled is true;
   values.yaml ships it as false.
 * files/prometheus.rules is inserted with .Files.Get and no indent helper,
   so the indentation stored in that file has to line up under "rules:" on
   its own. The four-space indent used here is reconstructed -- TODO confirm.
 * HighPingRtt multiplies ping_rtt_mean_seconds by 1000 before comparing, so
   $value is in milliseconds. Its description now says "ms"; it previously
   said "seconds".
 * The blob ids on the prometheus.rules "index" lines (here and in 3/3) were
   left as they were and therefore predate that wording fix.

 dist/charts/ping-exporter/files/prometheus.rules | 16 ++++++++++++++++
 .../ping-exporter/templates/prometheusrule.yaml  | 14 ++++++++++++++
 dist/charts/ping-exporter/values.yaml            |  4 ++++
 3 files changed, 34 insertions(+)
 create mode 100644 dist/charts/ping-exporter/files/prometheus.rules
 create mode 100644 dist/charts/ping-exporter/templates/prometheusrule.yaml

diff --git a/dist/charts/ping-exporter/files/prometheus.rules b/dist/charts/ping-exporter/files/prometheus.rules
new file mode 100644
index 0000000..1974e8a
--- /dev/null
+++ b/dist/charts/ping-exporter/files/prometheus.rules
@@ -0,0 +1,16 @@
+    - alert: HighPingLossRatio
+      expr: round(ping_loss_ratio * 100) > 0
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: High ping loss ratio for {{ $labels.target }}
+        description: "Ping loss ratio for {{ $labels.target }} is {{ $value }}"
+    - alert: HighPingRtt
+      expr: round(ping_rtt_mean_seconds * 1000, 0.1) > 100
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: High ping latency for {{ $labels.target }}
+        description: "Ping latency for {{ $labels.target }} is {{ $value }} ms"
diff --git a/dist/charts/ping-exporter/templates/prometheusrule.yaml b/dist/charts/ping-exporter/templates/prometheusrule.yaml
new file mode 100644
index 0000000..5b2677d
--- /dev/null
+++ b/dist/charts/ping-exporter/templates/prometheusrule.yaml
@@ -0,0 +1,14 @@
+{{- if .Values.prometheusRules.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  labels:
+    prometheus: service-prometheus
+    role: alert-rules
+  name: {{ include "ping_exporter.fullname" . }}
+spec:
+  groups:
+  - name: ping_exporter.rules
+    rules:
+{{ .Files.Get "files/prometheus.rules" }}
+{{- end }}
diff --git a/dist/charts/ping-exporter/values.yaml b/dist/charts/ping-exporter/values.yaml
index f4e4e58..1fe3a1f 100644
--- a/dist/charts/ping-exporter/values.yaml
+++ b/dist/charts/ping-exporter/values.yaml
@@ -105,3 +105,7 @@ config:
 # Create a serviceMonitor resource to be consumed by Prometheus Operator
 serviceMonitor:
   enabled: false
+
+# Create basic Prometheus alerting rules
+prometheusRules:
+  enabled: false

From 1fb83effbbadc6836a626a0bcd80a41fc6ef4995 Mon Sep 17 00:00:00 2001
From: Jonathan Gazeley
Date: Thu, 29 Aug 2024 22:39:04 +0100
Subject: [PATCH 3/3] Tweak threshold as it is too sensitive

---
Review notes (commentary area, ignored by git-am):
 * HighPingLossRatio now fires above 5 (rounded ping_loss_ratio * 100)
   instead of above 0, and its description gains a "%" suffix.
 * Context indentation matches the reconstructed prometheus.rules from 2/3.

 dist/charts/ping-exporter/files/prometheus.rules | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dist/charts/ping-exporter/files/prometheus.rules b/dist/charts/ping-exporter/files/prometheus.rules
index 1974e8a..9f574fc 100644
--- a/dist/charts/ping-exporter/files/prometheus.rules
+++ b/dist/charts/ping-exporter/files/prometheus.rules
@@ -1,11 +1,11 @@
     - alert: HighPingLossRatio
-      expr: round(ping_loss_ratio * 100) > 0
+      expr: round(ping_loss_ratio * 100) > 5
       for: 5m
       labels:
         severity: warning
       annotations:
         summary: High ping loss ratio for {{ $labels.target }}
-        description: "Ping loss ratio for {{ $labels.target }} is {{ $value }}"
+        description: "Ping loss ratio for {{ $labels.target }} is {{ $value }}%"
     - alert: HighPingRtt
       expr: round(ping_rtt_mean_seconds * 1000, 0.1) > 100
       for: 5m