Skip to content

Commit

Permalink
tf: module-monitoring-19 Group ES metrics by cluster (#1095)
Browse files Browse the repository at this point in the history
* tf: module-monitoring-19 Group ES metrics by cluster

Grouping by Elasticsearch cluster should ensure that no erroneous
alerts are fired about a loss of metric series when the identity of
the exporter pod changes.

* add changelog
  • Loading branch information
tarrow authored Aug 14, 2023
1 parent d98ae84 commit a424749
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 0 deletions.
3 changes: 3 additions & 0 deletions tf/modules/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# tf-module-monitoring-19
- Change prometheus-elasticsearch metric alerts to group by cluster

# tf-module-monitoring-18
- Change QueryService PV alert alignment from sum to none

Expand Down
17 changes: 17 additions & 0 deletions tf/modules/monitoring/metric-alarms.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ locals {
threshold_value = 1
duration = "60s"
condition_absent = "300s"
min_group_by = "metric.label.es_cluster"
},
}
}
Expand All @@ -33,13 +34,29 @@ resource "google_monitoring_alert_policy" "alert_policy_prometheus_metric" {
count = each.value.trigger_count
}
threshold_value = each.value.threshold_value
# Aggregation applied to the filtered series before the threshold is evaluated.
# Series are grouped by the label named in this entry's min_group_by setting;
# the intent is that the condition tracks the grouped series rather than the
# series of any single exporter pod.
aggregations {
# Width of the window used by the per-series aligner below.
alignment_period = "300s"
# After alignment, keep the minimum across all series within each group.
cross_series_reducer = "REDUCE_MIN"
# Labels preserved by the cross-series reduction (one output series per value).
group_by_fields = [
each.value.min_group_by,
]
# Each individual series is first averaged over the alignment period.
per_series_aligner = "ALIGN_MEAN"
}
}
}
conditions {
display_name = "${each.value.display_name} absent"
condition_absent {
duration = each.value.condition_absent
filter = "resource.type = \"prometheus_target\" AND resource.labels.cluster = \"${var.cluster_name}\" AND ${each.value.filter}"
# Aggregation applied to the filtered series before absence is evaluated.
# Series are grouped by the label named in this entry's min_group_by setting;
# the intent is that a change in exporter pod identity is not reported as a
# lost metric series.
aggregations {
# Width of the window used by the per-series aligner below.
alignment_period = "300s"
# After alignment, keep the minimum across all series within each group.
cross_series_reducer = "REDUCE_MIN"
# Labels preserved by the cross-series reduction (one output series per value).
group_by_fields = [
each.value.min_group_by,
]
# Each individual series is first averaged over the alignment period.
per_series_aligner = "ALIGN_MEAN"
}
}
}
combiner = "OR"
Expand Down

0 comments on commit a424749

Please sign in to comment.