diff --git a/CHANGELOG.md b/CHANGELOG.md index f3d2e14968..4b993c3e11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -98,6 +98,7 @@ * [BUGFIX] Alerts: Fix autoscaling metrics joins in `MimirAutoscalerNotActive` when series churn. #9412 * [BUGFIX] Alerts: Exclude failed cache "add" operations from alerting since failures are expected in normal operation. #9658 * [BUGFIX] Alerts: Exclude read-only replicas from `IngesterInstanceHasNoTenants` alert. #9843 +* [BUGFIX] Alerts: Use resident set memory for the `EtcdAllocatingTooMuchMemory` alert so that ephemeral file cache memory doesn't cause the alert to misfire. #9997 ### Jsonnet diff --git a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/mixin-alerts.yaml b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/mixin-alerts.yaml index 929e6927b5..8f5d18333d 100644 --- a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/mixin-alerts.yaml +++ b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/mixin-alerts.yaml @@ -605,7 +605,7 @@ spec: runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#etcdallocatingtoomuchmemory expr: | ( - container_memory_working_set_bytes{container="etcd"} + container_memory_rss{container="etcd"} / ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) ) > 0.65 @@ -619,7 +619,7 @@ spec: runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#etcdallocatingtoomuchmemory expr: | ( - container_memory_working_set_bytes{container="etcd"} + container_memory_rss{container="etcd"} / ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) ) > 0.8 diff --git a/operations/mimir-mixin-compiled-baremetal/alerts.yaml b/operations/mimir-mixin-compiled-baremetal/alerts.yaml index 9283e7e8ee..d4776cf7c2 100644 --- a/operations/mimir-mixin-compiled-baremetal/alerts.yaml +++ b/operations/mimir-mixin-compiled-baremetal/alerts.yaml @@ -583,7 +583,7 @@ groups: runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#etcdallocatingtoomuchmemory expr: | ( - container_memory_working_set_bytes{container="etcd"} + container_memory_rss{container="etcd"} / ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) ) > 0.65 @@ -597,7 +597,7 @@ groups: runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#etcdallocatingtoomuchmemory expr: | ( - container_memory_working_set_bytes{container="etcd"} + container_memory_rss{container="etcd"} / ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) ) > 0.8 diff --git a/operations/mimir-mixin-compiled/alerts.yaml b/operations/mimir-mixin-compiled/alerts.yaml index be782a551d..275dd27111 100644 --- a/operations/mimir-mixin-compiled/alerts.yaml +++ b/operations/mimir-mixin-compiled/alerts.yaml @@ -593,7 +593,7 @@ groups: runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#etcdallocatingtoomuchmemory expr: | ( - container_memory_working_set_bytes{container="etcd"} + container_memory_rss{container="etcd"} / ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) ) > 0.65 @@ -607,7 +607,7 @@ groups: runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#etcdallocatingtoomuchmemory expr: | ( - container_memory_working_set_bytes{container="etcd"} + container_memory_rss{container="etcd"} / ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) ) > 0.8 diff --git a/operations/mimir-mixin/alerts/alerts.libsonnet b/operations/mimir-mixin/alerts/alerts.libsonnet index f66af7829a..75c7e1ca78 100644 --- a/operations/mimir-mixin/alerts/alerts.libsonnet +++ b/operations/mimir-mixin/alerts/alerts.libsonnet @@ -875,7 +875,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; alert: 'EtcdAllocatingTooMuchMemory', expr: ||| ( - container_memory_working_set_bytes{container="etcd"} + container_memory_rss{container="etcd"} / ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) ) > 0.65 @@ -894,7 +894,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; alert: 'EtcdAllocatingTooMuchMemory', expr: ||| ( - container_memory_working_set_bytes{container="etcd"} + container_memory_rss{container="etcd"} / ( container_spec_memory_limit_bytes{container="etcd"} > 0 ) ) > 0.8