Skip to content

Commit

Permalink
NH-57036: Fix metrics relation to Nodes on Fargate
Browse files Browse the repository at this point in the history
* Don't use 'kubernetes_io_hostname' on Fargate Nodes
* Use 'service.instance.id' from Resource attributes instead of DataPoint attributes
* Remove nonexistent metrics from mocked data
  • Loading branch information
pstranak-sw committed Oct 4, 2023
1 parent 2ab76aa commit 42278c2
Show file tree
Hide file tree
Showing 7 changed files with 58 additions and 813 deletions.
13 changes: 0 additions & 13 deletions build/docker/wiremockFiles/redirectKubeStateMetricsResponse.txt
Original file line number Diff line number Diff line change
Expand Up @@ -199,11 +199,6 @@ kube_pod_container_resource_limits{container="test-container",endpoint="http",in
kube_pod_container_resource_limits{container="test-container",endpoint="http",instance="test-node",job="test-job",namespace="test-namespace",node="test-node",pod="test-pod",resource="memory",service="test-service",uid="bafeef2c-1292-4a5e-a92c-d709480b04b6",unit="byte",prometheus="prometheus-system/kube-prometheus-kube-prome-prometheus",prometheus_replica="prometheus-kube-prometheus-kube-prome-prometheus-0"} 3.221225472e+09 1675856675021
kube_pod_container_resource_limits{container="test-container",endpoint="tcp-model",instance="test-node",job="test-job",namespace="test-namespace",node="test-node",pod="test-pod",resource="memory",service="test-service",uid="bafeef2c-1292-4a5e-a92c-d709480b04b6",unit="byte",prometheus="prometheus-system/kube-prometheus-kube-prome-prometheus",prometheus_replica="prometheus-kube-prometheus-kube-prome-prometheus-0"} 3.221225472e+09 1675856675021
kube_pod_container_resource_limits{container="test-container",endpoint="tcp-model",instance="test-node",job="test-job",namespace="test-namespace",node="test-node",pod="test-pod",resource="cpu",service="test-service",uid="bafeef2c-1292-4a5e-a92c-d709480b04b6",unit="core",prometheus="prometheus-system/kube-prometheus-kube-prome-prometheus",prometheus_replica="prometheus-kube-prometheus-kube-prome-prometheus-0"} 0.1 1675856675021
# TYPE kube_pod_container_resource_limits_cpu_cores untyped
kube_pod_container_resource_limits_cpu_cores{container="test-container",endpoint="tcp-model",instance="test-node",job="test-job",namespace="test-namespace",node="test-node",pod="test-pod",service="test-service",uid="71057e83-7723-4db5-a7ca-52ad7904e34d",prometheus="prometheus-system/kube-prometheus-kube-prome-prometheus",prometheus_replica="prometheus-kube-prometheus-kube-prome-prometheus-0"} 1 1675856675021
# TYPE kube_pod_container_resource_limits_memory_bytes untyped
kube_pod_container_resource_limits_memory_bytes{container="test-container",endpoint="tcp-model",instance="test-node",job="test-job",namespace="test-namespace",node="test-node",pod="test-pod",service="test-service",uid="03d2b55c-b225-476b-9178-c74f8e5eaba2",prometheus="prometheus-system/kube-prometheus-kube-prome-prometheus",prometheus_replica="prometheus-kube-prometheus-kube-prome-prometheus-0"} 2.68435456e+08 1675856675021
kube_pod_container_resource_limits_memory_bytes{container="test-container",endpoint="tcp-model",instance="test-node",job="test-job",namespace="test-namespace",node="test-node",pod="test-pod",service="test-service",uid="bafeef2c-1292-4a5e-a92c-d709480b04b6",prometheus="prometheus-system/kube-prometheus-kube-prome-prometheus",prometheus_replica="prometheus-kube-prometheus-kube-prome-prometheus-0"} 3.221225472e+09 1675856675021
# TYPE kube_pod_container_resource_requests untyped
kube_pod_container_resource_requests{container="test-container",endpoint="http",instance="test-node",job="test-job",namespace="test-namespace",node="test-node",pod="test-pod",resource="cpu",service="test-service",uid="f15ca7ca-af33-4f43-a793-ec3176b31842",unit="core",prometheus="prometheus-system/kube-prometheus-kube-prome-prometheus",prometheus_replica="prometheus-kube-prometheus-kube-prome-prometheus-0"} 0.1 1675856675021
kube_pod_container_resource_requests{container="test-container",endpoint="http",instance="test-node",job="test-job",namespace="test-namespace",node="test-node",pod="test-pod",resource="memory",service="test-service",uid="bafeef2c-1292-4a5e-a92c-d709480b04b6",unit="byte",prometheus="prometheus-system/kube-prometheus-kube-prome-prometheus",prometheus_replica="prometheus-kube-prometheus-kube-prome-prometheus-0"} 3.221225472e+09 1675856675021
Expand Down Expand Up @@ -242,14 +237,6 @@ kube_pod_info{container="test-container",created_by_kind="ReplicaSet",created_by
kube_pod_init_container_info{container="test-container",container_id="docker://f1c98663d614379552d6c9aae831b4eb3e149c469d4d589b62ea077cf3dad807",endpoint="http",image="busybox:1.29.2",image_id="docker-pullable://busybox@sha256:cb63aa0641a885f54de20f61d152187419e8f6b159ed11a251a09d115fdff9bd",instance="test-node",job="test-job",namespace="test-namespace",pod="test-pod",service="test-service",uid="9cab81cb-9da1-4029-ac51-c7c3024c6fbf",prometheus="prometheus-system/kube-prometheus-kube-prome-prometheus",prometheus_replica="prometheus-kube-prometheus-kube-prome-prometheus-0"} 1 1675856675021
kube_pod_init_container_info{container="test-container",container_id="docker://f55a97f615b1a90f15f114def871a7d4f0ed8a32d9a329cc7e2f94a24d4780c1",endpoint="http",image="fullstorydev/grpcurl:latest",image_id="docker-pullable://fullstorydev/grpcurl@sha256:d42ef512419560776bee5bb51e338a1734a0edb99f450b6d98fd98bcc93796f3",instance="test-node",job="test-job",namespace="test-namespace",pod="test-pod",service="test-service",uid="bafeef2c-1292-4a5e-a92c-d709480b04b6",prometheus="prometheus-system/kube-prometheus-kube-prome-prometheus",prometheus_replica="prometheus-kube-prometheus-kube-prome-prometheus-0"} 1 1675856675021
kube_pod_init_container_info{container="test-container",container_id="docker://6fb07210b86971939dbfb16ae47f98afdb8214a52a21dd5fc26f21ae35c09d9e",endpoint="http",image="busybox:latest",image_id="docker-pullable://busybox@sha256:b5d6fe0712636ceb7430189de28819e195e8966372edfc2d9409d79402a0dc16",instance="test-node",job="test-job",namespace="test-namespace",pod="test-pod",service="test-service",uid="bafeef2c-1292-4a5e-a92c-d709480b04b6",prometheus="prometheus-system/kube-prometheus-kube-prome-prometheus",prometheus_replica="prometheus-kube-prometheus-kube-prome-prometheus-0"} 1 1675856675021
# TYPE kube_pod_init_container_resource_limits_cpu_cores untyped
kube_pod_init_container_resource_limits_cpu_cores{container="test-container",endpoint="http",instance="test-node",job="test-job",namespace="test-namespace",pod="test-pod",service="test-service",uid="2a82ed12-a31a-427a-adb9-d14cf6a4a063",prometheus="prometheus-system/kube-prometheus-kube-prome-prometheus",prometheus_replica="prometheus-kube-prometheus-kube-prome-prometheus-0"} 0.1 1675856675021
# TYPE kube_pod_init_container_resource_limits_memory_bytes untyped
kube_pod_init_container_resource_limits_memory_bytes{container="test-container",endpoint="http",instance="test-node",job="test-job",namespace="test-namespace",pod="test-pod",service="test-service",uid="a920ed2f-477d-4ad7-93d6-3222aabfece2",prometheus="prometheus-system/kube-prometheus-kube-prome-prometheus",prometheus_replica="prometheus-kube-prometheus-kube-prome-prometheus-0"} 1.073741824e+09 1675856675021
# TYPE kube_pod_init_container_resource_requests_cpu_cores untyped
kube_pod_init_container_resource_requests_cpu_cores{container="test-container",endpoint="http",instance="test-node",job="test-job",namespace="test-namespace",pod="test-pod",service="test-service",uid="c7f7a05a-a1b5-4fd0-a611-8e2a0e7acbf6",prometheus="prometheus-system/kube-prometheus-kube-prome-prometheus",prometheus_replica="prometheus-kube-prometheus-kube-prome-prometheus-0"} 0.25 1675856675021
# TYPE kube_pod_init_container_resource_requests_memory_bytes untyped
kube_pod_init_container_resource_requests_memory_bytes{container="test-container",endpoint="http",instance="test-node",job="test-job",namespace="test-namespace",pod="test-pod",service="test-service",uid="c7f7a05a-a1b5-4fd0-a611-8e2a0e7acbf6",prometheus="prometheus-system/kube-prometheus-kube-prome-prometheus",prometheus_replica="prometheus-kube-prometheus-kube-prome-prometheus-0"} 6.7108864e+07 1675856675021
# TYPE kube_pod_init_container_status_ready untyped
kube_pod_init_container_status_ready{container="test-container",endpoint="http",instance="test-node",job="test-job",namespace="test-namespace",pod="test-pod",service="test-service",uid="64b6d309-44e3-4a41-8883-c15c7cc9bc4a",prometheus="prometheus-system/kube-prometheus-kube-prome-prometheus",prometheus_replica="prometheus-kube-prometheus-kube-prome-prometheus-0"} 1 1675856675021
kube_pod_init_container_status_ready{container="test-container",endpoint="http",instance="test-node",job="test-job",namespace="test-namespace",pod="test-pod",service="test-service",uid="bafeef2c-1292-4a5e-a92c-d709480b04b6",prometheus="prometheus-system/kube-prometheus-kube-prome-prometheus",prometheus_replica="prometheus-kube-prometheus-kube-prome-prometheus-0"} 1 1675856675021
Expand Down
4 changes: 3 additions & 1 deletion deploy/helm/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

## [2.8.0-alpha.2] - 2023-09-14
## [2.8.0-alpha.2] - 2023-09-22

### Added

- Add monitoring windows node logs

### Fixed

- Detection of the Node name for metrics from Fargate Nodes

## [2.8.0-alpha.1] - 2023-09-11

Expand Down
37 changes: 11 additions & 26 deletions deploy/helm/metrics-collector-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,31 +50,16 @@ processors:
datapoint:
- 'attributes["container"] == "POD" and IsMatch(metric.name, "container_network_.*|k8s.container.*") == true'


# unify attributes
attributes/unify_node_attribute:
include:
match_type: regexp
metric_names:
- container_.*
- kube_node_.*
- kube_pod_info
- kube_pod_container_resource_requests
- kube_pod_container_resource_limits
- kube_pod_init_container_resource_requests
- kube_pod_init_container_resource_limits
actions:
- key: k8s.node.name
from_attribute: node
action: insert
{{- if not .Values.aws_fargate.enabled }}
- key: k8s.node.name
from_attribute: kubernetes_io_hostname
action: insert
{{- end }}
- key: k8s.node.name
from_attribute: service.instance.id
action: insert
transform/unify_node_attribute:
metric_statements:
- context: datapoint
statements:
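# when available, use the "node" attribute for the Node name
# NOTE(review): in the IsMatch patterns below, "^" applies only to the first alternative and "$" only to the last (alternation binds loosest), so the middle alternatives match as substrings - confirm this is intended, or wrap the alternatives in a single group: ^(?:a|b|c)$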
- set(attributes["k8s.node.name"], attributes["node"]) where IsMatch(metric.name, "^(container_.*)|(kube_node_.*)|(kube_pod_info)|(kube_pod_container_resource_requests)|(kube_pod_container_resource_limits)|(kube_pod_init_container_resource_requests)|(kube_pod_init_container_resource_limits)$") == true and attributes["k8s.node.name"] == nil
# "kubernetes_io_hostname", unlike "service.instance.id", provides a nice Node name in environments like local Docker, but for Fargate, its value is different from the other attributes
- set(attributes["k8s.node.name"], attributes["kubernetes_io_hostname"]) where IsMatch(metric.name, "^(container_.*)|(kube_node_.*)|(kube_pod_info)|(kube_pod_container_resource_requests)|(kube_pod_container_resource_limits)|(kube_pod_init_container_resource_requests)|(kube_pod_init_container_resource_limits)$") == true and attributes["eks_amazonaws_com_compute_type"] != "fargate" and attributes["k8s.node.name"] == nil
# use "service.instance.id" for Node name when the above attributes are not available
- set(attributes["k8s.node.name"], resource.attributes["service.instance.id"]) where IsMatch(metric.name, "^(container_.*)|(kube_node_.*)|(kube_pod_info)|(kube_pod_container_resource_requests)|(kube_pod_container_resource_limits)|(kube_pod_init_container_resource_requests)|(kube_pod_init_container_resource_limits)$") == true and attributes["k8s.node.name"] == nil

attributes/unify_volume_attribute:
include:
Expand Down Expand Up @@ -1483,7 +1468,7 @@ service:
- filter/receiver
- transform
- filter/remove_internal
- attributes/unify_node_attribute
- transform/unify_node_attribute
- attributes/unify_volume_attribute
- attributes/identify_init_container
- attributes/identify_standard_container
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -334,24 +334,6 @@ Metrics config should match snapshot when using default values:
match_type: regexp
metric_names:
- .*
attributes/unify_node_attribute:
actions:
- action: insert
from_attribute: node
key: k8s.node.name
- action: insert
from_attribute: service.instance.id
key: k8s.node.name
include:
match_type: regexp
metric_names:
- container_.*
- kube_node_.*
- kube_pod_info
- kube_pod_container_resource_requests
- kube_pod_container_resource_limits
- kube_pod_init_container_resource_requests
- kube_pod_init_container_resource_limits
attributes/unify_volume_attribute:
actions:
- action: insert
Expand Down Expand Up @@ -1926,6 +1908,20 @@ Metrics config should match snapshot when using default values:
- delete_key(resource.attributes, "sw.k8s.job.found")
- delete_key(resource.attributes, "sw.k8s.cronjob.found")
- delete_key(resource.attributes, "sw.k8s.node.found")
transform/unify_node_attribute:
metric_statements:
- context: datapoint
statements:
- set(attributes["k8s.node.name"], attributes["node"]) where IsMatch(metric.name,
"^(container_.*)|(kube_node_.*)|(kube_pod_info)|(kube_pod_container_resource_requests)|(kube_pod_container_resource_limits)|(kube_pod_init_container_resource_requests)|(kube_pod_init_container_resource_limits)$")
== true and attributes["k8s.node.name"] == nil
- set(attributes["k8s.node.name"], attributes["kubernetes_io_hostname"]) where
IsMatch(metric.name, "^(container_.*)|(kube_node_.*)|(kube_pod_info)|(kube_pod_container_resource_requests)|(kube_pod_container_resource_limits)|(kube_pod_init_container_resource_requests)|(kube_pod_init_container_resource_limits)$")
== true and attributes["eks_amazonaws_com_compute_type"] != "fargate" and
attributes["k8s.node.name"] == nil
- set(attributes["k8s.node.name"], resource.attributes["service.instance.id"])
where IsMatch(metric.name, "^(container_.*)|(kube_node_.*)|(kube_pod_info)|(kube_pod_container_resource_requests)|(kube_pod_container_resource_limits)|(kube_pod_init_container_resource_requests)|(kube_pod_init_container_resource_limits)$")
== true and attributes["k8s.node.name"] == nil
receivers:
k8s_events: null
prometheus/kube-state-metrics:
Expand Down Expand Up @@ -1988,7 +1984,7 @@ Metrics config should match snapshot when using default values:
- filter/receiver
- transform
- filter/remove_internal
- attributes/unify_node_attribute
- transform/unify_node_attribute
- attributes/unify_volume_attribute
- attributes/identify_init_container
- attributes/identify_standard_container
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -334,27 +334,6 @@ Metrics config should match snapshot when using default values:
match_type: regexp
metric_names:
- .*
attributes/unify_node_attribute:
actions:
- action: insert
from_attribute: node
key: k8s.node.name
- action: insert
from_attribute: kubernetes_io_hostname
key: k8s.node.name
- action: insert
from_attribute: service.instance.id
key: k8s.node.name
include:
match_type: regexp
metric_names:
- container_.*
- kube_node_.*
- kube_pod_info
- kube_pod_container_resource_requests
- kube_pod_container_resource_limits
- kube_pod_init_container_resource_requests
- kube_pod_init_container_resource_limits
attributes/unify_volume_attribute:
actions:
- action: insert
Expand Down Expand Up @@ -1929,6 +1908,20 @@ Metrics config should match snapshot when using default values:
- delete_key(resource.attributes, "sw.k8s.job.found")
- delete_key(resource.attributes, "sw.k8s.cronjob.found")
- delete_key(resource.attributes, "sw.k8s.node.found")
transform/unify_node_attribute:
metric_statements:
- context: datapoint
statements:
- set(attributes["k8s.node.name"], attributes["node"]) where IsMatch(metric.name,
"^(container_.*)|(kube_node_.*)|(kube_pod_info)|(kube_pod_container_resource_requests)|(kube_pod_container_resource_limits)|(kube_pod_init_container_resource_requests)|(kube_pod_init_container_resource_limits)$")
== true and attributes["k8s.node.name"] == nil
- set(attributes["k8s.node.name"], attributes["kubernetes_io_hostname"]) where
IsMatch(metric.name, "^(container_.*)|(kube_node_.*)|(kube_pod_info)|(kube_pod_container_resource_requests)|(kube_pod_container_resource_limits)|(kube_pod_init_container_resource_requests)|(kube_pod_init_container_resource_limits)$")
== true and attributes["eks_amazonaws_com_compute_type"] != "fargate" and
attributes["k8s.node.name"] == nil
- set(attributes["k8s.node.name"], resource.attributes["service.instance.id"])
where IsMatch(metric.name, "^(container_.*)|(kube_node_.*)|(kube_pod_info)|(kube_pod_container_resource_requests)|(kube_pod_container_resource_limits)|(kube_pod_init_container_resource_requests)|(kube_pod_init_container_resource_limits)$")
== true and attributes["k8s.node.name"] == nil
receivers:
k8s_events: null
prometheus/kube-state-metrics:
Expand Down Expand Up @@ -1991,7 +1984,7 @@ Metrics config should match snapshot when using default values:
- filter/receiver
- transform
- filter/remove_internal
- attributes/unify_node_attribute
- transform/unify_node_attribute
- attributes/unify_volume_attribute
- attributes/identify_init_container
- attributes/identify_standard_container
Expand Down
Loading

0 comments on commit 42278c2

Please sign in to comment.