Skip to content

Commit

Permalink
NH-57036: Fix metrics relation to Nodes on Fargate
Browse files Browse the repository at this point in the history
* Don't use 'kubernetes_io_hostname' on Fargate Nodes
* Use 'service.instance.id' from Resource attributes instead of DataPoint attributes
  • Loading branch information
pstranak-sw committed Sep 24, 2023
1 parent 2ab76aa commit 3ffaffc
Show file tree
Hide file tree
Showing 6 changed files with 264 additions and 143 deletions.
4 changes: 3 additions & 1 deletion deploy/helm/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

## [2.8.0-alpha.2] - 2023-09-14
## [2.8.0-alpha.2] - 2023-09-22

### Added

- Add monitoring windows node logs

### Fixed

- Detection of the Node name for Fargate Nodes' metrics

## [2.8.0-alpha.1] - 2023-09-11

Expand Down
37 changes: 11 additions & 26 deletions deploy/helm/metrics-collector-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,31 +50,16 @@ processors:
datapoint:
- 'attributes["container"] == "POD" and IsMatch(metric.name, "container_network_.*|k8s.container.*") == true'


# unify attributes
attributes/unify_node_attribute:
include:
match_type: regexp
metric_names:
- container_.*
- kube_node_.*
- kube_pod_info
- kube_pod_container_resource_requests
- kube_pod_container_resource_limits
- kube_pod_init_container_resource_requests
- kube_pod_init_container_resource_limits
actions:
- key: k8s.node.name
from_attribute: node
action: insert
{{- if not .Values.aws_fargate.enabled }}
- key: k8s.node.name
from_attribute: kubernetes_io_hostname
action: insert
{{- end }}
- key: k8s.node.name
from_attribute: service.instance.id
action: insert
# Fills in the datapoint attribute "k8s.node.name" for the metric families named in the
# IsMatch pattern below (container_*, kube_node_*, kube_pod_info and the kube_pod_*
# container resource requests/limits metrics).
# Three sources are tried in order: datapoint attribute "node", datapoint attribute
# "kubernetes_io_hostname", then resource attribute "service.instance.id".
# Every statement is guarded by `attributes["k8s.node.name"] == nil`, so a later statement
# does not overwrite a value written by an earlier one.
# NOTE(review): in the pattern "^(a)|(b)|...|(z)$" alternation has the lowest precedence, so
# "^" appears to apply only to the first alternative and "$" only to the last; the middle
# alternatives would then match anywhere in the metric name. Confirm whether full anchoring,
# e.g. "^(?:a|b|...|z)$", was intended before changing it - names that currently match by
# substring would stop matching.
transform/unify_node_attribute:
metric_statements:
- context: datapoint
statements:
# when available, use "node" attribute for Node name
- set(attributes["k8s.node.name"], attributes["node"]) where IsMatch(metric.name, "^(container_.*)|(kube_node_.*)|(kube_pod_info)|(kube_pod_container_resource_requests)|(kube_pod_container_resource_limits)|(kube_pod_init_container_resource_requests)|(kube_pod_init_container_resource_limits)$") == true and attributes["k8s.node.name"] == nil
# "kubernetes_io_hostname", unlike "service.instance.id", provides a nice Node name in environments like local Docker, but for Fargate, its value is different from the other attributes
# (hence the extra guard below: skipped when "eks_amazonaws_com_compute_type" equals "fargate")
- set(attributes["k8s.node.name"], attributes["kubernetes_io_hostname"]) where IsMatch(metric.name, "^(container_.*)|(kube_node_.*)|(kube_pod_info)|(kube_pod_container_resource_requests)|(kube_pod_container_resource_limits)|(kube_pod_init_container_resource_requests)|(kube_pod_init_container_resource_limits)$") == true and attributes["eks_amazonaws_com_compute_type"] != "fargate" and attributes["k8s.node.name"] == nil
# use "service.instance.id" for Node name when the above attributes are not available
# (read from the resource attributes, not from the datapoint attributes)
- set(attributes["k8s.node.name"], resource.attributes["service.instance.id"]) where IsMatch(metric.name, "^(container_.*)|(kube_node_.*)|(kube_pod_info)|(kube_pod_container_resource_requests)|(kube_pod_container_resource_limits)|(kube_pod_init_container_resource_requests)|(kube_pod_init_container_resource_limits)$") == true and attributes["k8s.node.name"] == nil

attributes/unify_volume_attribute:
include:
Expand Down Expand Up @@ -1483,7 +1468,7 @@ service:
- filter/receiver
- transform
- filter/remove_internal
- attributes/unify_node_attribute
- transform/unify_node_attribute
- attributes/unify_volume_attribute
- attributes/identify_init_container
- attributes/identify_standard_container
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -334,24 +334,6 @@ Metrics config should match snapshot when using default values:
match_type: regexp
metric_names:
- .*
attributes/unify_node_attribute:
actions:
- action: insert
from_attribute: node
key: k8s.node.name
- action: insert
from_attribute: service.instance.id
key: k8s.node.name
include:
match_type: regexp
metric_names:
- container_.*
- kube_node_.*
- kube_pod_info
- kube_pod_container_resource_requests
- kube_pod_container_resource_limits
- kube_pod_init_container_resource_requests
- kube_pod_init_container_resource_limits
attributes/unify_volume_attribute:
actions:
- action: insert
Expand Down Expand Up @@ -1926,6 +1908,20 @@ Metrics config should match snapshot when using default values:
- delete_key(resource.attributes, "sw.k8s.job.found")
- delete_key(resource.attributes, "sw.k8s.cronjob.found")
- delete_key(resource.attributes, "sw.k8s.node.found")
transform/unify_node_attribute:
metric_statements:
- context: datapoint
statements:
- set(attributes["k8s.node.name"], attributes["node"]) where IsMatch(metric.name,
"^(container_.*)|(kube_node_.*)|(kube_pod_info)|(kube_pod_container_resource_requests)|(kube_pod_container_resource_limits)|(kube_pod_init_container_resource_requests)|(kube_pod_init_container_resource_limits)$")
== true and attributes["k8s.node.name"] == nil
- set(attributes["k8s.node.name"], attributes["kubernetes_io_hostname"]) where
IsMatch(metric.name, "^(container_.*)|(kube_node_.*)|(kube_pod_info)|(kube_pod_container_resource_requests)|(kube_pod_container_resource_limits)|(kube_pod_init_container_resource_requests)|(kube_pod_init_container_resource_limits)$")
== true and attributes["eks_amazonaws_com_compute_type"] != "fargate" and
attributes["k8s.node.name"] == nil
- set(attributes["k8s.node.name"], resource.attributes["service.instance.id"])
where IsMatch(metric.name, "^(container_.*)|(kube_node_.*)|(kube_pod_info)|(kube_pod_container_resource_requests)|(kube_pod_container_resource_limits)|(kube_pod_init_container_resource_requests)|(kube_pod_init_container_resource_limits)$")
== true and attributes["k8s.node.name"] == nil
receivers:
k8s_events: null
prometheus/kube-state-metrics:
Expand Down Expand Up @@ -1988,7 +1984,7 @@ Metrics config should match snapshot when using default values:
- filter/receiver
- transform
- filter/remove_internal
- attributes/unify_node_attribute
- transform/unify_node_attribute
- attributes/unify_volume_attribute
- attributes/identify_init_container
- attributes/identify_standard_container
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -334,27 +334,6 @@ Metrics config should match snapshot when using default values:
match_type: regexp
metric_names:
- .*
attributes/unify_node_attribute:
actions:
- action: insert
from_attribute: node
key: k8s.node.name
- action: insert
from_attribute: kubernetes_io_hostname
key: k8s.node.name
- action: insert
from_attribute: service.instance.id
key: k8s.node.name
include:
match_type: regexp
metric_names:
- container_.*
- kube_node_.*
- kube_pod_info
- kube_pod_container_resource_requests
- kube_pod_container_resource_limits
- kube_pod_init_container_resource_requests
- kube_pod_init_container_resource_limits
attributes/unify_volume_attribute:
actions:
- action: insert
Expand Down Expand Up @@ -1929,6 +1908,20 @@ Metrics config should match snapshot when using default values:
- delete_key(resource.attributes, "sw.k8s.job.found")
- delete_key(resource.attributes, "sw.k8s.cronjob.found")
- delete_key(resource.attributes, "sw.k8s.node.found")
transform/unify_node_attribute:
metric_statements:
- context: datapoint
statements:
- set(attributes["k8s.node.name"], attributes["node"]) where IsMatch(metric.name,
"^(container_.*)|(kube_node_.*)|(kube_pod_info)|(kube_pod_container_resource_requests)|(kube_pod_container_resource_limits)|(kube_pod_init_container_resource_requests)|(kube_pod_init_container_resource_limits)$")
== true and attributes["k8s.node.name"] == nil
- set(attributes["k8s.node.name"], attributes["kubernetes_io_hostname"]) where
IsMatch(metric.name, "^(container_.*)|(kube_node_.*)|(kube_pod_info)|(kube_pod_container_resource_requests)|(kube_pod_container_resource_limits)|(kube_pod_init_container_resource_requests)|(kube_pod_init_container_resource_limits)$")
== true and attributes["eks_amazonaws_com_compute_type"] != "fargate" and
attributes["k8s.node.name"] == nil
- set(attributes["k8s.node.name"], resource.attributes["service.instance.id"])
where IsMatch(metric.name, "^(container_.*)|(kube_node_.*)|(kube_pod_info)|(kube_pod_container_resource_requests)|(kube_pod_container_resource_limits)|(kube_pod_init_container_resource_requests)|(kube_pod_init_container_resource_limits)$")
== true and attributes["k8s.node.name"] == nil
receivers:
k8s_events: null
prometheus/kube-state-metrics:
Expand Down Expand Up @@ -1991,7 +1984,7 @@ Metrics config should match snapshot when using default values:
- filter/receiver
- transform
- filter/remove_internal
- attributes/unify_node_attribute
- transform/unify_node_attribute
- attributes/unify_volume_attribute
- attributes/identify_init_container
- attributes/identify_standard_container
Expand Down
Loading

0 comments on commit 3ffaffc

Please sign in to comment.