From 7a3d3b785883d1aedad2a5d3393e0d3a99ce5a88 Mon Sep 17 00:00:00 2001 From: marcel-dempers Date: Sun, 2 Feb 2020 20:28:41 +1100 Subject: [PATCH] cluster monitoring components --- .../grafana/grafana.dashboards.configmap.yaml | 4599 +++++++++++------ .../cluster-role-binding.yaml | 15 + .../kube-state-metrics/cluster-role.yaml | 110 + .../1.14.8/kube-state-metrics/deployment.yaml | 42 + .../kube-state-metrics/service-account.yaml | 7 + .../kube-state-metrics/service-monitor.yaml | 22 + .../1.14.8/kube-state-metrics/service.yaml | 20 + .../apiserver.servicemonitor.yaml | 24 + .../kubelet.servicemonitor.yaml | 77 + .../prometheus-standalone/prometheus.yaml | 10 - 10 files changed, 3232 insertions(+), 1694 deletions(-) create mode 100644 prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/cluster-role-binding.yaml create mode 100644 prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/cluster-role.yaml create mode 100644 prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/deployment.yaml create mode 100644 prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/service-account.yaml create mode 100644 prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/service-monitor.yaml create mode 100644 prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/service.yaml create mode 100644 prometheus-monitoring/kubernetes/1.14.8/prometheus-cluster-monitoring/apiserver.servicemonitor.yaml create mode 100644 prometheus-monitoring/kubernetes/1.14.8/prometheus-cluster-monitoring/kubelet.servicemonitor.yaml diff --git a/prometheus-monitoring/kubernetes/1.14.8/grafana/grafana.dashboards.configmap.yaml b/prometheus-monitoring/kubernetes/1.14.8/grafana/grafana.dashboards.configmap.yaml index c6208d86e..ed55af857 100644 --- a/prometheus-monitoring/kubernetes/1.14.8/grafana/grafana.dashboards.configmap.yaml +++ b/prometheus-monitoring/kubernetes/1.14.8/grafana/grafana.dashboards.configmap.yaml @@ -93,7 +93,7 @@ items: "tableColumn": "", "targets": [ { - 
"expr": "sum(up{job=\"apiserver\"})", + "expr": "sum(up{job=\"apiserver\", cluster=\"$cluster\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -160,28 +160,28 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"2..\"}[5m]))", + "expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"2..\", cluster=\"$cluster\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "2xx", "refId": "A" }, { - "expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"3..\"}[5m]))", + "expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"3..\", cluster=\"$cluster\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "3xx", "refId": "B" }, { - "expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"4..\"}[5m]))", + "expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"4..\", cluster=\"$cluster\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "4xx", "refId": "C" }, { - "expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"5..\", cluster=\"$cluster\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "5xx", @@ -242,15 +242,15 @@ items: }, "id": 4, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", + "current": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -272,7 +272,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, 
sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\"}[5m])) by (verb, le))", + "expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", verb!=\"WATCH\", cluster=\"$cluster\"}[5m])) by (verb, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{verb}}", @@ -376,7 +376,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\"}[5m])) by (instance, name)", + "expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{name}}", @@ -467,7 +467,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\"}[5m])) by (instance, name)", + "expr": "sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{name}}", @@ -528,15 +528,15 @@ items: }, "id": 7, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", + "current": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -558,7 +558,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\"}[5m])) by (instance, name, le))", + "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": 
"{{instance}} {{name}}", @@ -662,7 +662,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "etcd_helper_cache_entry_total{job=\"apiserver\", instance=~\"$instance\"}", + "expr": "etcd_helper_cache_entry_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -753,14 +753,14 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(etcd_helper_cache_hit_total{job=\"apiserver\",instance=~\"$instance\"}[5m])) by (intance)", + "expr": "sum(rate(etcd_helper_cache_hit_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} hit", "refId": "A" }, { - "expr": "sum(rate(etcd_helper_cache_miss_total{job=\"apiserver\",instance=~\"$instance\"}[5m])) by (instance)", + "expr": "sum(rate(etcd_helper_cache_miss_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} miss", @@ -851,14 +851,14 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99,sum(rate(etcd_request_cache_get_duration_seconds_bucket{job=\"apiserver\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "histogram_quantile(0.99,sum(rate(etcd_request_cache_get_duration_seconds_bucket{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} get", "refId": "A" }, { - "expr": "histogram_quantile(0.99,sum(rate(etcd_request_cache_add_duration_seconds_bucket{job=\"apiserver\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "histogram_quantile(0.99,sum(rate(etcd_request_cache_add_duration_seconds_bucket{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, le))", "format": "time_series", 
"intervalFactor": 2, "legendFormat": "{{instance}} miss", @@ -962,7 +962,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "process_resident_memory_bytes{job=\"apiserver\",instance=~\"$instance\"}", + "expr": "process_resident_memory_bytes{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -1053,7 +1053,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\"}[5m])", + "expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -1144,7 +1144,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "go_goroutines{job=\"apiserver\",instance=~\"$instance\"}", + "expr": "go_goroutines{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -1210,8 +1210,8 @@ items: "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -1224,6 +1224,33 @@ items: "regex": "", "type": "datasource" }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(apiserver_request_total, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, { "allValue": null, "current": { @@ -1238,7 +1265,7 @@ items: "options": [ ], - "query": "label_values(apiserver_request_total{job=\"apiserver\"}, instance)", + "query": "label_values(apiserver_request_total{job=\"apiserver\", cluster=\"$cluster\"}, instance)", "refresh": 
2, "regex": "", "sort": 1, @@ -1347,15 +1374,11 @@ items: "aliasColors": { }, - "breakpoint": "50%", - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": 0 - }, - "datasource": "prometheus", - "fontSize": "80%", - "format": "Bps", + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, "gridPos": { "h": 9, "w": 12, @@ -1363,44 +1386,100 @@ items: "y": 1 }, "id": 3, - "interval": null, "legend": { - "percentage": true, - "percentageDecimals": null, + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, "show": true, + "sort": "current", + "sortDesc": true, + "total": false, "values": true }, - "legendType": "Right side", - "maxDataPoints": 3, - "nullPointMode": "connected", - "pieType": "donut", + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", - "instant": null, "intervalFactor": 1, "legendFormat": "{{namespace}}", - "refId": "A" + "refId": "A", + "step": 10 } ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, "title": "Current Rate of Bytes Received", - "type": "grafana-piechart-panel", - "valueName": "current" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + 
"format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, { "aliasColors": { }, - "breakpoint": "50%", - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": 0 - }, - "datasource": "prometheus", - "fontSize": "80%", - "format": "Bps", + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, "gridPos": { "h": 9, "w": 12, @@ -1408,75 +1487,135 @@ items: "y": 1 }, "id": 4, - "interval": null, "legend": { - "percentage": true, - "percentageDecimals": null, + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, "show": true, + "sort": "current", + "sortDesc": true, + "total": false, "values": true }, - "legendType": "Right side", - "maxDataPoints": 3, - "nullPointMode": "connected", - "pieType": "donut", + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", - "instant": null, "intervalFactor": 1, "legendFormat": "{{namespace}}", - "refId": "A" + "refId": "A", + "step": 10 } ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, "title": "Current Rate of Bytes Transmitted", - "type": "grafana-piechart-panel", - "valueName": "current" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + 
"max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, { "columns": [ { - "text": "", - "value": "" + "text": "Time", + "value": "Time" }, { - "text": "", - "value": "" + "text": "Value #A", + "value": "Value #A" }, { - "text": "", - "value": "" + "text": "Value #B", + "value": "Value #B" }, { - "text": "", - "value": "" + "text": "Value #C", + "value": "Value #C" }, { - "text": "", - "value": "" + "text": "Value #D", + "value": "Value #D" }, { - "text": "", - "value": "" + "text": "Value #E", + "value": "Value #E" }, { - "text": "", - "value": "" + "text": "Value #F", + "value": "Value #F" }, { - "text": "", - "value": "" + "text": "Value #G", + "value": "Value #G" }, { - "text": "", - "value": "" + "text": "Value #H", + "value": "Value #H" }, { - "text": "", - "value": "" + "text": "namespace", + "value": "namespace" } ], - "datasource": "prometheus", + "datasource": "$datasource", "fill": 1, "fontSize": "90%", "gridPos": { @@ -1511,7 +1650,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Time", "thresholds": [ ], @@ -1529,7 +1668,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Value #A", "thresholds": [ ], @@ -1547,7 +1686,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Value #B", "thresholds": [ ], @@ -1565,7 +1704,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Value #C", "thresholds": [ ], @@ -1583,7 +1722,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Value #D", "thresholds": [ ], @@ -1601,7 +1740,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Value #E", "thresholds": [ ], @@ -1619,7 +1758,7 @@ items: "link": false, "linkTooltip": "Drill 
down", "linkUrl": "", - "pattern": "", + "pattern": "Value #F", "thresholds": [ ], @@ -1637,7 +1776,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Value #G", "thresholds": [ ], @@ -1655,7 +1794,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Value #H", "thresholds": [ ], @@ -1673,7 +1812,7 @@ items: "link": true, "linkTooltip": "Drill down", "linkUrl": "d/8b7a8b326d7a6f1f04244066368c67af/kubernetes-networking-namespace-pods?orgId=1&refresh=30s&var-namespace=$__cell", - "pattern": "", + "pattern": "namespace", "thresholds": [ ], @@ -1755,6 +1894,8 @@ items: "step": 10 } ], + "timeFrom": null, + "timeShift": null, "title": "Current Status", "type": "table" }, @@ -1773,15 +1914,11 @@ items: "aliasColors": { }, - "breakpoint": "50%", - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": 0 - }, - "datasource": "prometheus", - "fontSize": "80%", - "format": "Bps", + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, "gridPos": { "h": 9, "w": 12, @@ -1789,44 +1926,100 @@ items: "y": 11 }, "id": 7, - "interval": null, "legend": { - "percentage": true, - "percentageDecimals": null, + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, "show": true, + "sort": "current", + "sortDesc": true, + "total": false, "values": true }, - "legendType": "Right side", - "maxDataPoints": 3, - "nullPointMode": "connected", - "pieType": "donut", + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, "targets": [ { "expr": 
"sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", - "instant": null, "intervalFactor": 1, "legendFormat": "{{namespace}}", - "refId": "A" + "refId": "A", + "step": 10 } ], - "title": "Average Rate of Bytes Received", - "type": "grafana-piechart-panel", - "valueName": "current" - }, - { - "aliasColors": { + "thresholds": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Rate of Bytes Received", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" }, - "breakpoint": "50%", - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": 0 - }, - "datasource": "prometheus", - "fontSize": "80%", - "format": "Bps", + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, "gridPos": { "h": 9, "w": 12, @@ -1834,30 +2027,90 @@ items: "y": 11 }, "id": 8, - "interval": null, "legend": { - "percentage": true, - "percentageDecimals": null, + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, "show": true, + "sort": "current", + "sortDesc": true, + "total": false, "values": true }, - "legendType": "Right side", - "maxDataPoints": 3, - "nullPointMode": "connected", - "pieType": "donut", + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + 
"seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, "targets": [ { "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", - "instant": null, "intervalFactor": 1, "legendFormat": "{{namespace}}", - "refId": "A" + "refId": "A", + "step": 10 } ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, "title": "Average Rate of Bytes Transmitted", - "type": "grafana-piechart-panel", - "valueName": "current" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] } ], "repeat": null, @@ -1896,7 +2149,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -1995,7 +2248,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -2105,7 +2358,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -2204,7 +2457,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -2323,7 +2576,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -2422,7 +2675,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": 
"prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -2513,260 +2766,109 @@ items: "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Errors", - "titleSize": "h6", - "type": "row" - } - ], - "refresh": "30s", - "rows": [ - - ], - "schemaVersion": 18, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "text": "5m", - "value": "5m" }, - "datasource": "prometheus", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "resolution", - "options": [ - { - "selected": false, - "text": "30s", - "value": "30s" + { + "aliasColors": { + }, - { - "selected": true, - "text": "5m", - "value": "5m" + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 59 }, - { - "selected": false, - "text": "1h", - "value": "1h" - } - ], - "query": "30s,5m,1h", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "interval", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "text": "5m", - "value": "5m" - }, - "datasource": "prometheus", - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "interval", - "options": [ - { - "selected": true, - "text": "4h", - "value": "4h" - } - ], - "query": "4h", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [ + "id": 18, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + 
"linewidth": 2, + "links": [ + { + "targetBlank": true, + "title": "What is TCP Retransmit?", + "url": "https://accedian.com/enterprises/blog/network-packet-loss-retransmissions-and-duplicate-acknowledgements/" + } + ], + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "tagsQuery": "", - "type": "interval", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / Networking / Cluster", - "uid": "ff635a025bcfea7bc3dd4f508990a3e9", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-cluster-total - namespace: monitoring -- apiVersion: v1 - data: - controller-manager.json: |- - { - "__inputs": [ - - ], - "__requires": [ - - ], - "annotations": { - "list": [ - - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ - - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 2, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", 
- "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 24, + "stack": true, + "steppedLine": false, "targets": [ { - "expr": "sum(up{job=\"kube-controller-manager\"})", + "expr": "sort_desc(sum(rate(node_netstat_Tcp_RetransSegs[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs[$interval:$resolution])) by (instance))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 10 } ], - "thresholds": "", - "title": "Up", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of TCP Retransmits out of all sent segments", "tooltip": { - "shared": false + "shared": true, + "sort": 2, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "N/A", - "value": "null" + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "min" + ] }, { "aliasColors": { @@ -2776,28 +2878,39 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 2, "gridPos": { - + "h": 9, + "w": 24, + "x": 0, + "y": 59 }, - "id": 3, + "id": 19, "legend": { - "alignAsTable": "true", - "avg": false, - "current": "true", - "max": false, - "min": false, - "rightSide": "true", - "show": "true", + "alignAsTable": true, + "avg": true, + "current": true, + 
"hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ - + { + "targetBlank": true, + "title": "Why monitor SYN retransmits?", + "url": "https://github.com/prometheus/node_exporter/issues/1023#issuecomment-408128365" + } ], - "nullPointMode": "null", + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -2807,16 +2920,17 @@ items: ], "spaceLength": 10, - "span": 10, - "stack": false, + "span": 24, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(workqueue_adds_total{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", + "expr": "sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs[$interval:$resolution])) by (instance))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{name}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -2824,10 +2938,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Work Queue Add Rate", + "title": "Rate of TCP SYN Retransmits out of all retransmits", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -2842,19 +2956,19 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "ops", + "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] @@ -2863,71 +2977,432 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Errors", 
"titleSize": "h6", "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { + } + ], + "refresh": "30s", + "rows": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, + ], + "schemaVersion": 18, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "resolution", + "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "30s,5m,1h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + }, + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + 
"6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Networking / Cluster", + "uid": "ff635a025bcfea7bc3dd4f508990a3e9", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-cluster-total + namespace: monitoring +- apiVersion: v1 + data: + controller-manager.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], "datasource": "$datasource", - "fill": 1, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, "gridPos": { }, - "id": 4, - "legend": { - "alignAsTable": "true", - "avg": false, - "current": "true", - "max": false, - "min": false, - "rightSide": "true", - "show": "true", - "total": false, - "values": "true" - }, - "lines": true, - "linewidth": 1, + "id": 2, + "interval": null, "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": 
"rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", "targets": [ { - "expr": "sum(rate(workqueue_depth{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", + "expr": "sum(up{job=\"kube-controller-manager\"})", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{name}}", + "legendFormat": "", "refId": "A" } ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, + "thresholds": "", + "title": "Up", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(workqueue_adds_total{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{name}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Work Queue Add Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + 
}, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(workqueue_depth{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{name}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, "title": "Work Queue Depth", "tooltip": { "shared": false, @@ -2990,15 +3465,15 @@ items: }, "id": 5, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", + "current": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -3310,15 +3785,15 @@ items: }, "id": 8, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", + "current": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - 
"values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -3692,8 +4167,8 @@ items: "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -6232,8 +6707,8 @@ items: "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -6282,7 +6757,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, @@ -8069,8 +8544,8 @@ items: "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -8146,7 +8621,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, @@ -9051,8 +9526,8 @@ items: "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -10602,8 +11077,8 @@ items: "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -10706,7 +11181,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, @@ -12608,8 +13083,8 @@ items: "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -12739,7 +13214,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, @@ -14749,8 +15224,8 @@ items: "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -14826,7 +15301,7 @@ items: "text": 
"5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, @@ -15014,7 +15489,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(up{cluster=\"$cluster\", job=\"kubelet\"})", + "expr": "sum(up{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -15098,7 +15573,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"})", + "expr": "sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -15182,7 +15657,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"})", + "expr": "sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -15266,7 +15741,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\", state=\"actual_state_of_world\"})", + "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -15350,7 +15825,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\",state=\"desired_state_of_world\"})", + "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", 
instance=~\"$instance\",state=\"desired_state_of_world\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -15434,7 +15909,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m]))", + "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -15484,15 +15959,15 @@ items: }, "id": 8, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", + "current": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -15514,7 +15989,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (operation_type, instance)", + "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (operation_type, instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{operation_type}}", @@ -15575,15 +16050,15 @@ items: }, "id": 9, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", + "current": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -15605,7 +16080,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type)", + "expr": 
"sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{operation_type}}", @@ -15679,15 +16154,15 @@ items: }, "id": 10, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", + "current": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -15709,7 +16184,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{operation_type}}", @@ -15783,15 +16258,15 @@ items: }, "id": 11, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", + "current": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -15813,14 +16288,14 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)", + "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)", "format": "time_series", 
"intervalFactor": 2, "legendFormat": "{{instance}} pod", "refId": "A" }, { - "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)", + "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} worker", @@ -15881,15 +16356,15 @@ items: }, "id": 12, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", + "current": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -15911,14 +16386,14 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} pod", "refId": "A" }, { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} worker", @@ -15992,17 +16467,17 @@ items: }, "id": 13, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", - 
"hideEmpty": "true", - "hideZero": "true", + "current": true, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -16024,7 +16499,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", + "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", @@ -16085,17 +16560,17 @@ items: }, "id": 14, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", - "hideEmpty": "true", - "hideZero": "true", + "current": true, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -16117,7 +16592,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", + "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", @@ -16191,17 +16666,17 @@ items: }, "id": 15, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", - "hideEmpty": 
"true", - "hideZero": "true", + "current": true, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, - "rightSide": "true", + "rightSide": true, "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -16223,7 +16698,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))", + "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", @@ -16297,15 +16772,15 @@ items: }, "id": 16, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", + "current": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -16327,7 +16802,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type)", + "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{operation_type}}", @@ -16388,15 +16863,15 @@ items: }, "id": 17, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", + "current": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + 
"rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -16418,7 +16893,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{operation_type}}", @@ -16493,15 +16968,15 @@ items: }, "id": 18, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", + "current": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -16523,7 +16998,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance)", + "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -16584,15 +17059,15 @@ items: }, "id": 19, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", + "current": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -16614,7 +17089,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, 
sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -16688,15 +17163,15 @@ items: }, "id": 20, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", + "current": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -16718,7 +17193,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -16822,28 +17297,28 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"2..\"}[5m]))", + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "2xx", "refId": "A" }, { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"3..\"}[5m]))", + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", 
instance=~\"$instance\",code=~\"3..\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "3xx", "refId": "B" }, { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"4..\"}[5m]))", + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "4xx", "refId": "C" }, { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "5xx", @@ -16917,15 +17392,15 @@ items: }, "id": 22, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", + "current": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -16947,7 +17422,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))", + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{verb}} {{url}}", @@ -17051,7 +17526,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}", + "expr": 
"process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -17142,7 +17617,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])", + "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -17233,7 +17708,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}", + "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -17299,8 +17774,8 @@ items: "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -17353,7 +17828,7 @@ items: "options": [ ], - "query": "label_values(kubelet_runtime_operations{cluster=\"$cluster\", job=\"kubelet\"}, instance)", + "query": "label_values(kubelet_runtime_operations_total{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"}, instance)", "refresh": 2, "regex": "", "sort": 1, @@ -17467,7 +17942,7 @@ items: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "prometheus", + "datasource": "$datasource", "decimals": 0, "format": "time_series", "gauge": { @@ -17594,7 +18069,7 @@ items: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "prometheus", + "datasource": "$datasource", "decimals": 0, "format": "time_series", "gauge": { @@ -17715,39 +18190,39 @@ items: { "columns": [ { - "text": "", - "value": "" + "text": "Time", + "value": "Time" }, { - "text": "", - "value": "" + "text": "Value #A", + "value": 
"Value #A" }, { - "text": "", - "value": "" + "text": "Value #B", + "value": "Value #B" }, { - "text": "", - "value": "" + "text": "Value #C", + "value": "Value #C" }, { - "text": "", - "value": "" + "text": "Value #D", + "value": "Value #D" }, { - "text": "", - "value": "" + "text": "Value #E", + "value": "Value #E" }, { - "text": "", - "value": "" + "text": "Value #F", + "value": "Value #F" }, { - "text": "", - "value": "" + "text": "pod", + "value": "pod" } ], - "datasource": "prometheus", + "datasource": "$datasource", "fill": 1, "fontSize": "100%", "gridPos": { @@ -17782,7 +18257,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Time", "thresholds": [ ], @@ -17800,7 +18275,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Value #A", "thresholds": [ ], @@ -17818,7 +18293,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Value #B", "thresholds": [ ], @@ -17836,7 +18311,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Value #C", "thresholds": [ ], @@ -17854,7 +18329,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Value #D", "thresholds": [ ], @@ -17872,7 +18347,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Value #E", "thresholds": [ ], @@ -17890,7 +18365,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Value #F", "thresholds": [ ], @@ -17908,7 +18383,7 @@ items: "link": true, "linkTooltip": "Drill down", "linkUrl": "d/7a18067ce943a40ae25454675c19ff5c/kubernetes-networking-pod?orgId=1&refresh=30s&var-namespace=$namespace&var-pod=$__cell", - "pattern": "", + "pattern": "pod", "thresholds": [ ], @@ -17972,6 +18447,8 @@ items: "step": 10 } ], + "timeFrom": null, + "timeShift": null, "title": "Current 
Status", "type": "table" }, @@ -18003,7 +18480,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -18102,7 +18579,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -18212,7 +18689,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 10, @@ -18311,7 +18788,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 10, @@ -18430,7 +18907,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 10, @@ -18529,7 +19006,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 10, @@ -18642,6 +19119,22 @@ items: ], "templating": { "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, { "allValue": ".+", "auto": false, @@ -18651,7 +19144,7 @@ items: "text": "kube-system", "value": "kube-system" }, - "datasource": "prometheus", + "datasource": "$datasource", "definition": "label_values(container_network_receive_packets_total, namespace)", "hide": 0, "includeAll": true, @@ -18683,7 +19176,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, @@ -18728,7 +19221,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, @@ 
-18851,15 +19344,11 @@ items: "aliasColors": { }, - "breakpoint": "50%", - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": 0 - }, - "datasource": "prometheus", - "fontSize": "80%", - "format": "Bps", + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, "gridPos": { "h": 9, "w": 12, @@ -18867,44 +19356,100 @@ items: "y": 1 }, "id": 3, - "interval": null, "legend": { - "percentage": true, - "percentageDecimals": null, + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, "show": true, + "sort": "current", + "sortDesc": true, + "total": false, "values": true }, - "legendType": "Right side", - "maxDataPoints": 3, - "nullPointMode": "connected", - "pieType": "donut", + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "instant": null, "intervalFactor": 1, - "legendFormat": "{{workload}}", - "refId": "A" + "legendFormat": "{{ workload }}", + "refId": "A", + "step": 10 } ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, "title": "Current Rate of Bytes Received", - "type": "grafana-piechart-panel", - "valueName": "current" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + 
"show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, { "aliasColors": { }, - "breakpoint": "50%", - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": 0 - }, - "datasource": "prometheus", - "fontSize": "80%", - "format": "Bps", + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, "gridPos": { "h": 9, "w": 12, @@ -18912,75 +19457,135 @@ items: "y": 1 }, "id": 4, - "interval": null, "legend": { - "percentage": true, - "percentageDecimals": null, + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, "show": true, + "sort": "current", + "sortDesc": true, + "total": false, "values": true }, - "legendType": "Right side", - "maxDataPoints": 3, - "nullPointMode": "connected", - "pieType": "donut", + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "instant": null, "intervalFactor": 1, - "legendFormat": "{{workload}}", - "refId": "A" + "legendFormat": "{{ workload }}", + "refId": "A", + "step": 10 } ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, "title": "Current Rate of Bytes 
Transmitted", - "type": "grafana-piechart-panel", - "valueName": "current" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, { "columns": [ { - "text": "", - "value": "" + "text": "Time", + "value": "Time" }, { - "text": "", - "value": "" + "text": "Value #A", + "value": "Value #A" }, { - "text": "", - "value": "" + "text": "Value #B", + "value": "Value #B" }, { - "text": "", - "value": "" + "text": "Value #C", + "value": "Value #C" }, { - "text": "", - "value": "" + "text": "Value #D", + "value": "Value #D" }, { - "text": "", - "value": "" + "text": "Value #E", + "value": "Value #E" }, { - "text": "", - "value": "" + "text": "Value #F", + "value": "Value #F" }, { - "text": "", - "value": "" + "text": "Value #G", + "value": "Value #G" }, { - "text": "", - "value": "" + "text": "Value #H", + "value": "Value #H" }, { - "text": "", - "value": "" + "text": "workload", + "value": "workload" } ], - "datasource": "prometheus", + "datasource": "$datasource", "fill": 1, "fontSize": "90%", "gridPos": { @@ -19015,7 +19620,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Time", "thresholds": [ ], @@ -19033,7 +19638,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Value #A", "thresholds": [ ], @@ -19051,7 +19656,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Value #B", "thresholds": [ ], @@ -19069,7 +19674,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Value #C", "thresholds": [ 
], @@ -19087,7 +19692,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Value #D", "thresholds": [ ], @@ -19105,7 +19710,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Value #E", "thresholds": [ ], @@ -19123,7 +19728,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Value #F", "thresholds": [ ], @@ -19141,7 +19746,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Value #G", "thresholds": [ ], @@ -19159,7 +19764,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "", + "pattern": "Value #H", "thresholds": [ ], @@ -19177,7 +19782,7 @@ items: "link": true, "linkTooltip": "Drill down", "linkUrl": "d/728bf77cc1166d2f3133bf25846876cc/kubernetes-networking-workload?orgId=1&refresh=30s&var-namespace=$namespace&var-type=$type&var-workload=$__cell", - "pattern": "", + "pattern": "workload", "thresholds": [ ], @@ -19259,6 +19864,8 @@ items: "step": 10 } ], + "timeFrom": null, + "timeShift": null, "title": "Current Status", "type": "table" }, @@ -19277,15 +19884,11 @@ items: "aliasColors": { }, - "breakpoint": "50%", - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": 0 - }, - "datasource": "prometheus", - "fontSize": "80%", - "format": "Bps", + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, "gridPos": { "h": 9, "w": 12, @@ -19293,44 +19896,100 @@ items: "y": 20 }, "id": 7, - "interval": null, "legend": { - "percentage": true, - "percentageDecimals": null, + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, "show": true, + "sort": "current", + "sortDesc": true, + "total": false, "values": true }, - "legendType": "Right side", - "maxDataPoints": 3, - "nullPointMode": 
"connected", - "pieType": "donut", - "targets": [ + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, + "targets": [ { "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "instant": null, "intervalFactor": 1, - "legendFormat": "{{workload}}", - "refId": "A" + "legendFormat": "{{ workload }}", + "refId": "A", + "step": 10 } ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, "title": "Average Rate of Bytes Received", - "type": "grafana-piechart-panel", - "valueName": "current" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, { "aliasColors": { }, - "breakpoint": "50%", - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": 0 - }, - "datasource": "prometheus", - "fontSize": "80%", - "format": "Bps", + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, "gridPos": { "h": 9, "w": 12, @@ -19338,30 +19997,90 @@ items: "y": 20 }, "id": 8, - "interval": null, "legend": { - "percentage": true, - "percentageDecimals": null, + "alignAsTable": true, + "avg": false, + "current": 
true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, "show": true, + "sort": "current", + "sortDesc": true, + "total": false, "values": true }, - "legendType": "Right side", - "maxDataPoints": 3, - "nullPointMode": "connected", - "pieType": "donut", + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, "targets": [ { "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "instant": null, "intervalFactor": 1, - "legendFormat": "{{workload}}", - "refId": "A" + "legendFormat": "{{ workload }}", + "refId": "A", + "step": 10 } ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, "title": "Average Rate of Bytes Transmitted", - "type": "grafana-piechart-panel", - "valueName": "current" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] } ], "repeat": null, @@ -19400,7 +20119,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -19499,7 +20218,7 @@ items: "bars": false, "dashLength": 10, "dashes": 
false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -19609,7 +20328,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -19708,7 +20427,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -19827,7 +20546,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -19926,7 +20645,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -20039,6 +20758,22 @@ items: ], "templating": { "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, { "allValue": null, "auto": false, @@ -20048,7 +20783,7 @@ items: "text": "kube-system", "value": "kube-system" }, - "datasource": "prometheus", + "datasource": "$datasource", "definition": "label_values(container_network_receive_packets_total, namespace)", "hide": 0, "includeAll": false, @@ -20080,7 +20815,7 @@ items: "text": "deployment", "value": "deployment" }, - "datasource": "prometheus", + "datasource": "$datasource", "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", "hide": 0, "includeAll": false, @@ -20112,7 +20847,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, @@ -20157,7 +20892,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, @@ 
-21117,8 +21852,8 @@ items: "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -22065,8 +22800,8 @@ items: "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -23190,14 +23925,14 @@ items: "steppedLine": false, "targets": [ { - "expr": "(\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", + "expr": "(\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Used Space", "refId": "A" }, { - "expr": "sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", + "expr": "sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Free Space", @@ -23305,7 +24040,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n 
kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", + "expr": "(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -23385,14 +24120,14 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", + "expr": "sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Used inodes", "refId": "A" }, { - "expr": "(\n sum without(instance, node) (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", + "expr": "(\n sum without(instance, node) (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum 
without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": " Free inodes", @@ -23500,7 +24235,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", + "expr": "kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -23542,8 +24277,8 @@ items: "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -23596,7 +24331,7 @@ items: "options": [ ], - "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\"}, namespace)", + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"}, namespace)", "refresh": 2, "regex": "", "sort": 1, @@ -23622,7 +24357,7 @@ items: "options": [ ], - "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\"}, persistentvolumeclaim)", + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\"}, persistentvolumeclaim)", "refresh": 2, "regex": "", "sort": 1, @@ -23736,7 +24471,7 @@ items: 
"rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "prometheus", + "datasource": "$datasource", "decimals": 0, "format": "time_series", "gauge": { @@ -23863,7 +24598,7 @@ items: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "prometheus", + "datasource": "$datasource", "decimals": 0, "format": "time_series", "gauge": { @@ -24009,7 +24744,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -24108,7 +24843,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -24179,37 +24914,716 @@ items: "show": true, "values": [ - ] + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 8, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 21 + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\", 
pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 21 + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": 
"time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Packets", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 11, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, 
+ "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Errors", + "titleSize": "h6", + "type": "row" + } + ], + "refresh": "30s", + "rows": [ + + ], + "schemaVersion": 18, + 
"style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "kube-system", + "value": "kube-system" + }, + "datasource": "$datasource", + "definition": "label_values(container_network_receive_packets_total, namespace)", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(container_network_receive_packets_total, namespace)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "definition": "label_values(container_network_receive_packets_total{namespace=~\"$namespace\"}, pod)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "pod", + "options": [ + + ], + "query": "label_values(container_network_receive_packets_total{namespace=~\"$namespace\"}, pod)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "resolution", + "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + 
"selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "30s,5m,1h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" }, - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Networking / Pod", + "uid": "7a18067ce943a40ae25454675c19ff5c", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-pod-total + namespace: monitoring +- apiVersion: v1 + data: + pods.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "$datasource", + "enable": true, + "expr": "time() == BOOL timestamp(rate(kube_pod_container_status_restarts_total{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[2m]) > 0)", + "hide": false, + "iconColor": "rgba(215, 44, 44, 1)", + "name": "Restarts", + "showIn": 0, + "tags": 
[ + "restart" + ], + "type": "rows" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ { - "collapse": true, - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 20 - }, - "id": 8, + "collapse": false, + "collapsed": false, "panels": [ { "aliasColors": { @@ -24218,36 +25632,29 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "fill": 2, + "datasource": "$datasource", + "fill": 1, "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 21 + }, - "id": 9, + "id": 2, "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, + "alignAsTable": true, + "avg": true, + "current": true, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [ ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -24258,16 +25665,36 @@ items: ], "spaceLength": 10, "span": 12, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "expr": "sum by(container) (container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\", container!=\"POD\"})", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 + "intervalFactor": 2, + "legendFormat": "Current: {{ container }}", + "refId": "A" + }, + { + "expr": "sum by(container) (kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", 
resource=\"memory\", pod=\"$pod\", container=~\"$container\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Requested: {{ container }}", + "refId": "B" + }, + { + "expr": "sum by(container) (kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\", pod=\"$pod\", container=~\"$container\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Limit: {{ container }}", + "refId": "C" + }, + { + "expr": "sum by(container) (container_memory_cache{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\", container=~\"$container\", container!=\"POD\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Cache: {{ container }}", + "refId": "D" } ], "thresholds": [ @@ -24275,10 +25702,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets", + "title": "Memory Usage", "tooltip": { - "shared": true, - "sort": 2, + "shared": false, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -24293,7 +25720,7 @@ items: }, "yaxes": [ { - "format": "pps", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -24301,7 +25728,7 @@ items: "show": true }, { - "format": "pps", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -24309,7 +25736,20 @@ items: "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -24317,36 +25757,29 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "fill": 2, + "datasource": "$datasource", + "fill": 1, "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 21 + }, - "id": 10, + "id": 3, "legend": { - "alignAsTable": false, - "avg": false, - 
"current": false, - "hideEmpty": true, - "hideZero": true, + "alignAsTable": true, + "avg": true, + "current": true, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [ ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -24357,16 +25790,29 @@ items: ], "spaceLength": 10, "span": 12, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "expr": "sum by (container) (irate(container_cpu_usage_seconds_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", image!=\"\", pod=\"$pod\", container=~\"$container\", container!=\"POD\"}[4m]))", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 + "intervalFactor": 2, + "legendFormat": "Current: {{ container }}", + "refId": "A" + }, + { + "expr": "sum by(container) (kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\", pod=\"$pod\", container=~\"$container\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Requested: {{ container }}", + "refId": "B" + }, + { + "expr": "sum by(container) (kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\", pod=\"$pod\", container=~\"$container\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Limit: {{ container }}", + "refId": "C" } ], "thresholds": [ @@ -24374,10 +25820,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Transmitted Packets", + "title": "CPU Usage", "tooltip": { - 
"shared": true, - "sort": 2, + "shared": false, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -24392,7 +25838,7 @@ items: }, "yaxes": [ { - "format": "pps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -24400,7 +25846,7 @@ items: "show": true }, { - "format": "pps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -24413,21 +25859,14 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "Packets", + "showTitle": false, + "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { - "collapse": true, - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 21 - }, - "id": 11, + "collapse": false, + "collapsed": false, "panels": [ { "aliasColors": { @@ -24436,36 +25875,29 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "fill": 2, + "datasource": "$datasource", + "fill": 1, "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 32 + }, - "id": 12, + "id": 4, "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, + "alignAsTable": true, + "avg": true, + "current": true, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [ ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -24476,16 +25908,22 @@ items: ], "spaceLength": 10, "span": 12, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "expr": "sort_desc(sum by (pod) (irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", 
namespace=\"$namespace\", pod=\"$pod\"}[4m])))", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 + "intervalFactor": 2, + "legendFormat": "RX: {{ pod }}", + "refId": "A" + }, + { + "expr": "sort_desc(sum by (pod) (irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[4m])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "TX: {{ pod }}", + "refId": "B" } ], "thresholds": [ @@ -24493,10 +25931,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets Dropped", + "title": "Network I/O", "tooltip": { - "shared": true, - "sort": 2, + "shared": false, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -24511,7 +25949,7 @@ items: }, "yaxes": [ { - "format": "pps", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -24519,7 +25957,7 @@ items: "show": true }, { - "format": "pps", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -24527,7 +25965,20 @@ items: "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -24535,36 +25986,29 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "fill": 2, + "datasource": "$datasource", + "fill": 1, "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 32 + }, - "id": 13, + "id": 5, "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, + "alignAsTable": true, + "avg": true, + "current": true, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": 
[ ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -24575,16 +26019,15 @@ items: ], "spaceLength": 10, "span": 12, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "expr": "max by (container) (kube_pod_container_status_restarts_total{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"})", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 + "intervalFactor": 2, + "legendFormat": "Restarts: {{ container }}", + "refId": "A" } ], "thresholds": [ @@ -24592,10 +26035,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Transmitted Packets Dropped", + "title": "Total Restarts Per Container", "tooltip": { - "shared": true, - "sort": 2, + "shared": false, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -24610,7 +26053,7 @@ items: }, "yaxes": [ { - "format": "pps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -24618,7 +26061,7 @@ items: "show": true }, { - "format": "pps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -24631,17 +26074,13 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "Errors", + "showTitle": false, + "title": "Dashboard Row", "titleSize": "h6", "type": "row" } ], - "refresh": "30s", - "rows": [ - - ], - "schemaVersion": 18, + "schemaVersion": 14, "style": "dark", "tags": [ "kubernetes-mixin" @@ -24649,28 +26088,38 @@ items: "templating": { "list": [ { - "allValue": ".+", - "auto": false, - "auto_count": 30, - "auto_min": "10s", "current": { - "text": "kube-system", - "value": "kube-system" + "text": "default", + "value": "default" }, 
- "datasource": "prometheus", - "definition": "label_values(container_network_receive_packets_total, namespace)", "hide": 0, - "includeAll": true, "label": null, - "multi": false, - "name": "namespace", + "name": "datasource", "options": [ ], - "query": "label_values(container_network_receive_packets_total, namespace)", + "query": "prometheus", "refresh": 1, "regex": "", - "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 2, + "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ @@ -24681,28 +26130,22 @@ items: "useTags": false }, { - "allValue": ".+", - "auto": false, - "auto_count": 30, - "auto_min": "10s", + "allValue": null, "current": { - "text": "", - "value": "" + }, - "datasource": "prometheus", - "definition": "label_values(container_network_receive_packets_total{namespace=~\"$namespace\"}, pod)", + "datasource": "$datasource", "hide": 0, "includeAll": false, - "label": null, + "label": "Namespace", "multi": false, - "name": "pod", + "name": "namespace", "options": [ ], - "query": "label_values(container_network_receive_packets_total{namespace=~\"$namespace\"}, pod)", - "refresh": 1, + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 2, "regex": "", - "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [ @@ -24714,82 +26157,54 @@ items: }, { "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", "current": { - "text": "5m", - "value": "5m" + }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "resolution", - "options": [ - { - "selected": false, - "text": "30s", - "value": "30s" - }, - { - "selected": true, - "text": "5m", - "value": "5m" - 
}, - { - "selected": false, - "text": "1h", - "value": "1h" - } + "includeAll": false, + "label": "Pod", + "multi": false, + "name": "pod", + "options": [ + ], - "query": "30s,5m,1h", + "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=~\"$namespace\"}, pod)", "refresh": 2, "regex": "", - "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", - "type": "interval", + "type": "query", "useTags": false }, { "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", "current": { - "text": "5m", - "value": "5m" + }, - "datasource": "prometheus", - "hide": 2, - "includeAll": false, - "label": null, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "Container", "multi": false, - "name": "interval", + "name": "container", "options": [ - { - "selected": true, - "text": "4h", - "value": "4h" - } + ], - "query": "4h", + "query": "label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}, container)", "refresh": 2, "regex": "", - "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", - "type": "interval", + "type": "query", "useTags": false } ] @@ -24824,17 +26239,17 @@ items: ] }, "timezone": "", - "title": "Kubernetes / Networking / Pod", - "uid": "7a18067ce943a40ae25454675c19ff5c", + "title": "Kubernetes / Pods", + "uid": "ab4f13a9892a76a4d21ce8c2445bf4ea", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-pod-total + name: grafana-dashboard-pods namespace: monitoring - apiVersion: v1 data: - pods.json: |- + prometheus-remote-write.json: |- { "__inputs": [ @@ -24844,23 +26259,10 @@ items: ], "annotations": { "list": [ - { - "builtIn": 1, - "datasource": "$datasource", - "enable": true, - "expr": "time() == BOOL timestamp(rate(kube_pod_container_status_restarts_total{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[2m]) > 0)", - "hide": false, - 
"iconColor": "rgba(215, 44, 44, 1)", - "name": "Restarts", - "showIn": 0, - "tags": [ - "restart" - ], - "type": "rows" - } + ] }, - "editable": false, + "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": false, @@ -24888,137 +26290,12 @@ items: }, "id": 2, "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(container) (container_memory_usage_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\", container!=\"POD\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Current: {{ container }}", - "refId": "A" - }, - { - "expr": "sum by(container) (kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\", pod=\"$pod\", container=~\"$container\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Requested: {{ container }}", - "refId": "B" - }, - { - "expr": "sum by(container) (kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\", pod=\"$pod\", container=~\"$container\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Limit: {{ container }}", - "refId": "C" - }, - { - "expr": "sum by(container) (container_memory_cache{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\", container=~\"$container\", container!=\"POD\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Cache: {{ container 
}}", - "refId": "D" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory Usage", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 3, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, + "alignAsTable": false, + "avg": false, + "current": false, "max": false, "min": false, - "rightSide": true, + "rightSide": false, "show": true, "total": false, "values": false @@ -25038,30 +26315,16 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum by (container) (irate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", image!=\"\", pod=\"$pod\", container=~\"$container\", container!=\"POD\"}[4m]))", + "expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(queue) group_right(instance) prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}\n)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "Current: {{ container }}", + "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", "refId": "A" - }, - 
{ - "expr": "sum by(container) (kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\", pod=\"$pod\", container=~\"$container\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Requested: {{ container }}", - "refId": "B" - }, - { - "expr": "sum by(container) (kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\", pod=\"$pod\", container=~\"$container\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Limit: {{ container }}", - "refId": "C" } ], "thresholds": [ @@ -25069,9 +26332,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "Highest Timestamp In vs. Highest Timestamp Sent", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -25091,7 +26354,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -25099,24 +26362,11 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -25129,14 +26379,14 @@ items: "gridPos": { }, - "id": 4, + "id": 3, "legend": { - "alignAsTable": true, - "avg": true, - "current": true, + "alignAsTable": false, + "avg": false, + "current": false, "max": false, "min": false, - "rightSide": true, + "rightSide": false, "show": true, "total": false, "values": false @@ -25156,23 +26406,16 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum by (pod) (irate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", 
pod=\"$pod\"}[4m])))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "RX: {{ pod }}", - "refId": "A" - }, - { - "expr": "sort_desc(sum by (pod) (irate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[4m])))", + "expr": "(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n ignoring (queue) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "TX: {{ pod }}", - "refId": "B" + "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", + "refId": "A" } ], "thresholds": [ @@ -25180,9 +26423,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Network I/O", + "title": "Rate[5m]", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -25198,19 +26441,19 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true } ] @@ -25219,8 +26462,8 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Timestamps", "titleSize": "h6", "type": "row" }, @@ -25240,14 +26483,14 @@ items: "gridPos": { }, - "id": 5, + "id": 4, "legend": { - "alignAsTable": true, - "avg": true, - "current": true, + "alignAsTable": false, + "avg": false, + "current": false, "max": false, "min": false, - "rightSide": true, + "rightSide": false, "show": true, "total": false, "values": false @@ -25272,10 +26515,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "max by (container) 
(kube_pod_container_status_restarts_total{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"})", + "expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(queue) group_right(instance) rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "Restarts: {{ container }}", + "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", "refId": "A" } ], @@ -25284,9 +26527,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Total Restarts Per Container", + "title": "Rate, in vs. succeeded or dropped [5m]", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -25298,229 +26541,36 @@ items: "show": true, "values": [ - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": 
"label_values(kube_pod_info, cluster)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "Namespace", - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "Pod", - "multi": false, - "name": "pod", - "options": [ - - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=~\"$namespace\"}, pod)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "Container", - "multi": false, - "name": "container", - "options": [ - - ], - "query": "label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}, container)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / Pods", - "uid": "ab4f13a9892a76a4d21ce8c2445bf4ea", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-pods - 
namespace: monitoring -- apiVersion: v1 - data: - prometheus-remote-write.json: |- - { - "__inputs": [ - - ], - "__requires": [ - - ], - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ - - ], - "refresh": "", - "rows": [ + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Samples", + "titleSize": "h6", + "type": "row" + }, { "collapse": false, "collapsed": false, @@ -25537,7 +26587,7 @@ items: "gridPos": { }, - "id": 2, + "id": 5, "legend": { "alignAsTable": false, "avg": false, @@ -25554,6 +26604,7 @@ items: "links": [ ], + "minSpan": 6, "nullPointMode": "null", "percentage": false, "pointradius": 5, @@ -25564,12 +26615,12 @@ items: ], "spaceLength": 10, - "span": 6, + "span": 12, "stack": false, "steppedLine": false, "targets": [ { - "expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(queue) group_right(instance) prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}\n)\n", + "expr": "prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", @@ -25581,7 +26632,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Highest Timestamp In vs. 
Highest Timestamp Sent", + "title": "Current Shards", "tooltip": { "shared": true, "sort": 0, @@ -25628,7 +26679,7 @@ items: "gridPos": { }, - "id": 3, + "id": 6, "legend": { "alignAsTable": false, "avg": false, @@ -25655,12 +26706,12 @@ items: ], "spaceLength": 10, - "span": 6, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n ignoring (queue) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n)\n", + "expr": "prometheus_remote_storage_shards_max{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", @@ -25672,7 +26723,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate[5m]", + "title": "Max Shards", "tooltip": { "shared": true, "sort": 0, @@ -25706,20 +26757,7 @@ items: "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Timestamps", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -25732,7 +26770,7 @@ items: "gridPos": { }, - "id": 4, + "id": 7, "legend": { "alignAsTable": false, "avg": false, @@ -25759,12 +26797,12 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(queue) group_right(instance) rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n", + "expr": 
"prometheus_remote_storage_shards_min{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", @@ -25776,7 +26814,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate, in vs. succeeded or dropped [5m]", + "title": "Min Shards", "tooltip": { "shared": true, "sort": 0, @@ -25810,20 +26848,7 @@ items: "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Samples", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -25836,7 +26861,7 @@ items: "gridPos": { }, - "id": 5, + "id": 8, "legend": { "alignAsTable": false, "avg": false, @@ -25853,51 +26878,26 @@ items: "links": [ ], - "minSpan": 6, "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": "queue", + "repeat": null, "seriesOverrides": [ - { - "alias": "/max_shards/", - "yaxis": 2 - } + ], "spaceLength": 10, - "span": 12, + "span": 4, "stack": false, "steppedLine": false, "targets": [ - { - "expr": "prometheus_remote_storage_shards_max{cluster=~\"$cluster\", instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "max_shards:{{queue}}", - "refId": "A" - }, - { - "expr": "prometheus_remote_storage_shards_min{cluster=~\"$cluster\", instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "min_shards:{{queue}}", - "refId": "B" - }, { "expr": "prometheus_remote_storage_shards_desired{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "desired_shards:{{queue}}", - "refId": "C" - }, - { - "expr": "prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": 
"current_shards:{{queue}}", - "refId": "D" + "legendFormat": "{{cluster}}:{{instance}}-{{queue}}", + "refId": "A" } ], "thresholds": [ @@ -25905,7 +26905,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Shards: $queue", + "title": "Desired Shards", "tooltip": { "shared": true, "sort": 0, @@ -25965,7 +26965,7 @@ items: "gridPos": { }, - "id": 6, + "id": 9, "legend": { "alignAsTable": false, "avg": false, @@ -25987,7 +26987,7 @@ items: "pointradius": 5, "points": false, "renderer": "flot", - "repeat": "queue", + "repeat": null, "seriesOverrides": [ ], @@ -26009,7 +27009,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Shard Capacity: $queue", + "title": "Shard Capacity", "tooltip": { "shared": true, "sort": 0, @@ -26056,7 +27056,7 @@ items: "gridPos": { }, - "id": 7, + "id": 10, "legend": { "alignAsTable": false, "avg": false, @@ -26078,7 +27078,7 @@ items: "pointradius": 5, "points": false, "renderer": "flot", - "repeat": "queue", + "repeat": null, "seriesOverrides": [ ], @@ -26100,7 +27100,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Pending Samples: $queue", + "title": "Pending Samples", "tooltip": { "shared": true, "sort": 0, @@ -26160,7 +27160,7 @@ items: "gridPos": { }, - "id": 8, + "id": 11, "legend": { "alignAsTable": false, "avg": false, @@ -26251,7 +27251,7 @@ items: "gridPos": { }, - "id": 9, + "id": 12, "legend": { "alignAsTable": false, "avg": false, @@ -26355,7 +27355,7 @@ items: "gridPos": { }, - "id": 10, + "id": 13, "legend": { "alignAsTable": false, "avg": false, @@ -26446,7 +27446,7 @@ items: "gridPos": { }, - "id": 11, + "id": 14, "legend": { "alignAsTable": false, "avg": false, @@ -26537,7 +27537,7 @@ items: "gridPos": { }, - "id": 12, + "id": 15, "legend": { "alignAsTable": false, "avg": false, @@ -26628,7 +27628,7 @@ items: "gridPos": { }, - "id": 13, + "id": 16, "legend": { "alignAsTable": false, "avg": false, @@ -26809,16 +27809,7 @@ items: { "allValue": null, "current": { - "text": { - 
"selected": true, - "text": "All", - "value": "$__all" - }, - "value": { - "selected": true, - "text": "All", - "value": "$__all" - } + }, "datasource": "$datasource", "hide": 0, @@ -26829,7 +27820,7 @@ items: "options": [ ], - "query": "label_values(prometheus_remote_storage_shards, queue)", + "query": "label_values(prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}, queue)", "refresh": 2, "regex": "", "sort": 0, @@ -27976,8 +28967,8 @@ items: "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -28304,15 +29295,15 @@ items: }, "id": 4, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", + "current": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -28499,15 +29490,15 @@ items: }, "id": 6, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", + "current": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -28819,15 +29810,15 @@ items: }, "id": 9, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", + "current": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -29201,8 +30192,8 @@ items: "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -29408,15 +30399,15 @@ items: }, "id": 3, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", 
+ "current": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -29520,15 +30511,15 @@ items: }, "id": 4, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", + "current": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -29861,15 +30852,15 @@ items: }, "id": 7, "legend": { - "alignAsTable": "true", + "alignAsTable": true, "avg": false, - "current": "true", + "current": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, "linewidth": 1, @@ -30243,8 +31234,8 @@ items: "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -30412,7 +31403,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m]))", + "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -30495,7 +31486,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(container_memory_usage_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}) / 1024^3", + "expr": "sum(container_memory_usage_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}) / 1024^3", "format": "time_series", "intervalFactor": 
2, "legendFormat": "", @@ -30578,7 +31569,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\",pod=~\"$statefulset.*\"}[3m]))", + "expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\",pod=~\"$statefulset.*\"}[3m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -31101,8 +32092,8 @@ items: "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -31290,15 +32281,11 @@ items: "aliasColors": { }, - "breakpoint": "50%", - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": 0 - }, - "datasource": "prometheus", - "fontSize": "80%", - "format": "Bps", + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, "gridPos": { "h": 9, "w": 12, @@ -31306,44 +32293,100 @@ items: "y": 1 }, "id": 3, - "interval": null, "legend": { - "percentage": true, - "percentageDecimals": null, + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, "show": true, + "sort": "current", + "sortDesc": true, + "total": false, "values": true }, - "legendType": "Right side", - "maxDataPoints": 3, - "nullPointMode": "connected", - "pieType": "donut", + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, 
+ "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", - "instant": null, "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A" + "legendFormat": "{{ pod }}", + "refId": "A", + "step": 10 } ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, "title": "Current Rate of Bytes Received", - "type": "grafana-piechart-panel", - "valueName": "current" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, { "aliasColors": { }, - "breakpoint": "50%", - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": 0 - }, - "datasource": "prometheus", - "fontSize": "80%", - "format": "Bps", + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, "gridPos": { "h": 9, "w": 12, @@ -31351,30 +32394,90 @@ items: "y": 1 }, "id": 4, - "interval": null, "legend": { - "percentage": true, - "percentageDecimals": null, + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, "show": true, + "sort": "current", + "sortDesc": true, + "total": false, "values": true }, - "legendType": "Right side", - "maxDataPoints": 3, - "nullPointMode": "connected", - "pieType": 
"donut", + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", - "instant": null, "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A" + "legendFormat": "{{ pod }}", + "refId": "A", + "step": 10 } ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, "title": "Current Rate of Bytes Transmitted", - "type": "grafana-piechart-panel", - "valueName": "current" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, { "collapse": true, @@ -31391,15 +32494,11 @@ items: "aliasColors": { }, - "breakpoint": "50%", - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": 0 - }, - "datasource": "prometheus", - "fontSize": "80%", - "format": "Bps", + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, "gridPos": { "h": 9, "w": 12, @@ -31407,44 +32506,100 @@ items: "y": 11 }, "id": 6, - "interval": null, "legend": { - "percentage": true, - "percentageDecimals": null, + "alignAsTable": true, + "avg": false, + "current": 
true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, "show": true, + "sort": "current", + "sortDesc": true, + "total": false, "values": true }, - "legendType": "Right side", - "maxDataPoints": 3, - "nullPointMode": "connected", - "pieType": "donut", + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, "targets": [ { "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", - "instant": null, "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A" + "legendFormat": "{{ pod }}", + "refId": "A", + "step": 10 } ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, "title": "Average Rate of Bytes Received", - "type": "grafana-piechart-panel", - "valueName": "current" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, { "aliasColors": { }, - "breakpoint": "50%", - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": 0 - }, - "datasource": "prometheus", - "fontSize": "80%", - "format": "Bps", + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", 
+ "fill": 2, "gridPos": { "h": 9, "w": 12, @@ -31452,30 +32607,90 @@ items: "y": 11 }, "id": 7, - "interval": null, "legend": { - "percentage": true, - "percentageDecimals": null, + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, "show": true, + "sort": "current", + "sortDesc": true, + "total": false, "values": true }, - "legendType": "Right side", - "maxDataPoints": 3, - "nullPointMode": "connected", - "pieType": "donut", + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, "targets": [ { "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", - "instant": null, "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A" + "legendFormat": "{{ pod }}", + "refId": "A", + "step": 10 } ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, "title": "Average Rate of Bytes Transmitted", - "type": "grafana-piechart-panel", - "valueName": "current" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] } ], "repeat": null, @@ -31514,7 +32729,7 @@ 
items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -31613,7 +32828,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -31723,7 +32938,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -31822,7 +33037,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -31941,7 +33156,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -32040,7 +33255,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -32153,6 +33368,22 @@ items: ], "templating": { "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, { "allValue": ".+", "auto": false, @@ -32162,7 +33393,7 @@ items: "text": "kube-system", "value": "kube-system" }, - "datasource": "prometheus", + "datasource": "$datasource", "definition": "label_values(container_network_receive_packets_total, namespace)", "hide": 0, "includeAll": true, @@ -32194,7 +33425,7 @@ items: "text": "", "value": "" }, - "datasource": "prometheus", + "datasource": "$datasource", "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\"}, workload)", "hide": 0, "includeAll": false, @@ -32226,7 +33457,7 @@ items: "text": "deployment", "value": "deployment" }, - "datasource": "prometheus", + "datasource": "$datasource", "definition": 
"label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)", "hide": 0, "includeAll": false, @@ -32258,7 +33489,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, @@ -32303,7 +33534,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, diff --git a/prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/cluster-role-binding.yaml b/prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/cluster-role-binding.yaml new file mode 100644 index 000000000..ed428132b --- /dev/null +++ b/prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/cluster-role-binding.yaml @@ -0,0 +1,15 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: v1.8.0 + name: kube-state-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kube-state-metrics +subjects: +- kind: ServiceAccount + name: kube-state-metrics + namespace: monitoring \ No newline at end of file diff --git a/prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/cluster-role.yaml b/prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/cluster-role.yaml new file mode 100644 index 000000000..113a0af34 --- /dev/null +++ b/prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/cluster-role.yaml @@ -0,0 +1,110 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: v1.8.0 + name: kube-state-metrics +rules: +- apiGroups: + - "" + resources: + - configmaps + - secrets + - nodes + - pods + - services + - resourcequotas + - replicationcontrollers + - limitranges + - persistentvolumeclaims + - persistentvolumes + - namespaces + - endpoints 
+ verbs: + - list + - watch +- apiGroups: + - extensions + resources: + - daemonsets + - deployments + - replicasets + - ingresses + verbs: + - list + - watch +- apiGroups: + - apps + resources: + - statefulsets + - daemonsets + - deployments + - replicasets + verbs: + - list + - watch +- apiGroups: + - batch + resources: + - cronjobs + - jobs + verbs: + - list + - watch +- apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + verbs: + - list + - watch +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +- apiGroups: + - policy + resources: + - poddisruptionbudgets + verbs: + - list + - watch +- apiGroups: + - certificates.k8s.io + resources: + - certificatesigningrequests + verbs: + - list + - watch +- apiGroups: + - storage.k8s.io + resources: + - storageclasses + - volumeattachments + verbs: + - list + - watch +- apiGroups: + - admissionregistration.k8s.io + resources: + - mutatingwebhookconfigurations + - validatingwebhookconfigurations + verbs: + - list + - watch +- apiGroups: + - networking.k8s.io + resources: + - networkpolicies + verbs: + - list + - watch \ No newline at end of file diff --git a/prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/deployment.yaml b/prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/deployment.yaml new file mode 100644 index 000000000..958252baf --- /dev/null +++ b/prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/deployment.yaml @@ -0,0 +1,42 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + k8s-app: kube-state-metrics + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: v1.8.0 + name: kube-state-metrics +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: kube-state-metrics + template: + metadata: + labels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: v1.8.0 + spec: 
+ containers: + - image: quay.io/coreos/kube-state-metrics:v1.8.0 + livenessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 5 + timeoutSeconds: 5 + name: kube-state-metrics + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 8081 + name: telemetry + readinessProbe: + httpGet: + path: / + port: 8081 + initialDelaySeconds: 5 + timeoutSeconds: 5 + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: kube-state-metrics \ No newline at end of file diff --git a/prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/service-account.yaml b/prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/service-account.yaml new file mode 100644 index 000000000..c2a02e9d8 --- /dev/null +++ b/prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/service-account.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: v1.8.0 + name: kube-state-metrics \ No newline at end of file diff --git a/prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/service-monitor.yaml b/prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/service-monitor.yaml new file mode 100644 index 000000000..97a3f8fe2 --- /dev/null +++ b/prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/service-monitor.yaml @@ -0,0 +1,22 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: kube-state-metrics + labels: + prometheus: cluster-monitoring + k8s-app: kube-state-metrics +spec: + targetLabels: + - cluster + jobLabel: kube-state-metrics + selector: + matchLabels: + k8s-app: kube-state-metrics + namespaceSelector: + matchNames: + - monitoring + endpoints: + - port: http-metrics + honorLabels: true + scheme: http + interval: 60s \ No newline at end of file diff --git a/prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/service.yaml b/prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/service.yaml new 
file mode 100644 index 000000000..d1baef903 --- /dev/null +++ b/prometheus-monitoring/kubernetes/1.14.8/kube-state-metrics/service.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: v1.8.0 + k8s-app: kube-state-metrics + cluster: docker-for-desktop + name: kube-state-metrics +spec: + clusterIP: None + ports: + - name: http-metrics + port: 8080 + targetPort: http-metrics + - name: telemetry + port: 8081 + targetPort: telemetry + selector: + app.kubernetes.io/name: kube-state-metrics \ No newline at end of file diff --git a/prometheus-monitoring/kubernetes/1.14.8/prometheus-cluster-monitoring/apiserver.servicemonitor.yaml b/prometheus-monitoring/kubernetes/1.14.8/prometheus-cluster-monitoring/apiserver.servicemonitor.yaml new file mode 100644 index 000000000..b7e7ab2ed --- /dev/null +++ b/prometheus-monitoring/kubernetes/1.14.8/prometheus-cluster-monitoring/apiserver.servicemonitor.yaml @@ -0,0 +1,24 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: kube-apiserver + labels: + prometheus: cluster-monitoring + k8s-app: apiserver +spec: + jobLabel: component + selector: + matchLabels: + component: apiserver + provider: kubernetes + namespaceSelector: + matchNames: + - default + endpoints: + - port: https + interval: 60s + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + serverName: kubernetes + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token \ No newline at end of file diff --git a/prometheus-monitoring/kubernetes/1.14.8/prometheus-cluster-monitoring/kubelet.servicemonitor.yaml b/prometheus-monitoring/kubernetes/1.14.8/prometheus-cluster-monitoring/kubelet.servicemonitor.yaml new file mode 100644 index 000000000..c82ffcb8d --- /dev/null +++ b/prometheus-monitoring/kubernetes/1.14.8/prometheus-cluster-monitoring/kubelet.servicemonitor.yaml @@ -0,0 +1,77 @@ +apiVersion: 
monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: kubelet + name: kubelet + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + honorLabels: true + interval: 30s + metricRelabelings: + - action: drop + regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds) + sourceLabels: + - __name__ + - action: drop + regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds) + sourceLabels: + - __name__ + - action: drop + regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs) + sourceLabels: + - __name__ + - action: drop + regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout) + sourceLabels: + - __name__ + - action: drop + regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total) + sourceLabels: + - __name__ + - action: drop + regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary) + sourceLabels: + - __name__ + - 
action: drop + regex: transformation_(transformation_latencies_microseconds|failures_total) + sourceLabels: + - __name__ + - action: drop + regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinis
hed_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|AvailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries) + sourceLabels: + - __name__ + port: https-metrics + relabelings: + - sourceLabels: + - __metrics_path__ + targetLabel: metrics_path + scheme: https + tlsConfig: + insecureSkipVerify: true + - 
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + honorLabels: true + interval: 30s + metricRelabelings: + - action: drop + regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s) + sourceLabels: + - __name__ + path: /metrics/cadvisor + port: https-metrics + relabelings: + - sourceLabels: + - __metrics_path__ + targetLabel: metrics_path + scheme: https + tlsConfig: + insecureSkipVerify: true + jobLabel: k8s-app + namespaceSelector: + matchNames: + - kube-system + selector: + matchLabels: + k8s-app: kubelet \ No newline at end of file diff --git a/prometheus-monitoring/kubernetes/1.14.8/prometheus-standalone/prometheus.yaml b/prometheus-monitoring/kubernetes/1.14.8/prometheus-standalone/prometheus.yaml index 1f5f2c451..b3b58816a 100644 --- a/prometheus-monitoring/kubernetes/1.14.8/prometheus-standalone/prometheus.yaml +++ b/prometheus-monitoring/kubernetes/1.14.8/prometheus-standalone/prometheus.yaml @@ -16,16 +16,6 @@ spec: operator: In values: - apps - # - kube-state-metrics - # - apiserver - # - kubelet - # ruleSelector: - # matchLabels: - # role: alert-rules - # prometheus: k8s - # resources: - # requests: - # memory: 400Mi --- apiVersion: v1 kind: ServiceAccount