-
Notifications
You must be signed in to change notification settings - Fork 600
/
config.libsonnet
123 lines (106 loc) · 5.99 KB
/
config.libsonnet
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
{
_config+:: {
SLOs: {
apiserver: {
days: 30, // The number of days we alert on burning too much error budget for.
target: 0.99, // The target percentage of availability between 0-1. (0.99 = 99%, 0.999 = 99.9%)
// Only change these windows when you really understand multi burn rate errors.
// Even though you can change the days above (which will change availability calculations)
// these windows will alert on a 30 days sliding window. We're looking into basing these windows on the given days too.
windows: [
{ severity: 'critical', 'for': '2m', long: '1h', short: '5m', factor: 14.4 },
{ severity: 'critical', 'for': '15m', long: '6h', short: '30m', factor: 6 },
{ severity: 'warning', 'for': '1h', long: '1d', short: '2h', factor: 3 },
{ severity: 'warning', 'for': '3h', long: '3d', short: '6h', factor: 1 },
],
},
},
// Selectors are inserted between {} in Prometheus queries.
cadvisorSelector: 'job="cadvisor"',
kubeletSelector: 'job="kubelet"',
kubeStateMetricsSelector: 'job="kube-state-metrics"',
nodeExporterSelector: 'job="node-exporter"',
kubeSchedulerSelector: 'job="kube-scheduler"',
kubeControllerManagerSelector: 'job="kube-controller-manager"',
kubeApiserverSelector: 'job="kube-apiserver"',
kubeProxySelector: 'job="kube-proxy"',
podLabel: 'pod',
hostNetworkInterfaceSelector: 'device!~"veth.+"',
hostMountpointSelector: 'mountpoint="/"',
windowsExporterSelector: 'job="kubernetes-windows-exporter"',
containerfsSelector: 'container!=""',
// List of labels to join for different type of metrics
// Only works if your environment has the labels kube_%s_labels (e.g. kube_pod_labels) available.
common_join_labels: [],
pods_join_labels: $._config.common_join_labels,
statefulsets_join_labels: $._config.common_join_labels,
deployments_join_labels: $._config.common_join_labels,
daemonsets_join_labels: $._config.common_join_labels,
horizontalpodautoscalers_join_labels: $._config.common_join_labels,
jobs_join_labels: $._config.common_join_labels,
// Grafana dashboard IDs are necessary for stable links for dashboards
grafanaDashboardIDs: {
'apiserver.json': std.md5('apiserver.json'),
'cluster-total.json': std.md5('cluster-total.json'),
'controller-manager.json': std.md5('controller-manager.json'),
'k8s-resources-cluster.json': std.md5('k8s-resources-cluster.json'),
'k8s-resources-multicluster.json': std.md5('k8s-resources-multicluster.json'),
'k8s-resources-namespace.json': std.md5('k8s-resources-namespace.json'),
'k8s-resources-node.json': std.md5('k8s-resources-node.json'),
'k8s-resources-pod.json': std.md5('k8s-resources-pod.json'),
'k8s-resources-windows-cluster.json': std.md5('k8s-resources-windows-cluster.json'),
'k8s-resources-windows-namespace.json': std.md5('k8s-resources-windows-namespace.json'),
'k8s-resources-windows-pod.json': std.md5('k8s-resources-windows-pod.json'),
'k8s-resources-workload.json': std.md5('k8s-resources-workload.json'),
'k8s-resources-workloads-namespace.json': std.md5('k8s-resources-workloads-namespace.json'),
'k8s-windows-cluster-rsrc-use.json': std.md5('k8s-windows-cluster-rsrc-use.json'),
'k8s-windows-node-rsrc-use.json': std.md5('k8s-windows-node-rsrc-use.json'),
'kubelet.json': std.md5('kubelet.json'),
'namespace-by-pod.json': std.md5('namespace-by-pod.json'),
'namespace-by-workload.json': std.md5('namespace-by-workload.json'),
'persistentvolumesusage.json': std.md5('persistentvolumesusage.json'),
'pod-total.json': std.md5('pod-total.json'),
'proxy.json': std.md5('proxy.json'),
'scheduler.json': std.md5('scheduler.json'),
'workload-total.json': std.md5('workload-total.json'),
},
// Support for Grafana 7.2+ `$__rate_interval` instead of `$__interval`
grafana72: true,
grafanaIntervalVar: if self.grafana72 then '$__rate_interval' else '$__interval',
// Config for the Grafana dashboards in the Kubernetes Mixin
grafanaK8s: {
dashboardNamePrefix: 'Kubernetes / ',
dashboardTags: ['kubernetes-mixin'],
// For links between grafana dashboards, you need to tell us if your grafana
// servers under some non-root path.
linkPrefix: '',
// The default refresh time for all dashboards, default to 10s
refresh: '10s',
minimumTimeInterval: '1m',
// Timezone for Grafana dashboards:: UTC, browser, ...
grafanaTimezone: 'UTC',
},
// Opt-in to multiCluster dashboards by overriding this and the clusterLabel.
showMultiCluster: false,
clusterLabel: 'cluster',
namespaceLabel: 'namespace',
// Default datasource name
datasourceName: 'default',
// Datasource instance filter regex
datasourceFilterRegex: '',
// This list of filesystem is referenced in various expressions.
fstypes: ['ext[234]', 'btrfs', 'xfs', 'zfs'],
fstypeSelector: 'fstype=~"%s"' % std.join('|', self.fstypes),
// This list of disk device names is referenced in various expressions.
diskDevices: ['mmcblk.p.+', 'nvme.+', 'rbd.+', 'sd.+', 'vd.+', 'xvd.+', 'dm-.+', 'dasd.+'],
diskDeviceSelector: 'device=~"(/dev.+)|%s"' % std.join('|', self.diskDevices),
// Certain workloads (e.g. KubeVirt/CDI) will fully utilise the persistent volume they claim
// the size of the PV will never grow since they consume the entirety of the volume by design.
// This selector allows an admin to 'pre-mark' the PVC of such a workload (or for any other use case)
// so that specific storage alerts will not fire.With the default selector, adding a label `excluded-from-alerts: 'true'`
// to the PVC will have the desired effect.
pvExcludedSelector: 'label_excluded_from_alerts="true"',
// Default timeout value for k8s Jobs. The jobs which are active beyond this duration would trigger KubeJobNotCompleted alert.
kubeJobTimeoutDuration: 12 * 60 * 60,
},
}