-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathisp-monitor-influxdata-template.yml
469 lines (434 loc) · 16.5 KB
/
isp-monitor-influxdata-template.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
# Bucket with the display name "Network". The deadman check, the dashboard
# queries and the Telegraf output in this template all address it by that name.
apiVersion: influxdata.com/v2alpha1
kind: Bucket
metadata:
  name: fervent-herschel-737001
spec:
  name: Network
  retentionRules:
    # 2592000 s = 30 days (30 * 24 * 60 * 60). Written as a plain integer
    # rather than the float literal 2.592e+06 so that every YAML loader
    # yields an integer number of seconds.
    - everySeconds: 2592000
      type: expire
---
# Deadman check "Network Healthcheck": evaluated every minute against the
# average_response_ms field of the ping measurement in the "Network" bucket,
# reporting level CRIT.
apiVersion: influxdata.com/v2alpha1
kind: CheckDeadman
metadata:
  name: laughing-chatterjee-b37001
spec:
  name: Network Healthcheck
  status: active
  level: CRIT
  every: 1m0s
  # NOTE(review): timeSince is presumably the silence window that triggers the
  # check and staleTime the age after which a series is no longer evaluated;
  # confirm against the InfluxDB deadman check documentation.
  timeSince: 1m0s
  staleTime: 10m0s
  statusMessageTemplate: "Check: ${ r._check_name } is: ${ r._level }"
  query: |-
    from(bucket: "Network")
    |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
    |> filter(fn: (r) => r["_measurement"] == "ping")
    |> filter(fn: (r) => r["_field"] == "average_response_ms")
---
# Slack notification endpoint, displayed as "Webhook". Both notification rules
# in this template reference it through metadata.name.
apiVersion: influxdata.com/v2alpha1
kind: NotificationEndpointSlack
metadata:
  name: laughing-mccarthy-b37003
spec:
  name: Webhook
  status: active
  # Placeholder only: substitute the real incoming-webhook URL before use.
  # NOTE(review): the placeholder is spelled "REPALCE"; it is kept verbatim in
  # case setup docs or scripts match on this exact string.
  url: 'https://REPALCE_ME_WITH_WEBHOOK.com'
---
# Notification rule "Going online": matches a status change from CRIT to OK
# and sends the message to endpoint laughing-mccarthy-b37003.
apiVersion: influxdata.com/v2alpha1
kind: NotificationRule
metadata:
  name: obstinate-beaver-b37007
spec:
  name: Going online
  endpointName: laughing-mccarthy-b37003
  every: 1m0s
  statusRules:
    - previousLevel: CRIT
      currentLevel: OK
  # Folded scalar: the two source lines join with a single space into one
  # message line.
  messageTemplate: >-
    Notification Rule: ${ r._notification_rule_name } triggered
    by check: ${ r._check_name }: ${ r._message }
---
# Notification rule "Going Offline": matches a status change from OK to CRIT
# and sends the message to endpoint laughing-mccarthy-b37003.
apiVersion: influxdata.com/v2alpha1
kind: NotificationRule
metadata:
  name: pedantic-ptolemy-b37005
spec:
  name: Going Offline
  endpointName: laughing-mccarthy-b37003
  every: 1m0s
  statusRules:
    - previousLevel: OK
      currentLevel: CRIT
  # Folded scalar: the two source lines join with a single space into one
  # message line.
  messageTemplate: >-
    Notification Rule: ${ r._notification_rule_name } triggered
    by check: ${ r._check_name }: ${ r._message }
---
# Dashboard "Network Monitor": three line charts (kind Xy), each 4 wide and
# 4 high, all reading from the "Network" bucket:
#   - Ping          -> ping.average_response_ms
#   - Packet Loss   -> ping.percent_packet_loss
#   - DNS Resolver  -> dns_query.query_time_ms
# Every query averages its field over v.windowPeriod.
apiVersion: influxdata.com/v2alpha1
kind: Dashboard
metadata:
  name: alerting-hamilton-f37001
spec:
  charts:
    # Chart 1: mean ping response time. No xPos is set on this chart
    # (presumably it defaults to the left edge; confirm in the template docs).
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          name: y
          scale: linear
      colors:
        - hex: '#31C0F6'
          id: 976493ec-e5fb-49c1-a0db-4cdaa1fb8961
          name: Nineteen Eighty Four
          type: scale
        - hex: '#A500A5'
          id: 9941e749-11f0-4114-810b-e7e14a4b222a
          name: Nineteen Eighty Four
          type: scale
        - hex: '#FF7E27'
          id: 3baf79cd-b2d7-41ed-bbd5-dc38cc76143d
          name: Nineteen Eighty Four
          type: scale
      geom: line
      height: 4
      hoverDimension: auto
      kind: Xy
      legendColorizeRows: true
      legendOpacity: 1
      legendOrientationThreshold: 10
      name: Ping
      position: overlaid
      queries:
        - query: |-
            from(bucket: "Network")
            |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
            |> filter(fn: (r) => r["_measurement"] == "ping")
            |> filter(fn: (r) => r["_field"] == "average_response_ms")
            |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
            |> yield(name: "mean")
      width: 4
      xCol: _time
      yCol: _value
    # Chart 2: mean packet-loss percentage, placed at xPos 4. The label was
    # "Package Loss", but the plotted field is percent_packet_loss.
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          name: y
          scale: linear
      colors:
        - hex: '#31C0F6'
          id: 976493ec-e5fb-49c1-a0db-4cdaa1fb8961
          name: Nineteen Eighty Four
          type: scale
        - hex: '#A500A5'
          id: 9941e749-11f0-4114-810b-e7e14a4b222a
          name: Nineteen Eighty Four
          type: scale
        - hex: '#FF7E27'
          id: 3baf79cd-b2d7-41ed-bbd5-dc38cc76143d
          name: Nineteen Eighty Four
          type: scale
      geom: line
      height: 4
      hoverDimension: auto
      kind: Xy
      legendColorizeRows: true
      legendOpacity: 1
      legendOrientationThreshold: 10
      name: Packet Loss
      position: overlaid
      queries:
        - query: |-
            from(bucket: "Network")
            |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
            |> filter(fn: (r) => r["_measurement"] == "ping")
            |> filter(fn: (r) => r["_field"] == "percent_packet_loss")
            |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
            |> yield(name: "mean")
      width: 4
      xCol: _time
      xPos: 4
      yCol: _value
    # Chart 3: mean DNS query time, placed at xPos 8.
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          name: y
          scale: linear
      colors:
        - hex: '#31C0F6'
          id: 976493ec-e5fb-49c1-a0db-4cdaa1fb8961
          name: Nineteen Eighty Four
          type: scale
        - hex: '#A500A5'
          id: 9941e749-11f0-4114-810b-e7e14a4b222a
          name: Nineteen Eighty Four
          type: scale
        - hex: '#FF7E27'
          id: 3baf79cd-b2d7-41ed-bbd5-dc38cc76143d
          name: Nineteen Eighty Four
          type: scale
      geom: line
      height: 4
      hoverDimension: auto
      kind: Xy
      legendColorizeRows: true
      legendOpacity: 1
      legendOrientationThreshold: 10
      name: DNS Resolver
      position: overlaid
      queries:
        - query: |-
            from(bucket: "Network")
            |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
            |> filter(fn: (r) => r["_measurement"] == "dns_query")
            |> filter(fn: (r) => r["_field"] == "query_time_ms")
            |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
            |> yield(name: "mean")
      width: 4
      xCol: _time
      xPos: 8
      yCol: _value
  name: Network Monitor
---
# Telegraf configuration "Network Monitor". The embedded TOML enables two
# inputs (dns_query and ping) and one output (influxdb_v2) that writes to the
# "Network" bucket using $INFLUX_HOST, $INFLUX_TOKEN and $INFLUX_ORG.
apiVersion: influxdata.com/v2alpha1
kind: Telegraf
metadata:
  name: upbeat-kepler-337001
spec:
  # The TOML below is stored verbatim as a single string ("|+" keeps the
  # trailing newline). Everything after a "#" inside it is a TOML comment,
  # so most of the plugin sections shown are disabled examples.
  # NOTE(review): inputs.ping sets both `arguments` and count / ping_interval /
  # timeout; per the comment embedded in the config itself, the latter are
  # ignored whenever `arguments` is non-empty.
  config: |+
    # Telegraf Configuration
    #
    # Configuration for telegraf agent
    [agent]
    ## Default data collection interval for all inputs
    interval = "10s"
    ## Rounds collection interval to 'interval'
    ## ie, if interval="10s" then always collect on :00, :10, :20, etc.
    round_interval = true
    ## Telegraf will send metrics to outputs in batches of at most
    ## metric_batch_size metrics.
    ## This controls the size of writes that Telegraf sends to output plugins.
    metric_batch_size = 1000
    ## For failed writes, telegraf will cache metric_buffer_limit metrics for each
    ## output, and will flush this buffer on a successful write. Oldest metrics
    ## are dropped first when this buffer fills.
    ## This buffer only fills when writes fail to output plugin(s).
    metric_buffer_limit = 10000
    ## Collection jitter is used to jitter the collection by a random amount.
    ## Each plugin will sleep for a random time within jitter before collecting.
    ## This can be used to avoid many plugins querying things like sysfs at the
    ## same time, which can have a measurable effect on the system.
    collection_jitter = "0s"
    ## Default flushing interval for all outputs. Maximum flush_interval will be
    ## flush_interval + flush_jitter
    flush_interval = "10s"
    ## Jitter the flush interval by a random amount. This is primarily to avoid
    ## large write spikes for users running a large number of telegraf instances.
    ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
    flush_jitter = "0s"
    ## By default or when set to "0s", precision will be set to the same
    ## timestamp order as the collection interval, with the maximum being 1s.
    ## ie, when interval = "10s", precision will be "1s"
    ## when interval = "250ms", precision will be "1ms"
    ## Precision will NOT be used for service inputs. It is up to each individual
    ## service input to set the timestamp at the appropriate precision.
    ## Valid time units are "ns", "us" (or "µs"), "ms", "s".
    precision = ""
    ## Logging configuration:
    ## Run telegraf with debug log messages.
    debug = false
    ## Run telegraf in quiet mode (error log messages only).
    quiet = false
    ## Specify the log file name. The empty string means to log to stderr.
    logfile = ""
    ## Override default hostname, if empty use os.Hostname()
    # hostname = "$BALENA_DEVICE_NAME_AT_INIT"
    ## If set to true, do no set the "host" tag in the telegraf agent.
    omit_hostname = false
    ###############################################################################
    # OUTPUT PLUGINS #
    ###############################################################################
    [[outputs.influxdb_v2]]
    ## The URLs of the InfluxDB cluster nodes.
    ##
    ## Multiple URLs can be specified for a single cluster, only ONE of the
    ## urls will be written to each interval.
    ## urls exp: http://127.0.0.1:9999
    urls = ["$INFLUX_HOST"]
    ## Token for authentication.
    token = "$INFLUX_TOKEN"
    ## Organization is the name of the organization you wish to write to; must exist.
    organization = "$INFLUX_ORG"
    ## Destination bucket to write into.
    bucket = "Network"
    ###############################################################################
    # INPUT PLUGINS #
    ###############################################################################
    # # Query given DNS server and gives statistics
    [[inputs.dns_query]]
    ## servers to query
    servers = ["8.8.8.8", "1.1.1.1"]
    # ## Network is the network protocol name.
    # # network = "udp"
    #
    # ## Domains or subdomains to query.
    domains = ["google.com"]
    #
    # ## Query record type.
    # ## Posible values: A, AAAA, CNAME, MX, NS, PTR, TXT, SOA, SPF, SRV.
    # # record_type = "A"
    #
    # ## Dns server port.
    # # port = 53
    #
    # ## Query timeout in seconds.
    timeout = 2
    # # Read formatted metrics from one or more HTTP endpoints
    # [[inputs.http]]
    # ## One or more URLs from which to read formatted metrics
    # urls = [
    # "http://localhost/metrics"
    # ]
    #
    # ## HTTP method
    # # method = "GET"
    #
    # ## Optional HTTP headers
    # # headers = {"X-Special-Header" = "Special-Value"}
    #
    # ## Optional HTTP Basic Auth Credentials
    # # username = "username"
    # # password = "pa$$word"
    #
    # ## HTTP entity-body to send with POST/PUT requests.
    # # body = ""
    #
    # ## HTTP Content-Encoding for write request body, can be set to "gzip" to
    # ## compress body or "identity" to apply no encoding.
    # # content_encoding = "identity"
    #
    # ## Optional TLS Config
    # # tls_ca = "/etc/telegraf/ca.pem"
    # # tls_cert = "/etc/telegraf/cert.pem"
    # # tls_key = "/etc/telegraf/key.pem"
    # ## Use TLS but skip chain & host verification
    # # insecure_skip_verify = false
    #
    # ## Amount of time allowed to complete the HTTP request
    # # timeout = "5s"
    #
    # ## Data format to consume.
    # ## Each data format has its own unique set of configuration options, read
    # ## more about them here:
    # ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
    # # data_format = "influx"
    # # HTTP/HTTPS request given an address a method and a timeout
    # [[inputs.http_response]]
    # ## Server address (default http://localhost)
    # # address = "http://localhost"
    #
    # ## Set http_proxy (telegraf uses the system wide proxy settings if it's is not set)
    # # http_proxy = "http://localhost:8888"
    #
    # ## Set response_timeout (default 5 seconds)
    # # response_timeout = "5s"
    #
    # ## HTTP Request Method
    # # method = "GET"
    #
    # ## Whether to follow redirects from the server (defaults to false)
    # # follow_redirects = false
    #
    # ## Optional HTTP Request Body
    # # body = '''
    # # {'fake':'data'}
    # # '''
    #
    # ## Optional substring or regex match in body of the response
    # # response_string_match = "\"service_status\": \"up\""
    # # response_string_match = "ok"
    # # response_string_match = "\".*_status\".?:.?\"up\""
    #
    # ## Optional TLS Config
    # # tls_ca = "/etc/telegraf/ca.pem"
    # # tls_cert = "/etc/telegraf/cert.pem"
    # # tls_key = "/etc/telegraf/key.pem"
    # ## Use TLS but skip chain & host verification
    # # insecure_skip_verify = false
    #
    # ## HTTP Request Headers (all values must be strings)
    # # [inputs.http_response.headers]
    # # Host = "github.com"
    # # Read flattened metrics from one or more JSON HTTP endpoints
    # [[inputs.httpjson]]
    # ## NOTE This plugin only reads numerical measurements, strings and booleans
    # ## will be ignored.
    #
    # ## Name for the service being polled. Will be appended to the name of the
    # ## measurement e.g. httpjson_webserver_stats
    # ##
    # ## Deprecated (1.3.0): Use name_override, name_suffix, name_prefix instead.
    # name = "webserver_stats"
    #
    # ## URL of each server in the service's cluster
    # servers = [
    # "http://localhost:9999/stats/",
    # "http://localhost:9998/stats/",
    # ]
    # ## Set response_timeout (default 5 seconds)
    # response_timeout = "5s"
    #
    # ## HTTP method to use: GET or POST (case-sensitive)
    # method = "GET"
    #
    # ## List of tag names to extract from top-level of JSON server response
    # # tag_keys = [
    # # "my_tag_1",
    # # "my_tag_2"
    # # ]
    #
    # ## Optional TLS Config
    # # tls_ca = "/etc/telegraf/ca.pem"
    # # tls_cert = "/etc/telegraf/cert.pem"
    # # tls_key = "/etc/telegraf/key.pem"
    # ## Use TLS but skip chain & host verification
    # # insecure_skip_verify = false
    #
    # ## HTTP parameters (all values must be strings). For "GET" requests, data
    # ## will be included in the query. For "POST" requests, data will be included
    # ## in the request body as "x-www-form-urlencoded".
    # # [inputs.httpjson.parameters]
    # # event_type = "cpu_spike"
    # # threshold = "0.75"
    #
    # ## HTTP Headers (all values must be strings)
    # # [inputs.httpjson.headers]
    # # X-Auth-Token = "my-xauth-token"
    # # apiVersion = "v1"
    # # Ping given url(s) and return statistics
    [[inputs.ping]]
    ## List of urls to ping
    urls = ["google.com", "8.8.8.8"]
    ## Number of pings to send per collection (ping -c <COUNT>)
    count = 1
    ## Interval, in s, at which to ping. 0 == default (ping -i <PING_INTERVAL>)
    ## Not available in Windows.
    ping_interval = 10.0
    ## Per-ping timeout, in s. 0 == no timeout (ping -W <TIMEOUT>)
    timeout = 1.0
    ## Total-ping deadline, in s. 0 == no deadline (ping -w <DEADLINE>)
    # deadline = 10
    ## Interface or source address to send ping from (ping -I <INTERFACE/SRC_ADDR>)
    ## on Darwin and Freebsd only source address possible: (ping -S <SRC_ADDR>)
    # interface = ""
    ## Specify the ping executable binary, default is "ping"
    binary = "/bin/ping"
    ## Arguments for ping command
    ## when arguments is not empty, other options (ping_interval, timeout, etc) will be ignored
    arguments = ["-c", "1", "-n", "-s", "32", "-i", "1", "-W", "1", "-w", "10"]
  # Folded scalar: the two lines join with a single space.
  description: >-
    Simple monitor with Ping and DNS query. Requires $INFLUX_HOST; $INFLUX_TOKEN;
    $INFLUX_ORG
  name: Network Monitor