###########################
## Sampling Rules Config ##
###########################
# DryRun - If enabled, marks traces that would be dropped given current sampling rules,
# and sends all traces regardless
# DryRun = false
# DryRunFieldName - the key to add to event data when using DryRun mode above; defaults to refinery_kept
# DryRunFieldName = "refinery_kept"
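# For illustration (hedged, not part of the stock config): with DryRun = true, a trace
# that the rules below would have dropped is still forwarded, but its events carry the
# DryRunFieldName key (e.g. refinery_kept = false) so the rules can be audited before
# they are enforced.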
# DeterministicSampler is a section of the config for manipulating the
# Deterministic Sampler implementation. This is the simplest sampling algorithm
# - it is a static sample rate, choosing traces randomly to either keep or drop
# (at the appropriate rate). It is not influenced by the contents of the trace.
Sampler = "DeterministicSampler"
# SampleRate is the rate at which to sample. It indicates a ratio, where one
# sample trace is kept for every n traces seen. For example, a SampleRate of 30
# will keep 1 out of every 30 traces. The choice on whether to keep any specific
# trace is random, so the rate is approximate.
# Eligible for live reload.
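# For example (illustrative, not a recommendation): SampleRate = 1 keeps every trace,
# while SampleRate = 20 would keep roughly 1 out of every 20.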
SampleRate = 1
[dataset1]
# Note: If your dataset name contains a space, you will have to escape the dataset name
# using single quotes, such as ['dataset 1']
# DynamicSampler is a section of the config for manipulating the simple Dynamic Sampler
# implementation. This sampler collects the values of a number of fields from a
# trace and uses them to form a key. This key is handed to the standard dynamic
# sampler algorithm which generates a sample rate based on the frequency with
# which that key has appeared in the previous ClearFrequencySec seconds. See
# https://github.com/honeycombio/dynsampler-go for more detail on the mechanics
# of the dynamic sampler. This sampler uses the AvgSampleRate algorithm from
# that package.
Sampler = "DynamicSampler"
# SampleRate is the goal rate at which to sample. It indicates a ratio, where
# one sample trace is kept for every n traces seen. For example, a SampleRate of
# 30 will keep 1 out of every 30 traces. This rate is handed to the dynamic
# sampler, which assigns a sample rate for each trace based on the fields selected
# from that trace.
# Eligible for live reload.
SampleRate = 2
# FieldList is a list of all the field names to use to form the key that will be
# handed to the dynamic sampler. The cardinality of the combination of values
# from all of these keys should be reasonable in the face of the frequency of
# those keys. If the combination of fields in these keys essentially makes them
# unique, the dynamic sampler will do no sampling. If the keys have too few
# values, you won't get samples of the most interesting traces. A good key
# selection will have consistent values for high frequency boring traffic and
# unique values for outliers and interesting traffic. Including an error field
# (or something like HTTP status code) is an excellent choice. As an example,
# assuming 30 or so endpoints, a combination of HTTP endpoint and status code
# would be a good set of keys, letting you accurately see usage of all endpoints
# and call out failing traffic to any endpoint. Field
# names may come from any span in the trace.
# Eligible for live reload.
FieldList = ["request.method","response.status_code"]
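# For illustration (assumed values, not from this file): with the FieldList above, a
# trace with request.method = "GET" and response.status_code = 200 falls into one
# high-frequency key that is sampled heavily, while "GET" / 500 traffic forms a rarer
# key that the dynamic sampler keeps at a rate much closer to 1.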
# UseTraceLength will add the number of spans in the trace into the dynamic
# sampler as part of the key. The number of spans is exact, so if there are
# normally small variations in trace length you may want to leave this off. If
# traces are of consistent length and a change in trace length is a useful
# indicator of traces you'd like to see in Honeycomb, set this to true.
# Eligible for live reload.
UseTraceLength = true
# AddSampleRateKeyToTrace - when this is set to true, the sampler will add a field
# to the root span of the trace containing the key used by the sampler to decide
# the sample rate. This can be helpful in understanding why the sampler is
# making certain decisions about sample rate, and can help you choose a better
# sample rate key (i.e. the FieldList setting above).
AddSampleRateKeyToTrace = true
# AddSampleRateKeyToTraceField is the name of the field the sampler will use
# when adding the sample rate key to the trace. This setting is only used when
# AddSampleRateKeyToTrace is true.
AddSampleRateKeyToTraceField = "meta.refinery.dynsampler_key"
# ClearFrequencySec determines the period (in seconds) over which the sampler
# will calculate the sample rate. This setting defaults to 30.
# Eligible for live reload.
ClearFrequencySec = 60
[dataset2]
# EMADynamicSampler is a section of the config for manipulating the Exponential
# Moving Average (EMA) Dynamic Sampler implementation. Like the simple DynamicSampler,
# it attempts to average a given sample rate, weighting rare traffic and frequent
# traffic differently so as to end up with the correct average.
#
# EMADynamicSampler is an improvement upon the simple DynamicSampler and is recommended
# for most use cases. Based on the DynamicSampler implementation, EMADynamicSampler differs
# in that rather than compute rate based on a periodic sample of traffic, it maintains an Exponential
# Moving Average of counts seen per key, and adjusts this average at regular intervals.
# The weight applied to more recent intervals is defined by `weight`, a number between
# (0, 1) - larger values weight the average more toward recent observations. In other words,
# a larger weight will cause sample rates to adapt more quickly to traffic patterns,
# while a smaller weight will result in sample rates that are less sensitive to bursts or drops
# in traffic and thus more consistent over time.
#
# Keys that are not found in the EMA will always have a sample
# rate of 1. Keys that occur more frequently will be sampled on a logarithmic
# curve. In other words, every key will be represented at least once in any
# given window and more frequent keys will have their sample rate
# increased proportionally to wind up with the goal sample rate.
Sampler = "EMADynamicSampler"
# GoalSampleRate is the goal rate at which to sample. It indicates a ratio, where
# one sample trace is kept for every n traces seen. For example, a GoalSampleRate of
# 30 will keep 1 out of every 30 traces. This rate is handed to the dynamic
# sampler, which assigns a sample rate for each trace based on the fields selected
# from that trace.
# Eligible for live reload.
GoalSampleRate = 2
# FieldList is a list of all the field names to use to form the key that will be
# handed to the dynamic sampler. The cardinality of the combination of values
# from all of these keys should be reasonable in the face of the frequency of
# those keys. If the combination of fields in these keys essentially makes them
# unique, the dynamic sampler will do no sampling. If the keys have too few
# values, you won't get samples of the most interesting traces. A good key
# selection will have consistent values for high frequency boring traffic and
# unique values for outliers and interesting traffic. Including an error field
# (or something like HTTP status code) is an excellent choice. As an example,
# assuming 30 or so endpoints, a combination of HTTP endpoint and status code
# would be a good set of keys, letting you accurately see usage of all endpoints
# and call out failing traffic to any endpoint. Field
# names may come from any span in the trace.
# Eligible for live reload.
FieldList = ["request.method","response.status_code"]
# UseTraceLength will add the number of spans in the trace into the dynamic
# sampler as part of the key. The number of spans is exact, so if there are
# normally small variations in trace length you may want to leave this off. If
# traces are of consistent length and a change in trace length is a useful
# indicator of traces you'd like to see in Honeycomb, set this to true.
# Eligible for live reload.
UseTraceLength = true
# AddSampleRateKeyToTrace - when this is set to true, the sampler will add a field
# to the root span of the trace containing the key used by the sampler to decide
# the sample rate. This can be helpful in understanding why the sampler is
# making certain decisions about sample rate, and can help you choose a better
# sample rate key (i.e. the FieldList setting above).
AddSampleRateKeyToTrace = true
# AddSampleRateKeyToTraceField is the name of the field the sampler will use
# when adding the sample rate key to the trace. This setting is only used when
# AddSampleRateKeyToTrace is true.
AddSampleRateKeyToTraceField = "meta.refinery.dynsampler_key"
# AdjustmentInterval defines how often (in seconds) we adjust the moving average from
# recent observations. Default 15s
# Eligible for live reload.
AdjustmentInterval = 15
# Weight is a value between (0, 1) indicating the weighting factor used to adjust
# the EMA. With larger values, newer data will influence the average more, and older
# values will be factored out more quickly. In mathematical literature concerning EMA,
# this is referred to as the `alpha` constant.
# Default is 0.5
# Eligible for live reload.
Weight = 0.5
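# For reference (standard EMA form; a sketch, not the exact implementation): each
# adjustment interval updates a key's average roughly as
#   newAvg = Weight * currentCount + (1 - Weight) * previousAvg
# so Weight = 0.5 gives recent and historical counts equal influence.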
# MaxKeys, if greater than 0, limits the number of distinct keys tracked in EMA.
# Once MaxKeys is reached, new keys will not be included in the sample rate map, but
# existing keys will continue to be counted. You can use this to keep the sample rate
# map size under control.
# Eligible for live reload
MaxKeys = 0
# AgeOutValue indicates the threshold for removing keys from the EMA. The EMA of any key
# will approach 0 if it is not repeatedly observed, but will never truly reach it, so we have to
# decide what constitutes "zero". Keys with averages below this threshold will be removed
# from the EMA. Default is the same as Weight, as this prevents a key with the smallest
# integer value (1) from being aged out immediately. This value should generally be <= Weight,
# unless you have very specific reasons to set it higher.
# Eligible for live reload
AgeOutValue = 0.5
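# Worked example (assuming the EMA sketch above): a key observed once gets an average
# of about Weight * 1 = 0.5, which is why the default AgeOutValue equals Weight and the
# key is not aged out immediately; after one unseen interval it decays to roughly 0.25,
# falls below AgeOutValue = 0.5, and is removed.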
# BurstMultiple, if set, is multiplied by the sum of the running average of counts to define
# the burst detection threshold. If the total count observed for a given interval exceeds the threshold,
# the EMA is updated immediately, rather than waiting on the AdjustmentInterval.
# Defaults to 2; negative value disables. With a default of 2, if your traffic suddenly doubles,
# burst detection will kick in.
# Eligible for live reload
BurstMultiple = 2.0
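# Worked example (illustrative numbers): if the running averages across all keys sum to
# about 1,000 events per interval, BurstMultiple = 2.0 sets the burst threshold near
# 2,000; any interval that exceeds it triggers an immediate EMA update.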
# BurstDetectionDelay indicates the number of intervals to run after Start is called before
# burst detection kicks in.
# Defaults to 3
# Eligible for live reload
BurstDetectionDelay = 3
[dataset3]
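# A minimal static configuration: keep 1 out of every 10 traces.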
Sampler = "DeterministicSampler"
SampleRate = 10
[dataset4]
Sampler = "RulesBasedSampler"
# Optional: if set to true, the rules will also check nested JSON fields, referenced in the parent.child format
CheckNestedFields = false
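# For illustration (hypothetical field names): with CheckNestedFields = true, a span
# whose "error" field contains a JSON object could be matched by a condition on
# field = "error.type".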
[[dataset4.rule]]
name = "drop healthchecks"
drop = true
[[dataset4.rule.condition]]
field = "http.route"
operator = "="
value = "/health-check"
[[dataset4.rule]]
name = "keep slow 500 errors"
SampleRate = 1
[[dataset4.rule.condition]]
field = "status_code"
operator = "="
value = 500
[[dataset4.rule.condition]]
field = "duration_ms"
operator = ">="
value = 1000.789
[[dataset4.rule]]
name = "dynamically sample 200 responses"
[[dataset4.rule.condition]]
field = "status_code"
operator = "="
value = 200
[dataset4.rule.sampler.EMADynamicSampler]
Sampler = "EMADynamicSampler"
GoalSampleRate = 15
FieldList = ["request.method", "request.route"]
AddSampleRateKeyToTrace = true
AddSampleRateKeyToTraceField = "meta.refinery.dynsampler_key"
# Note that Refinery comparisons are type-dependent. If you are operating in an environment where different
# telemetry may send the same field with different types (for example, some systems send status codes as "200"
# instead of 200), you may need to create additional rules to cover these cases.
[[dataset4.rule]]
name = "dynamically sample 200 string responses"
[[dataset4.rule.condition]]
field = "status_code"
operator = "="
value = "200"
[dataset4.rule.sampler.EMADynamicSampler]
Sampler = "EMADynamicSampler"
GoalSampleRate = 15
FieldList = ["request.method", "request.route"]
AddSampleRateKeyToTrace = true
AddSampleRateKeyToTraceField = "meta.refinery.dynsampler_key"
[[dataset4.rule]]
name = "sample traces originating from a service"
# if scope is set to "span", a single span in the trace must match
# *all* of the conditions associated with this rule for the rule to
# apply to the trace.
#
# this is especially helpful when sampling a dataset written to
# by multiple services that call one another in normal operation –
# you can set Scope to 'span' to attribute traces to an origin
# service in a way that would be difficult without it.
Scope = "span"
SampleRate = 5
[[dataset4.rule.condition]]
field = "service name"
operator = "="
value = "users"
[[dataset4.rule.condition]]
field = "meta.span_type"
operator = "="
value = "root"
[[dataset4.rule]]
SampleRate = 10 # default applied when no other rules match; if this rule is omitted, the sample rate defaults to 10
[dataset5]
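# TotalThroughputSampler (also from the dynsampler-go package) adjusts per-key sample
# rates with the goal of keeping the overall volume of kept events near
# GoalThroughputPerSec, rather than targeting an average sample rate.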
Sampler = "TotalThroughputSampler"
GoalThroughputPerSec = 100
FieldList = "[request.method]"