forked from timstclair/kubernetes
-
Notifications
You must be signed in to change notification settings - Fork 0
/
events_cache.go
360 lines (310 loc) · 11.6 KB
/
events_cache.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package record
import (
"encoding/json"
"fmt"
"strings"
"sync"
"time"
"github.com/golang/groupcache/lru"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/unversioned"
"k8s.io/kubernetes/pkg/util"
"k8s.io/kubernetes/pkg/util/sets"
"k8s.io/kubernetes/pkg/util/strategicpatch"
)
const (
maxLruCacheEntries = 4096
// if we see the same event that varies only by message
// more than 10 times in a 10 minute period, aggregate the event
defaultAggregateMaxEvents = 10
defaultAggregateIntervalInSeconds = 600
)
// getEventKey builds unique event key based on source, involvedObject, reason, message
func getEventKey(event *api.Event) string {
return strings.Join([]string{
event.Source.Component,
event.Source.Host,
event.InvolvedObject.Kind,
event.InvolvedObject.Namespace,
event.InvolvedObject.Name,
string(event.InvolvedObject.UID),
event.InvolvedObject.APIVersion,
event.Type,
event.Reason,
event.Message,
},
"")
}
// EventFilterFunc is a function that returns true if the event should be skipped
type EventFilterFunc func(event *api.Event) bool
// DefaultEventFilterFunc returns false for all incoming events
func DefaultEventFilterFunc(event *api.Event) bool {
return false
}
// EventAggregatorKeyFunc is responsible for grouping events for aggregation
// It returns a tuple of the following:
// aggregateKey - key the identifies the aggregate group to bucket this event
// localKey - key that makes this event in the local group
type EventAggregatorKeyFunc func(event *api.Event) (aggregateKey string, localKey string)
// EventAggregatorByReasonFunc aggregates events by exact match on event.Source, event.InvolvedObject, event.Type and event.Reason
func EventAggregatorByReasonFunc(event *api.Event) (string, string) {
return strings.Join([]string{
event.Source.Component,
event.Source.Host,
event.InvolvedObject.Kind,
event.InvolvedObject.Namespace,
event.InvolvedObject.Name,
string(event.InvolvedObject.UID),
event.InvolvedObject.APIVersion,
event.Type,
event.Reason,
},
""), event.Message
}
// EventAggregatorMessageFunc is responsible for producing an aggregation message
type EventAggregatorMessageFunc func(event *api.Event) string
// EventAggregratorByReasonMessageFunc returns an aggregate message by prefixing the incoming message
func EventAggregatorByReasonMessageFunc(event *api.Event) string {
return "(events with common reason combined)"
}
// EventAggregator identifies similar events and aggregates them into a single event
type EventAggregator struct {
sync.RWMutex
// The cache that manages aggregation state
cache *lru.Cache
// The function that groups events for aggregation
keyFunc EventAggregatorKeyFunc
// The function that generates a message for an aggregate event
messageFunc EventAggregatorMessageFunc
// The maximum number of events in the specified interval before aggregation occurs
maxEvents int
// The amount of time in seconds that must transpire since the last occurrence of a similar event before it's considered new
maxIntervalInSeconds int
// clock is used to allow for testing over a time interval
clock util.Clock
}
// NewEventAggregator returns a new instance of an EventAggregator
func NewEventAggregator(lruCacheSize int, keyFunc EventAggregatorKeyFunc, messageFunc EventAggregatorMessageFunc,
maxEvents int, maxIntervalInSeconds int, clock util.Clock) *EventAggregator {
return &EventAggregator{
cache: lru.New(lruCacheSize),
keyFunc: keyFunc,
messageFunc: messageFunc,
maxEvents: maxEvents,
maxIntervalInSeconds: maxIntervalInSeconds,
clock: clock,
}
}
// aggregateRecord holds data used to perform aggregation decisions
type aggregateRecord struct {
// we track the number of unique local keys we have seen in the aggregate set to know when to actually aggregate
// if the size of this set exceeds the max, we know we need to aggregate
localKeys sets.String
// The last time at which the aggregate was recorded
lastTimestamp unversioned.Time
}
// EventAggregate identifies similar events and groups into a common event if required
func (e *EventAggregator) EventAggregate(newEvent *api.Event) (*api.Event, error) {
aggregateKey, localKey := e.keyFunc(newEvent)
now := unversioned.NewTime(e.clock.Now())
record := aggregateRecord{localKeys: sets.NewString(), lastTimestamp: now}
e.Lock()
defer e.Unlock()
value, found := e.cache.Get(aggregateKey)
if found {
record = value.(aggregateRecord)
}
// if the last event was far enough in the past, it is not aggregated, and we must reset state
maxInterval := time.Duration(e.maxIntervalInSeconds) * time.Second
interval := now.Time.Sub(record.lastTimestamp.Time)
if interval > maxInterval {
record = aggregateRecord{localKeys: sets.NewString()}
}
record.localKeys.Insert(localKey)
record.lastTimestamp = now
e.cache.Add(aggregateKey, record)
if record.localKeys.Len() < e.maxEvents {
return newEvent, nil
}
// do not grow our local key set any larger than max
record.localKeys.PopAny()
// create a new aggregate event
eventCopy := &api.Event{
ObjectMeta: api.ObjectMeta{
Name: fmt.Sprintf("%v.%x", newEvent.InvolvedObject.Name, now.UnixNano()),
Namespace: newEvent.Namespace,
},
Count: 1,
FirstTimestamp: now,
InvolvedObject: newEvent.InvolvedObject,
LastTimestamp: now,
Message: e.messageFunc(newEvent),
Type: newEvent.Type,
Reason: newEvent.Reason,
Source: newEvent.Source,
}
return eventCopy, nil
}
// eventLog records data about when an event was observed
type eventLog struct {
// The number of times the event has occurred since first occurrence.
count int
// The time at which the event was first recorded.
firstTimestamp unversioned.Time
// The unique name of the first occurrence of this event
name string
// Resource version returned from previous interaction with server
resourceVersion string
}
// eventLogger logs occurrences of an event
type eventLogger struct {
sync.RWMutex
cache *lru.Cache
clock util.Clock
}
// newEventLogger observes events and counts their frequencies
func newEventLogger(lruCacheEntries int, clock util.Clock) *eventLogger {
return &eventLogger{cache: lru.New(lruCacheEntries), clock: clock}
}
// eventObserve records the event, and determines if its frequency should update
func (e *eventLogger) eventObserve(newEvent *api.Event) (*api.Event, []byte, error) {
var (
patch []byte
err error
)
key := getEventKey(newEvent)
eventCopy := *newEvent
event := &eventCopy
e.Lock()
defer e.Unlock()
lastObservation := e.lastEventObservationFromCache(key)
// we have seen this event before, so we must prepare a patch
if lastObservation.count > 0 {
// update the event based on the last observation so patch will work as desired
event.Name = lastObservation.name
event.ResourceVersion = lastObservation.resourceVersion
event.FirstTimestamp = lastObservation.firstTimestamp
event.Count = lastObservation.count + 1
eventCopy2 := *event
eventCopy2.Count = 0
eventCopy2.LastTimestamp = unversioned.NewTime(time.Unix(0, 0))
newData, _ := json.Marshal(event)
oldData, _ := json.Marshal(eventCopy2)
patch, err = strategicpatch.CreateStrategicMergePatch(oldData, newData, event)
}
// record our new observation
e.cache.Add(
key,
eventLog{
count: event.Count,
firstTimestamp: event.FirstTimestamp,
name: event.Name,
resourceVersion: event.ResourceVersion,
},
)
return event, patch, err
}
// updateState updates its internal tracking information based on latest server state
func (e *eventLogger) updateState(event *api.Event) {
key := getEventKey(event)
e.Lock()
defer e.Unlock()
// record our new observation
e.cache.Add(
key,
eventLog{
count: event.Count,
firstTimestamp: event.FirstTimestamp,
name: event.Name,
resourceVersion: event.ResourceVersion,
},
)
}
// lastEventObservationFromCache returns the event from the cache, reads must be protected via external lock
func (e *eventLogger) lastEventObservationFromCache(key string) eventLog {
value, ok := e.cache.Get(key)
if ok {
observationValue, ok := value.(eventLog)
if ok {
return observationValue
}
}
return eventLog{}
}
// EventCorrelator processes all incoming events and performs analysis to avoid overwhelming the system. It can filter all
// incoming events to see if the event should be filtered from further processing. It can aggregate similar events that occur
// frequently to protect the system from spamming events that are difficult for users to distinguish. It performs de-duplication
// to ensure events that are observed multiple times are compacted into a single event with increasing counts.
type EventCorrelator struct {
// the function to filter the event
filterFunc EventFilterFunc
// the object that performs event aggregation
aggregator *EventAggregator
// the object that observes events as they come through
logger *eventLogger
}
// EventCorrelateResult is the result of a Correlate
type EventCorrelateResult struct {
// the event after correlation
Event *api.Event
// if provided, perform a strategic patch when updating the record on the server
Patch []byte
// if true, do no further processing of the event
Skip bool
}
// NewEventCorrelator returns an EventCorrelator configured with default values.
//
// The EventCorrelator is responsible for event filtering, aggregating, and counting
// prior to interacting with the API server to record the event.
//
// The default behavior is as follows:
// * No events are filtered from being recorded
// * Aggregation is performed if a similar event is recorded 10 times in a
// in a 10 minute rolling interval. A similar event is an event that varies only by
// the Event.Message field. Rather than recording the precise event, aggregation
// will create a new event whose message reports that it has combined events with
// the same reason.
// * Events are incrementally counted if the exact same event is encountered multiple
// times.
func NewEventCorrelator(clock util.Clock) *EventCorrelator {
cacheSize := maxLruCacheEntries
return &EventCorrelator{
filterFunc: DefaultEventFilterFunc,
aggregator: NewEventAggregator(
cacheSize,
EventAggregatorByReasonFunc,
EventAggregatorByReasonMessageFunc,
defaultAggregateMaxEvents,
defaultAggregateIntervalInSeconds,
clock),
logger: newEventLogger(cacheSize, clock),
}
}
// EventCorrelate filters, aggregates, counts, and de-duplicates all incoming events
func (c *EventCorrelator) EventCorrelate(newEvent *api.Event) (*EventCorrelateResult, error) {
if c.filterFunc(newEvent) {
return &EventCorrelateResult{Skip: true}, nil
}
aggregateEvent, err := c.aggregator.EventAggregate(newEvent)
if err != nil {
return &EventCorrelateResult{}, err
}
observedEvent, patch, err := c.logger.eventObserve(aggregateEvent)
return &EventCorrelateResult{Event: observedEvent, Patch: patch}, err
}
// UpdateState based on the latest observed state from server
func (c *EventCorrelator) UpdateState(event *api.Event) {
c.logger.updateState(event)
}