-
Notifications
You must be signed in to change notification settings - Fork 17
/
pattern.go
408 lines (362 loc) · 13.2 KB
/
pattern.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
package sigma
import (
"fmt"
"regexp"
"strings"
"github.com/gobwas/glob"
)
// TextPatternModifier tells NewStringMatcher how a textual pattern should be
// interpreted when it is compiled into a StringMatcher.
type TextPatternModifier int
// Known modifiers, numbered consecutively from zero via iota; the order of the
// entries therefore determines their numeric values.
const (
// TextPatternNone is the zero value: no modifier. NewStringMatcher then
// decides between regex, glob and exact literal from the pattern text.
TextPatternNone TextPatternModifier = iota
// TextPatternContains wraps the pattern in '*' so it may match anywhere.
TextPatternContains
// TextPatternPrefix matches the pattern as a literal prefix.
TextPatternPrefix
// TextPatternSuffix matches the pattern as a literal suffix.
TextPatternSuffix
// TextPatternAll has no dedicated case in NewStringMatcher and is handled
// like TextPatternNone there.
// NOTE(review): presumably set for the "all" specifier - confirm with the parser.
TextPatternAll
// TextPatternRegex compiles the pattern as a regular expression.
TextPatternRegex
// TextPatternKeyword marks a keyword pattern; unless it looks like /regex/
// NewStringMatcher treats it as a "contains" style glob.
TextPatternKeyword
)
// func isValidSpecifier(in string) bool {
// return in == "contains" ||
// in == "endswith" ||
// in == "startswith"
// }
// NumMatcher is an atomic pattern for numeric item or list of items.
// Implementations in this file are NumPattern (a single value) and
// NumMatchers (a list of matchers).
type NumMatcher interface {
// NumMatch reports whether the given integer satisfies the pattern.
NumMatch(int) bool
}
// NumMatchers is a list of numeric matchers that is itself a NumMatcher.
// Its elements are combined with a logical OR.
type NumMatchers []NumMatcher

// NumMatch implements NumMatcher. It reports whether at least one element
// accepts val and stops at the first element that does. An empty or nil list
// matches nothing.
func (n NumMatchers) NumMatch(val int) bool {
	for idx := range n {
		if !n[idx].NumMatch(val) {
			continue
		}
		return true
	}
	return false
}
// NewNumMatcher builds a NumMatcher that accepts any of the given values.
//
// A single value yields a bare NumPattern. Several values yield a NumMatchers
// list holding one NumPattern per value, in argument order. Calling it without
// any value returns a nil matcher and an error.
func NewNumMatcher(patterns ...int) (NumMatcher, error) {
	switch len(patterns) {
	case 0:
		return nil, fmt.Errorf("no patterns defined for matcher object")
	case 1:
		// No need for a list wrapper around a single value.
		return NumPattern{Val: patterns[0]}, nil
	}
	list := make(NumMatchers, len(patterns))
	for i, val := range patterns {
		list[i] = NumPattern{Val: val}
	}
	return list, nil
}
// StringMatcher is an atomic pattern that could implement glob, literal or regex matchers
type StringMatcher interface {
// StringMatch reports whether the given string satisfies the pattern.
StringMatch(string) bool
}
// gWSCollapse matches every run of one or more whitespace characters.
var gWSCollapse = regexp.MustCompile(`\s+`)

// handleWhitespace normalizes whitespace in str for non-regex matching.
//
// Collapsing is the default: unless noCollapseWS is set (see
// config.NoCollapseWS), every run of whitespace (spaces, tabs, ...) is
// replaced by a single space. With noCollapseWS set, str is returned as is.
// The same treatment is applied to non-regex rule patterns and to the data
// matched against them.
func handleWhitespace(str string, noCollapseWS bool) string {
	if !noCollapseWS {
		return gWSCollapse.ReplaceAllString(str, " ")
	}
	return str
}
// Bytes with a special meaning either in Sigma patterns or in gobwas/glob.
const (
	sigmaSpecialWildcard     = byte('*')
	sigmaSpecialSingle       = byte('?')
	sigmaSpecialEscape       = byte('\\')
	globSpecialSqrBrktLeft   = byte('[')
	globSpecialSqrBrktRight  = byte(']')
	globSpecialCurlBrktLeft  = byte('{')
	globSpecialCurlBrktRight = byte('}')
)

// escapeSigmaForGlob translates a Sigma pattern into the escaping rules of
// gobwas/glob. Most of the string is passed through untouched; two things are
// rewritten:
//
//   - A run of backslashes that is NOT directly followed by a wildcard
//     ('*' or '?') is balanced: when its length is odd one more backslash is
//     added, so '\' becomes '\\' and '\\\' becomes '\\\\'. Runs directly in
//     front of a wildcard are copied as they are, because by Sigma convention
//     they are already meaningful ('\*' is an escaped wildcard, '\\*' an
//     escaped backslash followed by a wildcard, '\\\*' an escaped backslash
//     followed by an escaped wildcard).
//   - Square and curly brackets are special to the glob library but not to
//     Sigma, so each one is prefixed with a backslash.
//
// Sigma escaping rules per spec:
//   - A plain backslash not followed by a wildcard can be written as a single
//     '\' or a double '\\'. The single notation is recommended for simplicity.
//   - A wildcard has to be escaped to be handled as a plain character: '\*'
//   - The backslash before a wildcard has to be escaped to handle the value
//     as a backslash followed by a wildcard: '\\*'
//   - Three backslashes are necessary to escape both the backslash and the
//     wildcard and handle them as plain values: '\\\*'
//   - Three or four backslashes are handled as a double backslash. Four are
//     recommended for consistency: '\\\\' results in the plain value '\\'
func escapeSigmaForGlob(str string) string {
	total := len(str)
	if total == 0 { // quick out if empty
		return ""
	}

	// Every input byte produces at most two output bytes.
	out := make([]byte, 0, 2*total)

	for pos := 0; pos < total; {
		ch := str[pos]

		if ch != sigmaSpecialEscape {
			switch ch {
			case globSpecialSqrBrktLeft, globSpecialSqrBrktRight,
				globSpecialCurlBrktLeft, globSpecialCurlBrktRight:
				// "quotemeta"-like handling: brackets are plain text in Sigma
				out = append(out, sigmaSpecialEscape)
			}
			out = append(out, ch)
			pos++
			continue
		}

		// Consume the whole run of backslashes starting at pos.
		end := pos
		for end < total && str[end] == sigmaSpecialEscape {
			end++
		}
		out = append(out, str[pos:end]...)

		// Only runs that do not lead into a wildcard need balancing.
		beforeWildcard := end < total &&
			(str[end] == sigmaSpecialWildcard || str[end] == sigmaSpecialSingle)
		if !beforeWildcard && (end-pos)%2 != 0 {
			out = append(out, sigmaSpecialEscape)
		}
		pos = end
	}
	return string(out)
}
// NewStringMatcher compiles patterns into a single StringMatcher.
//
// mod selects how every pattern is interpreted:
//   - TextPatternRegex: the pattern is compiled as a regular expression.
//   - TextPatternContains: the pattern is glob-escaped and wrapped in '*' so
//     that it may match anywhere in the value.
//   - TextPatternPrefix / TextPatternSuffix: literal prefix / suffix match.
//   - any other modifier: a pattern written as /expr/ (non-spec) is compiled
//     as the regular expression expr; otherwise a TextPatternKeyword pattern
//     becomes a "contains" style glob, a pattern holding '*' becomes a glob,
//     and everything else is an exact literal match.
//
// lower requests lowercase comparison for the literal matchers (exact, prefix,
// suffix); the glob and regex matchers built here do not consult it.
// noCollapseWS disables whitespace collapsing of non-regex patterns and is
// handed to the resulting matchers. all decides how several patterns combine:
// true yields a StringMatchersConj (every pattern must match), false a
// StringMatchers (any pattern may match); both are passed through Optimize.
//
// A single pattern is returned as its bare matcher. An error is returned when
// no pattern is given or when a regular expression or glob fails to compile.
func NewStringMatcher(
	mod TextPatternModifier,
	lower, all, noCollapseWS bool,
	patterns ...string,
) (StringMatcher, error) {
	if len(patterns) == 0 {
		return nil, fmt.Errorf("no patterns defined for matcher object")
	}

	// compileGlob turns an already glob-escaped expression into a GlobPattern.
	compileGlob := func(expr string) (StringMatcher, error) {
		compiled, err := glob.Compile(expr)
		if err != nil {
			return nil, err
		}
		return GlobPattern{Glob: &compiled, NoCollapseWS: noCollapseWS}, nil
	}
	// compileRegex turns a regular expression into a RegexPattern.
	compileRegex := func(expr string) (StringMatcher, error) {
		re, err := regexp.Compile(expr)
		if err != nil {
			return nil, err
		}
		return RegexPattern{Re: re}, nil
	}
	// containsGlob prepares a Sigma pattern for "match anywhere" semantics:
	// single backslashes etc. must be escaped correctly before the wildcards
	// are put on either side.
	containsGlob := func(pattern string) (StringMatcher, error) {
		pattern = escapeSigmaForGlob(handleWhitespace(pattern, noCollapseWS))
		return compileGlob("*" + pattern + "*")
	}

	matcher := make([]StringMatcher, 0, len(patterns))
	for _, p := range patterns {
		var (
			m   StringMatcher
			err error
		)
		switch {
		case mod == TextPatternRegex: // regex per spec
			m, err = compileRegex(p)
		case mod == TextPatternContains:
			m, err = containsGlob(p)
		case mod == TextPatternSuffix:
			m = SuffixPattern{Token: handleWhitespace(p, noCollapseWS), Lowercase: lower, NoCollapseWS: noCollapseWS}
		case mod == TextPatternPrefix:
			m = PrefixPattern{Token: handleWhitespace(p, noCollapseWS), Lowercase: lower, NoCollapseWS: noCollapseWS}

		// No (supported) modifier from here on: handle non-spec regex, globs
		// and regular values.
		case len(p) > 2 && strings.HasPrefix(p, "/") && strings.HasSuffix(p, "/"):
			// Strip exactly one delimiter on each side. Removing every
			// leading/trailing slash would corrupt expressions that themselves
			// start or end with '/' (e.g. /foo\// would lose the slash its
			// trailing backslash escapes), and requiring a non-empty body keeps
			// a plain "/" or "//" from turning into a match-everything regex.
			m, err = compileRegex(p[1 : len(p)-1])
		case mod == TextPatternKeyword:
			// A keyword that does not look like a regex is always processed as
			// a "contains" style glob: keywords are typically matched against
			// long, free-form messages and may appear anywhere in them.
			m, err = containsGlob(p)
		case strings.Contains(p, "*"):
			// The author supplied the wildcards; only translate the escaping.
			m, err = compileGlob(escapeSigmaForGlob(handleWhitespace(p, noCollapseWS)))
		default:
			m = ContentPattern{Token: handleWhitespace(p, noCollapseWS), Lowercase: lower, NoCollapseWS: noCollapseWS}
		}
		if err != nil {
			return nil, err
		}
		matcher = append(matcher, m)
	}

	if len(matcher) == 1 {
		return matcher[0], nil
	}
	if all {
		return StringMatchersConj(matcher).Optimize(), nil
	}
	return StringMatchers(matcher).Optimize(), nil
}
// StringMatchers is a list of atomic matchers treated as alternatives: the
// elements are joined with a logical OR.
type StringMatchers []StringMatcher

// StringMatch implements StringMatcher. It reports whether at least one
// element accepts msg and stops at the first element that does. An empty or
// nil list matches nothing.
func (s StringMatchers) StringMatch(msg string) bool {
	for idx := range s {
		// Whitespace handling is deliberately left to each element's own
		// StringMatch; no type assertion is done here to keep this loop cheap.
		if !s[idx].StringMatch(msg) {
			continue
		}
		return true
	}
	return false
}
// Optimize returns a new StringMatchers slice ordered by matcher type.
// Because the first matching element decides the result, fast literal patterns
// are placed first, then globs, and finally the slow regular expressions.
func (s StringMatchers) Optimize() StringMatchers {
	return StringMatchers(optimizeStringMatchers([]StringMatcher(s)))
}
// StringMatchersConj is the conjunctive counterpart of StringMatchers: the
// elements are joined with a logical AND, i.e. every pattern must match.
// It is used to implement the "all" specifier for selection types.
type StringMatchersConj []StringMatcher

// StringMatch implements StringMatcher. It reports whether every element
// accepts msg and stops at the first element that does not. An empty or nil
// list matches everything.
func (s StringMatchersConj) StringMatch(msg string) bool {
	for idx := range s {
		if s[idx].StringMatch(msg) {
			continue
		}
		return false
	}
	return true
}
// Optimize returns a new StringMatchersConj slice ordered by matcher type.
// Every element has to match, so evaluation stops at the first failure;
// fast literal patterns are therefore placed first, then globs, and finally
// the slow regular expressions.
func (s StringMatchersConj) Optimize() StringMatchersConj {
	return StringMatchersConj(optimizeStringMatchers([]StringMatcher(s)))
}
// optimizeStringMatchers returns a new slice holding the matchers of s grouped
// by expected cost: literal patterns (content, prefix, suffix) first, then
// globs, then regular expressions, then matchers of any other type. The
// relative order inside each group is the input order.
//
// Matchers of a type this function does not know are kept rather than
// discarded: dropping them would silently change what a list matches (a
// disjunction would lose an alternative, a conjunction a requirement). Their
// cost is unknown, so they run last. Nil entries are skipped because they
// cannot be evaluated.
func optimizeStringMatchers(s []StringMatcher) []StringMatcher {
	var literals, globs, regexes, others []StringMatcher
	for _, pat := range s {
		switch pat.(type) {
		case nil:
			continue
		case ContentPattern, PrefixPattern, SuffixPattern:
			literals = append(literals, pat)
		case GlobPattern:
			globs = append(globs, pat)
		case RegexPattern:
			regexes = append(regexes, pat)
		default:
			others = append(others, pat)
		}
	}

	ordered := make([]StringMatcher, 0, len(s))
	ordered = append(ordered, literals...)
	ordered = append(ordered, globs...)
	ordered = append(ordered, regexes...)
	ordered = append(ordered, others...)
	return ordered
}
// ContentPattern is a token for literal content matching
type ContentPattern struct {
Token string
Lowercase bool
NoCollapseWS bool
}
// StringMatch implements StringMatcher
func (c ContentPattern) StringMatch(msg string) bool {
msg = handleWhitespace(msg, c.NoCollapseWS)
return lowerCaseIfNeeded(msg, c.Lowercase) == lowerCaseIfNeeded(c.Token, c.Lowercase)
}
// PrefixPattern is a token for literal content matching
type PrefixPattern struct {
Token string
Lowercase bool
NoCollapseWS bool
}
// StringMatch implements StringMatcher
func (c PrefixPattern) StringMatch(msg string) bool {
msg = handleWhitespace(msg, c.NoCollapseWS)
return strings.HasPrefix(
lowerCaseIfNeeded(msg, c.Lowercase),
lowerCaseIfNeeded(c.Token, c.Lowercase),
)
}
// SuffixPattern is a token for literal content matching
type SuffixPattern struct {
Token string
Lowercase bool
NoCollapseWS bool
}
// StringMatch implements StringMatcher
func (c SuffixPattern) StringMatch(msg string) bool {
msg = handleWhitespace(msg, c.NoCollapseWS)
return strings.HasSuffix(
lowerCaseIfNeeded(msg, c.Lowercase),
lowerCaseIfNeeded(c.Token, c.Lowercase),
)
}
// RegexPattern is for matching messages with regular expressions.
type RegexPattern struct {
	Re *regexp.Regexp
}

// StringMatch implements StringMatcher. It reports whether msg contains a
// match of Re; the message is handed to the expression unmodified. A pattern
// without a compiled expression (nil Re, e.g. the zero value) matches nothing
// instead of panicking.
func (r RegexPattern) StringMatch(msg string) bool {
	if r.Re == nil {
		return false
	}
	return r.Re.MatchString(msg)
}
// GlobPattern is similar to ContentPattern but allows for asterisk wildcards
type GlobPattern struct {
	Glob         *glob.Glob
	NoCollapseWS bool
}

// StringMatch implements StringMatcher. The message goes through
// handleWhitespace (honoring NoCollapseWS) and is then matched against the
// compiled glob. A pattern without a compiled glob (nil Glob, e.g. the zero
// value) matches nothing instead of panicking.
func (g GlobPattern) StringMatch(msg string) bool {
	if g.Glob == nil || *g.Glob == nil {
		return false
	}
	return (*g.Glob).Match(handleWhitespace(msg, g.NoCollapseWS))
}
// SimplePattern is a reference type to illustrate StringMatcher: it matches
// when Token occurs anywhere in the message.
type SimplePattern struct {
	Token        string
	NoCollapseWS bool
}

// StringMatch implements StringMatcher. The message goes through
// handleWhitespace (honoring NoCollapseWS) before the substring search.
func (s SimplePattern) StringMatch(msg string) bool {
	subject := handleWhitespace(msg, s.NoCollapseWS)
	return strings.Contains(subject, s.Token)
}
// lowerCaseIfNeeded returns str converted to lower case when lower is set and
// str unchanged otherwise.
func lowerCaseIfNeeded(str string, lower bool) string {
	if !lower {
		return str
	}
	return strings.ToLower(str)
}
// NumPattern matches exactly one numeric value.
type NumPattern struct {
	Val int
}

// NumMatch implements NumMatcher. It reports whether val equals Val.
func (n NumPattern) NumMatch(val int) bool {
	same := val == n.Val
	return same
}