diff --git a/CHANGELOG.md b/CHANGELOG.md index ac0836ceea..b4f1a41b49 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -81,6 +81,8 @@ v1.6.0-rc.0 - Update `prometheus.write.queue` library for performance increases in cpu. (@mattdurham) +- Update `loki.secretfilter` to be compatible with the new `[[rules.allowlists]]` gitleaks allowlist format (@romain-gaillard) + - Update `async-profiler` binaries for `pyroscope.java` to 3.0-fa937db (@aleks-p) - Reduced memory allocation in discovery components by up to 30% (@thampiotr) @@ -97,6 +99,8 @@ v1.6.0-rc.0 - Fixed an issue where the `otelcol.processor.interval` could not be used because the debug metrics were not set to default. (@wildum) +- Fixed an issue where `loki.secretfilter` would crash if the secret was shorter than the `partial_mask` value. (@romain-gaillard) + - Change the log level in the `eventlogmessage` stage of the `loki.process` component from `warn` to `debug`. (@wildum) - Fix a bug in `loki.source.kafka` where the `topics` argument incorrectly used regex matching instead of exact matches. (@wildum) diff --git a/docs/sources/reference/components/loki/loki.secretfilter.md b/docs/sources/reference/components/loki/loki.secretfilter.md index cb412d2225..f4eff72dfe 100644 --- a/docs/sources/reference/components/loki/loki.secretfilter.md +++ b/docs/sources/reference/components/loki/loki.secretfilter.md @@ -18,7 +18,7 @@ The detection is based on regular expression patterns, defined in the [Gitleaks {{< admonition type="caution" >}} Personally Identifiable Information (PII) isn't currently in scope and some secrets could remain undetected. -This component may generate false positives. +This component may generate false positives or redact too much. Don't rely solely on this component to redact sensitive information. {{< /admonition >}} @@ -49,7 +49,14 @@ Name | Type | Description The `gitleaks_config` argument is the path to the custom `gitleaks.toml` file. The Gitleaks configuration file embedded in the component is used if you don't provide the path to a custom configuration file. -The `types` argument is a map of secret types to look for. The values are used as prefixes for the secret types in the Gitleaks configuration. If you don't provide this argument, all types are used. +{{< admonition type="note" >}} +This component doesn't support all the features of the Gitleaks configuration file. It only supports regular expression-based rules, `secretGroup`, and allowlist regular expressions. `regexTarget` only supports the default value `secret`. Other features such as `keywords`, `entropy`, `paths`, and `stopwords` are not supported. The `extend` feature is not supported. If you use a custom configuration file, you must include all the rules you want to use within the configuration file. Unsupported fields and values in the configuration file are ignored. +{{< /admonition >}} + +The `types` argument is a map of secret types to look for. +The values provided are used as prefixes to match rules IDs in the Gitleaks configuration. +For example, providing the type `grafana` will match the rules `grafana-api-key`, `grafana-cloud-api-token`, and `grafana-service-account-token`. +If you don't provide this argument, all rules are used. {{< admonition type="note" >}} Configuring this argument with the secret types you want to look for is strongly recommended. @@ -74,6 +81,8 @@ A secret will not be redacted if it matches any of the regular expressions. The The `partial_mask` argument is the number of characters to show from the beginning of the secret before the redact string is added. If set to `0`, the entire secret is redacted. +If a secret is not at least 6 characters long, it will be entirely redacted. +For short secrets, at most half of the secret is shown. ## Blocks diff --git a/internal/component/loki/secretfilter/secretfilter.go b/internal/component/loki/secretfilter/secretfilter.go index ec6b11dd19..071ba481c4 100644 --- a/internal/component/loki/secretfilter/secretfilter.go +++ b/internal/component/loki/secretfilter/secretfilter.go @@ -29,7 +29,6 @@ type Rule struct { name string regex *regexp.Regexp secretGroup int - description string allowlist []AllowRule } @@ -98,19 +97,20 @@ type Component struct { type GitLeaksConfig struct { AllowList struct { Description string - Paths []string Regexes []string } Rules []struct { ID string - Description string Regex string - Keywords []string SecretGroup int + // Old format, kept for compatibility Allowlist struct { - StopWords []string - Regexes []string + Regexes []string + } + // New format + Allowlists []struct { + Regexes []string } } } @@ -194,16 +194,16 @@ func (c *Component) processEntry(entry loki.Entry) loki.Entry { // Check if the secret is in the allowlist var allowRule *AllowRule = nil - // First check the rule-specific allowlist - for _, a := range r.allowlist { + // First check the global allowlist + for _, a := range c.AllowList { if a.Regex.MatchString(secret) { allowRule = &a break } } - // Then check the global allowlist + // Then check the rule-specific allowlists if allowRule == nil { - for _, a := range c.AllowList { + for _, a := range r.allowlist { if a.Regex.MatchString(secret) { allowRule = &a break @@ -231,8 +231,21 @@ func (c *Component) redactLine(line string, secret string, ruleName string) stri redactWith = strings.ReplaceAll(redactWith, "$SECRET_NAME", ruleName) redactWith = strings.ReplaceAll(redactWith, "$SECRET_HASH", hashSecret(secret)) } - if c.args.PartialMask > 0 { - redactWith = secret[:c.args.PartialMask] + redactWith + + // If partialMask is set, show the first N characters of the secret + partialMask := int(c.args.PartialMask) + if partialMask < 0 { + partialMask = 0 + } + runesSecret := []rune(secret) + // Only do it if the secret is long enough + if partialMask > 0 && len(runesSecret) >= 6 { + // Show at most half of the secret + if partialMask > len(runesSecret)/2 { + partialMask = len(runesSecret) / 2 + } + prefix := string(runesSecret[:partialMask]) + redactWith = prefix + redactWith } line = strings.ReplaceAll(line, secret, redactWith) @@ -306,12 +319,21 @@ func (c *Component) Update(args component.Arguments) error { } allowlist = append(allowlist, AllowRule{Regex: re, Source: fmt.Sprintf("rule %s", rule.ID)}) } + for _, currAllowList := range rule.Allowlists { + for _, r := range currAllowList.Regexes { + re, err := regexp.Compile(r) + if err != nil { + level.Error(c.opts.Logger).Log("msg", "error compiling allowlist regex", "error", err) + return err + } + allowlist = append(allowlist, AllowRule{Regex: re, Source: fmt.Sprintf("rule %s", rule.ID)}) + } + } newRule := Rule{ name: rule.ID, regex: re, secretGroup: rule.SecretGroup, - description: rule.Description, allowlist: allowlist, } @@ -325,23 +347,23 @@ func (c *Component) Update(args component.Arguments) error { } // Compiling global allowlist regexes - // From the Gitleaks config - for _, r := range gitleaksCfg.AllowList.Regexes { + // From the arguments + for _, r := range c.args.AllowList { re, err := regexp.Compile(r) if err != nil { level.Error(c.opts.Logger).Log("msg", "error compiling allowlist regex", "error", err) return err } - c.AllowList = append(c.AllowList, AllowRule{Regex: re, Source: "gitleaks config"}) + c.AllowList = append(c.AllowList, AllowRule{Regex: re, Source: "alloy config"}) } - // From the arguments - for _, r := range c.args.AllowList { + // From the Gitleaks config + for _, r := range gitleaksCfg.AllowList.Regexes { re, err := regexp.Compile(r) if err != nil { level.Error(c.opts.Logger).Log("msg", "error compiling allowlist regex", "error", err) return err } - c.AllowList = append(c.AllowList, AllowRule{Regex: re, Source: "alloy config"}) + c.AllowList = append(c.AllowList, AllowRule{Regex: re, Source: "gitleaks config"}) } // Add the generic API key rule last if needed diff --git a/internal/component/loki/secretfilter/secretfilter_test.go b/internal/component/loki/secretfilter/secretfilter_test.go index f6d529f809..cc40698a10 100644 --- a/internal/component/loki/secretfilter/secretfilter_test.go +++ b/internal/component/loki/secretfilter/secretfilter_test.go @@ -43,6 +43,46 @@ var customGitleaksConfig = map[string]string{ description = "Identified a fake secret" regex = '''(?i)\b(fakeSecret\d{5})(?:['|\"|\n|\r|\s|\x60|;]|$)''' `, + "short_secret": ` + title = "gitleaks custom config" + + [[rules]] + id = "short-secret" + description = "Identified a fake short secret" + regex = '''(?i)\b(abc)(?:['|\"|\n|\r|\s|\x60|;]|$)''' + `, + "allow_list_old": ` + title = "gitleaks custom config" + + [[rules]] + id = "my-fake-secret" + description = "Identified a fake secret" + regex = '''(?i)\b(fakeSecret\d{5})(?:['|\"|\n|\r|\s|\x60|;]|$)''' + [rules.allowlist] + regexes = ["abc\\d{3}", "fakeSecret[9]{5}"] + `, + "allow_list_new": ` + title = "gitleaks custom config" + + [[rules]] + id = "my-fake-secret" + description = "Identified a fake secret" + regex = '''(?i)\b(fakeSecret\d{5})(?:['|\"|\n|\r|\s|\x60|;]|$)''' + [[rules.allowlists]] + regexes = ["def\\d{3}", "test\\d{5}"] + [[rules.allowlists]] + regexes = ["abc\\d{3}", "fakeSecret[9]{5}"] + `, + `allow_list_global`: ` + title = "gitleaks custom config" + + [[rules]] + id = "my-fake-secret" + description = "Identified a fake secret" + regex = '''(?i)\b(fakeSecret\d{5})(?:['|\"|\n|\r|\s|\x60|;]|$)''' + [allowlist] + regexes = ["abc\\d{3}", "fakeSecret[9]{5}"] + `, } var defaultRedactionString = "REDACTED-SECRET" @@ -65,6 +105,11 @@ var testConfigs = map[string]string{ forward_to = [] partial_mask = 4 `, + "partial_mask_custom": ` + forward_to = [] + partial_mask = 4 + gitleaks_config = "not-empty" // This will be replaced with the actual path to the temporary gitleaks config file + `, "custom_types": ` forward_to = [] redact_with = "<` + customRedactionString + `:$SECRET_NAME>" @@ -115,6 +160,14 @@ var fakeSecrets = map[string]fakeSecret{ name: "my-fake-secret", value: "fakeSec" + "ret12345", }, + "custom-fake-secret-all9": { + name: "my-fake-secret", + value: "fakeSec" + "ret99999", + }, + "short-secret": { + name: "short-secret", + value: "abc", + }, } // List of fake log entries to use for testing @@ -155,12 +208,24 @@ var testLogs = map[string]testLog{ }`, secrets: []fakeSecret{fakeSecrets["custom-fake-secret"]}, }, + "simple_secret_custom_all9": { + log: `{ + "message": "This is a simple log message with a secret value ` + fakeSecrets["custom-fake-secret-all9"].value + ` ! + }`, + secrets: []fakeSecret{fakeSecrets["custom-fake-secret-all9"]}, + }, "multiple_secrets": { log: `{ "message": "This is a simple log message with a secret value ` + fakeSecrets["grafana-api-key"].value + ` and another secret value ` + fakeSecrets["gcp-api-key"].value + ` ! }`, secrets: []fakeSecret{fakeSecrets["grafana-api-key"], fakeSecrets["gcp-api-key"]}, }, + "short_secret": { + log: `{ + "message": "This is a simple log message with a secret value ` + fakeSecrets["short-secret"].value + ` ! + }`, + secrets: []fakeSecret{fakeSecrets["short-secret"]}, + }, } // Test cases for the secret filter @@ -213,6 +278,13 @@ var tt = []struct { testLogs["simple_secret"].log, replaceSecrets(testLogs["simple_secret"].log, testLogs["simple_secret"].secrets, true, false, defaultRedactionString), }, + { + "partial_mask_too_short", + testConfigs["partial_mask_custom"], + customGitleaksConfig["short_secret"], + testLogs["short_secret"].log, + replaceSecrets(testLogs["short_secret"].log, testLogs["short_secret"].secrets, false, false, defaultRedactionString), + }, { "gcp_secret", testConfigs["default"], @@ -255,6 +327,48 @@ var tt = []struct { testLogs["simple_secret_custom"].log, replaceSecrets(testLogs["simple_secret_custom"].log, testLogs["simple_secret_custom"].secrets, false, false, defaultRedactionString), }, + { + "custom_gitleaks_file_allow_list_old", + testConfigs["custom_gitleaks_file_simple"], + customGitleaksConfig["allow_list_old"], + testLogs["simple_secret_custom_all9"].log, + testLogs["simple_secret_custom_all9"].log, // In the allowlist + }, + { + "custom_gitleaks_file_allow_list_new", + testConfigs["custom_gitleaks_file_simple"], + customGitleaksConfig["allow_list_new"], + testLogs["simple_secret_custom_all9"].log, + testLogs["simple_secret_custom_all9"].log, // In the allowlist + }, + { + "custom_gitleaks_file_allow_list_old_redact", + testConfigs["custom_gitleaks_file_simple"], + customGitleaksConfig["allow_list_old"], + testLogs["simple_secret_custom"].log, + replaceSecrets(testLogs["simple_secret_custom"].log, testLogs["simple_secret_custom"].secrets, false, false, defaultRedactionString), + }, + { + "custom_gitleaks_file_allow_list_new_redact", + testConfigs["custom_gitleaks_file_simple"], + customGitleaksConfig["allow_list_new"], + testLogs["simple_secret_custom"].log, + replaceSecrets(testLogs["simple_secret_custom"].log, testLogs["simple_secret_custom"].secrets, false, false, defaultRedactionString), + }, + { + "custom_gitleaks_file_allow_list_global", + testConfigs["custom_gitleaks_file_simple"], + customGitleaksConfig["allow_list_global"], + testLogs["simple_secret_custom_all9"].log, + testLogs["simple_secret_custom_all9"].log, // In the allowlist + }, + { + "custom_gitleaks_file_allow_list_global_redact", + testConfigs["custom_gitleaks_file_simple"], + customGitleaksConfig["allow_list_global"], + testLogs["simple_secret_custom"].log, + replaceSecrets(testLogs["simple_secret_custom"].log, testLogs["simple_secret_custom"].secrets, false, false, defaultRedactionString), + }, { "multiple_secrets", testConfigs["default"], @@ -272,12 +386,80 @@ func TestSecretFiltering(t *testing.T) { } } +func TestPartialMasking(t *testing.T) { + // Start testing with common cases + component := &Component{} + component.args = Arguments{PartialMask: 4} + + // Too short to be partially masked + redacted := component.redactLine("This is a very short secret ab in a log line", "ab", "test-rule") + require.Equal(t, "This is a very short secret in a log line", redacted) + + // Too short to be partially masked + redacted = component.redactLine("This is a short secret abc12 in a log line", "abc12", "test-rule") + require.Equal(t, "This is a short secret in a log line", redacted) + + // Will be partially masked (limited by secret length) + redacted = component.redactLine("This is a longer secret abc123 in a log line", "abc123", "test-rule") + require.Equal(t, "This is a longer secret abc in a log line", redacted) + + // Will be partially masked + redacted = component.redactLine("This is a long enough secret abcd1234 in a log line", "abcd1234", "test-rule") + require.Equal(t, "This is a long enough secret abcd in a log line", redacted) + + // Will be partially masked + redacted = component.redactLine("This is the longest secret abcdef12345678 in a log line", "abcdef12345678", "test-rule") + require.Equal(t, "This is the longest secret abcd in a log line", redacted) + + // Test with a non-ASCII character + redacted = component.redactLine("This is a line with a complex secret aBc\U0001f512De\U0001f5124 in a log line", "aBc\U0001f512De\U0001f5124", "test-rule") + require.Equal(t, "This is a line with a complex secret aBc\U0001f512 in a log line", redacted) + + // Test with different secret lengths and partial masking values + for partialMasking := range 20 { + for secretLength := range 50 { + if secretLength < 2 { + continue + } + expectedPrefixLength := 0 + if secretLength >= 6 { + expectedPrefixLength = min(secretLength/2, partialMasking) + } + checkPartialMasking(t, partialMasking, secretLength, expectedPrefixLength) + } + } +} + +func checkPartialMasking(t *testing.T, partialMasking int, secretLength int, expectedPrefixLength int) { + component := &Component{} + component.args = Arguments{PartialMask: uint(partialMasking)} + + // Test with a simple ASCII character + secret := strings.Repeat("A", secretLength) + inputLog := fmt.Sprintf("This is a test with a secret %s in a log line", secret) + redacted := component.redactLine(inputLog, secret, "test-rule") + prefix := strings.Repeat("A", expectedPrefixLength) + expectedLog := fmt.Sprintf("This is a test with a secret %s in a log line", prefix) + require.Equal(t, expectedLog, redacted) + + // Test with a non-ASCII character + secret = strings.Repeat("\U0001f512", secretLength) + inputLog = fmt.Sprintf("This is a test with a secret %s in a log line", secret) + redacted = component.redactLine(inputLog, secret, "test-rule") + prefix = strings.Repeat("\U0001f512", expectedPrefixLength) + expectedLog = fmt.Sprintf("This is a test with a secret %s in a log line", prefix) + require.Equal(t, expectedLog, redacted) +} + func runTest(t *testing.T, config string, gitLeaksConfigContent string, inputLog string, expectedLog string) { ch1 := loki.NewLogsReceiver() var args Arguments require.NoError(t, syntax.Unmarshal([]byte(config), &args)) args.ForwardTo = []loki.LogsReceiver{ch1} + // Making sure we're not testing with an empty log line by mistake + require.NotEmpty(t, inputLog) + // If needed, create a temporary gitleaks config file if args.GitleaksConfig != "" { args.GitleaksConfig = createTempGitleaksConfig(t, gitLeaksConfigContent)