Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for configuring Grafana Mimir via PrometheusRule CRDs #2604

Merged
merged 40 commits into from
Jan 6, 2023
Merged
Show file tree
Hide file tree
Changes from 38 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
8c235ca
Initial polling of rules from Grafana Cloud
Logiraptor Dec 2, 2022
d4a65ef
Clean up CortexClient and rename to Mimir
Logiraptor Dec 13, 2022
39d9a47
Initial implementation pulling from CRDs
Logiraptor Dec 2, 2022
ee18ab4
Add diffing algorithm to update rules
Logiraptor Dec 2, 2022
257dcd5
Apply diffs to Mimir
Logiraptor Dec 3, 2022
4454c1e
Rewrite reconciliation to use k8s informer pattern
Logiraptor Dec 5, 2022
f9656a2
Only update rule groups that actually change
Logiraptor Dec 5, 2022
1523a48
Move arguments into a separate file
Logiraptor Dec 5, 2022
4a94e59
Split implementation into smaller files
Logiraptor Dec 5, 2022
a690b92
Add namespace informer
Logiraptor Dec 5, 2022
5e65d59
Map PrometheusRule 1:1 to Mimir rule namespaces
Logiraptor Dec 7, 2022
4d1cf80
Implement the DebugComponent interface
Logiraptor Dec 7, 2022
4a740aa
Remove outdated comment
Logiraptor Dec 7, 2022
d88ee51
Backfill tests for core diffing algorithm
Logiraptor Dec 7, 2022
00b3e40
Add configurable namespace prefix
Logiraptor Dec 7, 2022
aa7d2d7
Rename arguments.go to types.go
Logiraptor Dec 7, 2022
6c4c34d
Simplify event handler to use shared implementation
Logiraptor Dec 7, 2022
96d6843
Add integration tests for event pipeline
Logiraptor Dec 7, 2022
6e9fd94
Simplify MimirClient
Logiraptor Dec 7, 2022
3b49803
Remove logrus from mimir client
Logiraptor Dec 7, 2022
2002add
Remove redundant default setting
Logiraptor Dec 7, 2022
02bc89e
Remove extra logging from MimirClient
Logiraptor Dec 7, 2022
9369571
Replace default argument value setting
Logiraptor Dec 7, 2022
e454ade
Retry event processing up to 5 times
Logiraptor Dec 7, 2022
35d19d3
Add metrics for the workqueue and k8s client
Logiraptor Dec 8, 2022
92666a0
Add metrics for mimir client and event pipeline
Logiraptor Dec 8, 2022
bfa32b2
Address PR feedback
Logiraptor Dec 9, 2022
ff233b5
Fix linter
Logiraptor Dec 9, 2022
f7cddda
Fix linter
Logiraptor Dec 9, 2022
2885fb5
Fix race condition in tests
Logiraptor Dec 9, 2022
8669e63
Fix linter
Logiraptor Dec 9, 2022
b602fc8
Add explanation of the different yaml packages used
Logiraptor Jan 5, 2023
5f8b25b
Use hashicorp/go-multierror instead of dskit
Logiraptor Jan 5, 2023
1479d4f
Implement event handler interface separately
Logiraptor Jan 5, 2023
43db7eb
Remove remote write config from mimir client
Logiraptor Jan 5, 2023
8f3299b
Add CHANGELOG and docs
Logiraptor Jan 5, 2023
5185d9a
Rename component directory to match naming
Logiraptor Jan 5, 2023
0bd63f9
Define a type for the namespace-grouped rule groups
Logiraptor Jan 5, 2023
881c66a
Apply suggestions from code review
Logiraptor Jan 6, 2023
a0b5f4a
Replace metric descriptions with a table
Logiraptor Jan 6, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,12 @@ v0.30.0 (2022-12-20)
- `discovery.file` discovers files on the filesystem following glob
patterns. (@mattdurham)

- `mimir.rules.kubernetes` discovers `PrometheusRule` Kubernetes resources and
Logiraptor marked this conversation as resolved.
Show resolved Hide resolved
loads them into a Mimir instance. (@Logiraptor)

- Integrations: Introduce the `snowflake` integration. (@binaryfissiongames)


### Enhancements

- Integrations: Always use direct connection in mongodb_exporter integration. (@v-zhuravlev)
Expand Down
1 change: 1 addition & 0 deletions component/all/all.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
_ "github.com/grafana/agent/component/loki/relabel" // Import loki.relabel
_ "github.com/grafana/agent/component/loki/source/file" // Import loki.source.file
_ "github.com/grafana/agent/component/loki/write" // Import loki.write
_ "github.com/grafana/agent/component/mimir/rules/kubernetes" // Import mimir.rules.kubernetes
_ "github.com/grafana/agent/component/otelcol/auth/basic" // Import otelcol.auth.basic
_ "github.com/grafana/agent/component/otelcol/auth/bearer" // Import otelcol.auth.bearer
_ "github.com/grafana/agent/component/otelcol/auth/headers" // Import otelcol.auth.headers
Expand Down
64 changes: 64 additions & 0 deletions component/mimir/rules/kubernetes/debug.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package rules

import "fmt"

// DebugInfo is the value returned by (*Component).DebugInfo.
type DebugInfo struct {
// Error is set when listing namespaces or rules fails; in that case the
// other fields are left empty.
Error string `river:"error,attr,optional"`
// PrometheusRules has one entry per PrometheusRule returned by the rule lister.
PrometheusRules []DebugK8sPrometheusRule `river:"prometheus_rule,block,optional"`
// MimirRuleNamespaces has one entry per managed namespace in the component's
// current state.
MimirRuleNamespaces []DebugMimirNamespace `river:"mimir_rule_namespace,block,optional"`
}

// DebugK8sPrometheusRule summarizes a single PrometheusRule resource for
// debug output.
type DebugK8sPrometheusRule struct {
// Namespace is the name of the Kubernetes namespace the rule was listed from.
Namespace string `river:"namespace,attr"`
// Name is the resource name of the PrometheusRule.
Name string `river:"name,attr"`
// UID is the resource UID rendered as a string.
UID string `river:"uid,attr"`
// NumRuleGroups is the number of groups in the resource's spec.
NumRuleGroups int `river:"num_rule_groups,attr"`
}

// DebugMimirNamespace summarizes one managed Mimir rule namespace for debug
// output.
type DebugMimirNamespace struct {
// Name is the namespace key as stored in the component's current state.
Name string `river:"name,attr"`
// NumRuleGroups is the number of rule groups stored under that namespace.
NumRuleGroups int `river:"num_rule_groups,attr"`
}

// DebugInfo builds a snapshot of what the component currently manages: the
// managed Mimir rule namespaces found in c.currentState, and the
// PrometheusRule resources listed from every namespace that matches
// c.namespaceSelector.
//
// If listing namespaces or rules fails, the returned DebugInfo carries only
// the Error message.
func (c *Component) DebugInfo() interface{} {
	var info DebugInfo

	// Report only the Mimir namespaces that carry this component's prefix.
	for mimirNamespace, groups := range c.currentState {
		if isManagedMimirNamespace(c.args.MimirNameSpacePrefix, mimirNamespace) {
			info.MimirRuleNamespaces = append(info.MimirRuleNamespaces, DebugMimirNamespace{
				Name:          mimirNamespace,
				NumRuleGroups: len(groups),
			})
		}
	}

	// Both listers are expected to be served from the informer cache, so
	// errors here should not occur under normal circumstances.
	k8sNamespaces, listErr := c.namespaceLister.List(c.namespaceSelector)
	if listErr != nil {
		return DebugInfo{
			Error: fmt.Sprintf("failed to list namespaces: %v", listErr),
		}
	}

	for _, k8sNamespace := range k8sNamespaces {
		promRules, rulesErr := c.ruleLister.PrometheusRules(k8sNamespace.Name).List(c.ruleSelector)
		if rulesErr != nil {
			return DebugInfo{
				Error: fmt.Sprintf("failed to list rules: %v", rulesErr),
			}
		}

		for _, promRule := range promRules {
			entry := DebugK8sPrometheusRule{
				Namespace:     k8sNamespace.Name,
				Name:          promRule.Name,
				UID:           string(promRule.UID),
				NumRuleGroups: len(promRule.Spec.Groups),
			}
			info.PrometheusRules = append(info.PrometheusRules, entry)
		}
	}

	return info
}
113 changes: 113 additions & 0 deletions component/mimir/rules/kubernetes/diff.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package rules

import (
"bytes"

"github.com/prometheus/prometheus/model/rulefmt"
"gopkg.in/yaml.v3" // Used for prometheus rulefmt compatibility instead of gopkg.in/yaml.v2
)

// ruleGroupDiffKind names the kind of change a ruleGroupDiff describes.
type ruleGroupDiffKind string

const (
// ruleGroupDiffKindAdd: the group is in the desired state but has no
// same-named group in the actual state.
ruleGroupDiffKindAdd ruleGroupDiffKind = "add"
// ruleGroupDiffKindRemove: the group is in the actual state but has no
// same-named group in the desired state.
ruleGroupDiffKindRemove ruleGroupDiffKind = "remove"
// ruleGroupDiffKindUpdate: a group with the same name is in both states
// but the two are not equal.
ruleGroupDiffKindUpdate ruleGroupDiffKind = "update"
)

// ruleGroupDiff describes a single change to one rule group.
type ruleGroupDiff struct {
// Kind says whether the group is added, removed or updated.
Kind ruleGroupDiffKind
// Actual is the existing group; populated for remove and update diffs.
Actual rulefmt.RuleGroup
// Desired is the wanted group; populated for add and update diffs.
Desired rulefmt.RuleGroup
}

// ruleGroupsByNamespace maps a namespace name to the rule groups it contains.
type ruleGroupsByNamespace map[string][]rulefmt.RuleGroup

// ruleGroupDiffsByNamespace maps a namespace name to the rule group changes
// computed for it.
type ruleGroupDiffsByNamespace map[string][]ruleGroupDiff

// diffRuleState computes the changes needed to turn the actual rule state
// into the desired one, grouped by namespace.
//
// Namespaces present in desired are diffed against the same-named entry in
// actual (a missing entry is treated as empty). Namespaces present only in
// actual are diffed against an empty desired state, which yields a remove
// diff for each of their groups.
//
// The returned map contains only namespaces with at least one change; a
// namespace whose diff is empty is omitted, whichever side it came from.
func diffRuleState(desired, actual ruleGroupsByNamespace) ruleGroupDiffsByNamespace {
	diff := make(ruleGroupDiffsByNamespace)

	for namespace, desiredRuleGroups := range desired {
		subDiff := diffRuleNamespaceState(desiredRuleGroups, actual[namespace])
		if len(subDiff) > 0 {
			diff[namespace] = subDiff
		}
	}

	for namespace, actualRuleGroups := range actual {
		// Namespaces that also exist in desired were handled above.
		if _, wanted := desired[namespace]; wanted {
			continue
		}

		// An actual namespace with no groups needs no changes; skip it so it
		// is treated the same way as an unchanged desired namespace.
		subDiff := diffRuleNamespaceState(nil, actualRuleGroups)
		if len(subDiff) > 0 {
			diff[namespace] = subDiff
		}
	}

	return diff
}

// diffRuleNamespaceState computes the changes needed to turn the actual rule
// groups of one namespace into the desired ones. Groups are matched by name.
//
// The result lists, in order of desired, an add diff for each desired group
// with no same-named actual group and an update diff for each desired group
// whose same-named actual group is not equal to it. These are followed, in
// order of actual, by a remove diff for each actual group whose name does not
// appear in desired. Equal groups produce no diff; the result is nil when
// nothing changes.
func diffRuleNamespaceState(desired []rulefmt.RuleGroup, actual []rulefmt.RuleGroup) []ruleGroupDiff {
	var diff []ruleGroupDiff

	// Index actual groups by name so each desired group is matched with one
	// map lookup instead of a scan. If a name repeats, the first occurrence
	// wins, which is the group a linear scan would have matched.
	actualIndexByName := make(map[string]int, len(actual))
	for i := range actual {
		if _, dup := actualIndexByName[actual[i].Name]; !dup {
			actualIndexByName[actual[i].Name] = i
		}
	}

	desiredNames := make(map[string]struct{}, len(desired))
	for _, desiredRuleGroup := range desired {
		desiredNames[desiredRuleGroup.Name] = struct{}{}

		idx, exists := actualIndexByName[desiredRuleGroup.Name]
		switch {
		case !exists:
			diff = append(diff, ruleGroupDiff{
				Kind:    ruleGroupDiffKindAdd,
				Desired: desiredRuleGroup,
			})
		case !equalRuleGroups(desiredRuleGroup, actual[idx]):
			diff = append(diff, ruleGroupDiff{
				Kind:    ruleGroupDiffKindUpdate,
				Actual:  actual[idx],
				Desired: desiredRuleGroup,
			})
		}
	}

	// Whatever exists but is no longer wanted must be removed.
	for _, actualRuleGroup := range actual {
		if _, wanted := desiredNames[actualRuleGroup.Name]; wanted {
			continue
		}

		diff = append(diff, ruleGroupDiff{
			Kind:   ruleGroupDiffKindRemove,
			Actual: actualRuleGroup,
		})
	}

	return diff
}

// equalRuleGroups reports whether a and b marshal to byte-identical YAML.
// If either group cannot be marshalled, the groups are reported as not equal.
func equalRuleGroups(a, b rulefmt.RuleGroup) bool {
	var encoded [2][]byte

	for i, group := range [...]rulefmt.RuleGroup{a, b} {
		out, err := yaml.Marshal(group)
		if err != nil {
			return false
		}
		encoded[i] = out
	}

	return bytes.Equal(encoded[0], encoded[1])
}
157 changes: 157 additions & 0 deletions component/mimir/rules/kubernetes/diff_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
package rules

import (
"fmt"
"testing"

"github.com/prometheus/prometheus/model/rulefmt"
"github.com/stretchr/testify/require"
)

// parseRuleGroups decodes buf with rulefmt.Parse and returns the groups it
// contains. The test fails immediately if parsing reports any error.
func parseRuleGroups(t *testing.T, buf []byte) []rulefmt.RuleGroup {
	t.Helper()

	parsed, parseErrs := rulefmt.Parse(buf)
	require.Empty(t, parseErrs)

	return parsed.Groups
}

// TestDiffRuleState runs diffRuleState over a table of desired/actual states
// and checks the resulting diffs with requireEqualRuleDiffs. It covers the
// empty case, add, remove, update and unchanged rule groups.
func TestDiffRuleState(t *testing.T) {
// Baseline fixture: a single rule group with one recording rule.
ruleGroupsA := parseRuleGroups(t, []byte(`
groups:
- name: rule-group-a
interval: 1m
rules:
- record: rule_a
expr: 1
`))

// Same group name as ruleGroupsA, but the rule expression differs (3 vs 1).
ruleGroupsAModified := parseRuleGroups(t, []byte(`
groups:
- name: rule-group-a
interval: 1m
rules:
- record: rule_a
expr: 3
`))

// Namespace key shared by every test case.
// NOTE(review): presumably mirrors the prefix/namespace/name/UID naming used
// for managed Mimir namespaces — confirm against the component.
managedNamespace := "agent/namespace/name/12345678-1234-1234-1234-123456789012"

type testCase struct {
name string
desired map[string][]rulefmt.RuleGroup
actual map[string][]rulefmt.RuleGroup
expected map[string][]ruleGroupDiff
}

testCases := []testCase{
{
name: "empty sets",
desired: map[string][]rulefmt.RuleGroup{},
actual: map[string][]rulefmt.RuleGroup{},
expected: map[string][]ruleGroupDiff{},
},
{
name: "add rule group",
desired: map[string][]rulefmt.RuleGroup{
managedNamespace: ruleGroupsA,
},
actual: map[string][]rulefmt.RuleGroup{},
expected: map[string][]ruleGroupDiff{
managedNamespace: {
{
Kind: ruleGroupDiffKindAdd,
Desired: ruleGroupsA[0],
},
},
},
},
{
name: "remove rule group",
desired: map[string][]rulefmt.RuleGroup{},
actual: map[string][]rulefmt.RuleGroup{
managedNamespace: ruleGroupsA,
},
expected: map[string][]ruleGroupDiff{
managedNamespace: {
{
Kind: ruleGroupDiffKindRemove,
Actual: ruleGroupsA[0],
},
},
},
},
{
name: "update rule group",
desired: map[string][]rulefmt.RuleGroup{
managedNamespace: ruleGroupsA,
},
actual: map[string][]rulefmt.RuleGroup{
managedNamespace: ruleGroupsAModified,
},
expected: map[string][]ruleGroupDiff{
managedNamespace: {
{
Kind: ruleGroupDiffKindUpdate,
Desired: ruleGroupsA[0],
Actual: ruleGroupsAModified[0],
},
},
},
},
{
name: "unchanged rule groups",
desired: map[string][]rulefmt.RuleGroup{
managedNamespace: ruleGroupsA,
},
actual: map[string][]rulefmt.RuleGroup{
managedNamespace: ruleGroupsA,
},
expected: map[string][]ruleGroupDiff{},
},
}

// Each case runs as its own subtest so failures are reported by case name.
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
actual := diffRuleState(tc.desired, tc.actual)
requireEqualRuleDiffs(t, tc.expected, actual)
})
}
}

// requireEqualRuleDiffs asserts that actual contains the same diffs as
// expected: the same number of namespaces, and for every expected namespace
// the same number of diffs with matching kind, desired group and actual
// group at each position.
//
// A mismatch in sizes or a missing namespace stops the test immediately. A
// mismatching diff is logged and marks the test as failed, but the remaining
// diffs are still checked.
func requireEqualRuleDiffs(t *testing.T, expected, actual map[string][]ruleGroupDiff) {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()

	require.Equal(t, len(expected), len(actual))

	// summarizeDiff renders a diff as "<kind>: <group name>" for log output.
	// It must never panic: it only runs while reporting a mismatch, and a
	// panic there would hide the failure being reported.
	summarizeDiff := func(diff ruleGroupDiff) string {
		switch diff.Kind {
		case ruleGroupDiffKindAdd:
			return fmt.Sprintf("add: %s", diff.Desired.Name)
		case ruleGroupDiffKindRemove:
			return fmt.Sprintf("remove: %s", diff.Actual.Name)
		case ruleGroupDiffKindUpdate:
			return fmt.Sprintf("update: %s", diff.Desired.Name)
		default:
			return fmt.Sprintf("unknown kind %q (desired: %s, actual: %s)",
				diff.Kind, diff.Desired.Name, diff.Actual.Name)
		}
	}

	for namespace, expectedDiffs := range expected {
		actualDiffs, ok := actual[namespace]
		require.True(t, ok, "no diffs found for namespace %q", namespace)

		require.Equal(t, len(expectedDiffs), len(actualDiffs))

		for i, expectedDiff := range expectedDiffs {
			actualDiff := actualDiffs[i]

			if expectedDiff.Kind == actualDiff.Kind &&
				equalRuleGroups(expectedDiff.Desired, actualDiff.Desired) &&
				equalRuleGroups(expectedDiff.Actual, actualDiff.Actual) {
				continue
			}

			t.Logf("expected diff: %s", summarizeDiff(expectedDiff))
			t.Logf("actual diff: %s", summarizeDiff(actualDiff))
			t.Fail()
		}
	}
}
Loading