Skip to content

Commit

Permalink
Add support for configuring Grafana Mimir via PrometheusRule CRDs (#2604
Browse files Browse the repository at this point in the history
)

* Initial polling of rules from Grafana Cloud

Client copied from cortextool

* Clean up CortexClient and rename to Mimir

- Use newer ruler API urls
- Remove usage of ioutil
- Rename Cortex to Mimir

* Initial implementation pulling from CRDs

* Add diffing algorithm to update rules

* Apply diffs to Mimir

* Rewrite reconciliation to use k8s informer pattern

* Only update rule groups that actually change

* Move arguments into a separate file

* Split implementation into smaller files

* Add namespace informer

- Also filter resources in the informer to avoid unnecessary events and allocations

* Map PrometheusRule 1:1 to Mimir rule namespaces

Only namespaces matching the expected naming convention are reconciled

* Implement the DebugComponent interface

* Remove outdated comment

* Backfill tests for core diffing algorithm

* Add configurable namespace prefix

- This allows multiple agents to manage groups of namespaces without conflicting

* Rename arguments.go to types.go

* Simplify event handler to use shared implementation

* Add integration tests for event pipeline

* Simplify MimirClient

- Remove methods copied from cortextool that are unused here

* Remove logrus from mimir client

Use go-kit/log instead

* Remove redundant default setting

* Remove extra logging from MimirClient

* Replace default argument value setting

* Retry event processing up to 5 times

* Add metrics for the workqueue and k8s client

- These are namespaced `prometheus_sd_kubernetes` because the
  registration is global.

* Add metrics for mimir client and event pipeline

* Address PR feedback

- Use snake case in river tags
- Export minimum api from packages
- Re-use existing config type for mimir client
- Implement component health
- Set defaults in Unmarshal method
- Remove exports type
- Use river blocks where appropriate
- Rename to mimir.rules.kubernetes
- Initialize with config during construction

Co-authored-by: Robert Fratto <[email protected]>

* Fix linter

* Fix linter

* Fix race condition in tests

* Fix linter

* Add explanation of the different yaml packages used

* Use hashicorp/go-multierror instead of dskit

* Implement event handler interface separately

- The component no longer implements the event handler interface

* Remove remote write config from mimir client

- This is a GEM-only feature and rarely used. We can add it back in
  the future if needed.

* Add CHANGELOG and docs

* Rename component directory to match naming

* Define a type for the namespace-grouped rule groups

* Apply suggestions from code review

* Replace metric descriptions with a table

Co-authored-by: Robert Fratto <[email protected]>
Co-authored-by: Karen Germond <[email protected]>
  • Loading branch information
3 people authored Jan 6, 2023
1 parent 308955c commit 3d62816
Show file tree
Hide file tree
Showing 18 changed files with 1,837 additions and 0 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,12 @@ v0.30.0 (2022-12-20)
- `discovery.file` discovers files on the filesystem following glob
patterns. (@mattdurham)

- `mimir.rules.kubernetes` discovers `PrometheusRule` Kubernetes resources and
loads them into a Mimir instance. (@Logiraptor)

- Integrations: Introduce the `snowflake` integration. (@binaryfissiongames)


### Enhancements

- Integrations: Always use direct connection in mongodb_exporter integration. (@v-zhuravlev)
Expand Down
1 change: 1 addition & 0 deletions component/all/all.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
_ "github.com/grafana/agent/component/loki/relabel" // Import loki.relabel
_ "github.com/grafana/agent/component/loki/source/file" // Import loki.source.file
_ "github.com/grafana/agent/component/loki/write" // Import loki.write
_ "github.com/grafana/agent/component/mimir/rules/kubernetes" // Import mimir.rules.kubernetes
_ "github.com/grafana/agent/component/otelcol/auth/basic" // Import otelcol.auth.basic
_ "github.com/grafana/agent/component/otelcol/auth/bearer" // Import otelcol.auth.bearer
_ "github.com/grafana/agent/component/otelcol/auth/headers" // Import otelcol.auth.headers
Expand Down
64 changes: 64 additions & 0 deletions component/mimir/rules/kubernetes/debug.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package rules

import "fmt"

// DebugInfo is the debug report produced by Component.DebugInfo. Every field
// is an optional River attribute or block.
type DebugInfo struct {
// Error is set when listing namespaces or rules failed while building the report.
Error string `river:"error,attr,optional"`
// PrometheusRules holds one entry per PrometheusRule resource that was listed.
PrometheusRules []DebugK8sPrometheusRule `river:"prometheus_rule,block,optional"`
// MimirRuleNamespaces holds one entry per managed rule namespace in the component's current state.
MimirRuleNamespaces []DebugMimirNamespace `river:"mimir_rule_namespace,block,optional"`
}

// DebugK8sPrometheusRule summarizes a single PrometheusRule resource for the
// debug report.
type DebugK8sPrometheusRule struct {
// Namespace is the name of the Kubernetes namespace the resource was listed from.
Namespace string `river:"namespace,attr"`
// Name is the resource's name.
Name string `river:"name,attr"`
// UID is the resource's UID rendered as a string.
UID string `river:"uid,attr"`
// NumRuleGroups is the number of groups in the resource's spec.
NumRuleGroups int `river:"num_rule_groups,attr"`
}

// DebugMimirNamespace summarizes a single rule namespace from the component's
// current state for the debug report.
type DebugMimirNamespace struct {
// Name is the rule namespace name.
Name string `river:"name,attr"`
// NumRuleGroups is the number of rule groups held for that namespace.
NumRuleGroups int `river:"num_rule_groups,attr"`
}

// DebugInfo builds a DebugInfo report for the component.
//
// The report contains one entry for every managed rule namespace found in the
// component's current state and one entry for every PrometheusRule returned
// by the rule lister in each namespace returned by the namespace lister.
//
// If either lister returns an error, a DebugInfo carrying only the Error
// field is returned and everything gathered so far is discarded.
func (c *Component) DebugInfo() interface{} {
	var info DebugInfo

	// Report only the rule namespaces that carry this component's prefix.
	for name, groups := range c.currentState {
		if isManagedMimirNamespace(c.args.MimirNameSpacePrefix, name) {
			info.MimirRuleNamespaces = append(info.MimirRuleNamespaces, DebugMimirNamespace{
				Name:          name,
				NumRuleGroups: len(groups),
			})
		}
	}

	// This should load from the informer cache, so it shouldn't fail under normal circumstances.
	k8sNamespaces, err := c.namespaceLister.List(c.namespaceSelector)
	if err != nil {
		return DebugInfo{Error: fmt.Sprintf("failed to list namespaces: %v", err)}
	}

	for _, k8sNamespace := range k8sNamespaces {
		// This should load from the informer cache, so it shouldn't fail under normal circumstances.
		promRules, err := c.ruleLister.PrometheusRules(k8sNamespace.Name).List(c.ruleSelector)
		if err != nil {
			return DebugInfo{Error: fmt.Sprintf("failed to list rules: %v", err)}
		}

		for _, promRule := range promRules {
			summary := DebugK8sPrometheusRule{
				Namespace:     k8sNamespace.Name,
				Name:          promRule.Name,
				UID:           string(promRule.UID),
				NumRuleGroups: len(promRule.Spec.Groups),
			}
			info.PrometheusRules = append(info.PrometheusRules, summary)
		}
	}

	return info
}
113 changes: 113 additions & 0 deletions component/mimir/rules/kubernetes/diff.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package rules

import (
"bytes"

"github.com/prometheus/prometheus/model/rulefmt"
"gopkg.in/yaml.v3" // Used for prometheus rulefmt compatibility instead of gopkg.in/yaml.v2
)

// ruleGroupDiffKind names the kind of change a ruleGroupDiff describes.
type ruleGroupDiffKind string

const (
// ruleGroupDiffKindAdd marks a desired rule group with no same-named actual group.
ruleGroupDiffKindAdd ruleGroupDiffKind = "add"
// ruleGroupDiffKindRemove marks an actual rule group with no same-named desired group.
ruleGroupDiffKindRemove ruleGroupDiffKind = "remove"
// ruleGroupDiffKindUpdate marks a rule group present on both sides whose contents differ.
ruleGroupDiffKindUpdate ruleGroupDiffKind = "update"
)

// ruleGroupDiff describes a single change to one rule group.
type ruleGroupDiff struct {
// Kind says whether the group is added, removed or updated.
Kind ruleGroupDiffKind
// Actual is the existing rule group; it is left at its zero value for "add" diffs.
Actual rulefmt.RuleGroup
// Desired is the wanted rule group; it is left at its zero value for "remove" diffs.
Desired rulefmt.RuleGroup
}

// ruleGroupsByNamespace maps a rule namespace name to the rule groups in it.
type ruleGroupsByNamespace map[string][]rulefmt.RuleGroup
// ruleGroupDiffsByNamespace maps a rule namespace name to the rule group changes computed for it.
type ruleGroupDiffsByNamespace map[string][]ruleGroupDiff

// diffRuleState compares the desired rule groups with the actual rule groups,
// namespace by namespace, and returns the changes needed to turn actual into
// desired.
//
// Namespaces present in desired are diffed against the same-named namespace
// in actual (or against nothing if it does not exist there). Namespaces
// present only in actual are diffed against an empty desired set, so every
// group in them is reported as a removal.
//
// The result contains only namespaces that need at least one change; a
// namespace whose diff is empty is never added to the map. The returned map
// is always non-nil.
func diffRuleState(desired, actual ruleGroupsByNamespace) ruleGroupDiffsByNamespace {
	diff := make(ruleGroupDiffsByNamespace)

	for namespace, desiredRuleGroups := range desired {
		// Indexing a missing namespace yields a nil slice, which makes every
		// desired group an addition.
		subDiff := diffRuleNamespaceState(desiredRuleGroups, actual[namespace])
		if len(subDiff) > 0 {
			diff[namespace] = subDiff
		}
	}

	for namespace, actualRuleGroups := range actual {
		// Namespaces that are also desired were handled by the loop above.
		if _, handled := desired[namespace]; handled {
			continue
		}

		// A namespace that exists only in actual but holds no rule groups
		// needs no change, so it is skipped just like an unchanged namespace.
		subDiff := diffRuleNamespaceState(nil, actualRuleGroups)
		if len(subDiff) > 0 {
			diff[namespace] = subDiff
		}
	}

	return diff
}

// diffRuleNamespaceState computes the changes needed to turn the actual rule
// groups of one namespace into the desired ones. Groups are matched by name.
//
// The result lists, in this order:
//   - for each desired group, in input order: an "add" diff when no actual
//     group has the same name, an "update" diff when the same-named actual
//     group is not equal according to equalRuleGroups, and nothing when the
//     two are equal;
//   - for each actual group, in input order, whose name is not desired: a
//     "remove" diff.
//
// It returns nil when no changes are needed. If actual contains several
// groups with the same name, desired groups are compared against the first.
func diffRuleNamespaceState(desired []rulefmt.RuleGroup, actual []rulefmt.RuleGroup) []ruleGroupDiff {
	// Index the actual groups by name so each desired group is matched with a
	// single lookup instead of a scan over actual. Only the first occurrence
	// of a name is recorded, which matches what a front-to-back scan finds.
	actualIndexByName := make(map[string]int, len(actual))
	for i := range actual {
		if _, duplicate := actualIndexByName[actual[i].Name]; !duplicate {
			actualIndexByName[actual[i].Name] = i
		}
	}

	var diff []ruleGroupDiff

	desiredNames := make(map[string]struct{}, len(desired))

	for _, desiredRuleGroup := range desired {
		desiredNames[desiredRuleGroup.Name] = struct{}{}

		i, found := actualIndexByName[desiredRuleGroup.Name]
		switch {
		case !found:
			diff = append(diff, ruleGroupDiff{
				Kind:    ruleGroupDiffKindAdd,
				Desired: desiredRuleGroup,
			})
		case !equalRuleGroups(desiredRuleGroup, actual[i]):
			diff = append(diff, ruleGroupDiff{
				Kind:    ruleGroupDiffKindUpdate,
				Actual:  actual[i],
				Desired: desiredRuleGroup,
			})
		}
	}

	// Anything left in actual that nobody asked for has to go.
	for _, actualRuleGroup := range actual {
		if _, wanted := desiredNames[actualRuleGroup.Name]; wanted {
			continue
		}

		diff = append(diff, ruleGroupDiff{
			Kind:   ruleGroupDiffKindRemove,
			Actual: actualRuleGroup,
		})
	}

	return diff
}

// equalRuleGroups reports whether a and b marshal to byte-identical YAML.
// If either group fails to marshal, the groups are reported as not equal.
func equalRuleGroups(a, b rulefmt.RuleGroup) bool {
	var encoded [2][]byte

	// Marshal a first, then b; stop at the first failure.
	for i, group := range [...]rulefmt.RuleGroup{a, b} {
		out, err := yaml.Marshal(group)
		if err != nil {
			return false
		}
		encoded[i] = out
	}

	return bytes.Equal(encoded[0], encoded[1])
}
157 changes: 157 additions & 0 deletions component/mimir/rules/kubernetes/diff_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
package rules

import (
"fmt"
"testing"

"github.com/prometheus/prometheus/model/rulefmt"
"github.com/stretchr/testify/require"
)

// parseRuleGroups parses buf with rulefmt.Parse and returns the resulting
// rule groups. The test fails immediately if parsing reports any errors.
func parseRuleGroups(t *testing.T, buf []byte) []rulefmt.RuleGroup {
	t.Helper()

	parsed, parseErrs := rulefmt.Parse(buf)
	require.Empty(t, parseErrs)
	return parsed.Groups
}

// TestDiffRuleState runs diffRuleState over a table of desired/actual states
// and checks the returned diffs with requireEqualRuleDiffs. The cases cover
// empty input, adding, removing and updating a rule group, and unchanged
// rule groups.
func TestDiffRuleState(t *testing.T) {
// Baseline fixture: a single rule group with one recording rule.
ruleGroupsA := parseRuleGroups(t, []byte(`
groups:
- name: rule-group-a
interval: 1m
rules:
- record: rule_a
expr: 1
`))

// Same group name as ruleGroupsA, but the rule expression differs.
ruleGroupsAModified := parseRuleGroups(t, []byte(`
groups:
- name: rule-group-a
interval: 1m
rules:
- record: rule_a
expr: 3
`))

// Namespace key shared by every test case below.
managedNamespace := "agent/namespace/name/12345678-1234-1234-1234-123456789012"

// testCase pairs the inputs to diffRuleState with the diffs it is expected to return.
type testCase struct {
name string
desired map[string][]rulefmt.RuleGroup
actual map[string][]rulefmt.RuleGroup
expected map[string][]ruleGroupDiff
}

testCases := []testCase{
{
name: "empty sets",
desired: map[string][]rulefmt.RuleGroup{},
actual: map[string][]rulefmt.RuleGroup{},
expected: map[string][]ruleGroupDiff{},
},
{
name: "add rule group",
desired: map[string][]rulefmt.RuleGroup{
managedNamespace: ruleGroupsA,
},
actual: map[string][]rulefmt.RuleGroup{},
expected: map[string][]ruleGroupDiff{
managedNamespace: {
{
Kind: ruleGroupDiffKindAdd,
Desired: ruleGroupsA[0],
},
},
},
},
{
name: "remove rule group",
desired: map[string][]rulefmt.RuleGroup{},
actual: map[string][]rulefmt.RuleGroup{
managedNamespace: ruleGroupsA,
},
expected: map[string][]ruleGroupDiff{
managedNamespace: {
{
Kind: ruleGroupDiffKindRemove,
Actual: ruleGroupsA[0],
},
},
},
},
{
name: "update rule group",
desired: map[string][]rulefmt.RuleGroup{
managedNamespace: ruleGroupsA,
},
actual: map[string][]rulefmt.RuleGroup{
managedNamespace: ruleGroupsAModified,
},
expected: map[string][]ruleGroupDiff{
managedNamespace: {
{
Kind: ruleGroupDiffKindUpdate,
Desired: ruleGroupsA[0],
Actual: ruleGroupsAModified[0],
},
},
},
},
{
name: "unchanged rule groups",
desired: map[string][]rulefmt.RuleGroup{
managedNamespace: ruleGroupsA,
},
actual: map[string][]rulefmt.RuleGroup{
managedNamespace: ruleGroupsA,
},
expected: map[string][]ruleGroupDiff{},
},
}

// Each case runs as its own subtest, named after the case.
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
actual := diffRuleState(tc.desired, tc.actual)
requireEqualRuleDiffs(t, tc.expected, actual)
})
}
}

// requireEqualRuleDiffs checks that actual matches expected.
//
// Both maps must have the same number of namespaces, every expected namespace
// must be present in actual with the same number of diffs, and the diffs at
// each position must have the same Kind and Desired/Actual rule groups that
// are equal according to equalRuleGroups.
//
// A count mismatch or missing namespace stops the test immediately. A
// mismatching diff is reported with its namespace and index and marks the
// test as failed, but the remaining diffs are still checked.
func requireEqualRuleDiffs(t *testing.T, expected, actual map[string][]ruleGroupDiff) {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()

	require.Equal(t, len(expected), len(actual), "number of namespaces with diffs")

	// summarizeDiff renders a diff as "<kind>: <group name>" for failure
	// output. An unexpected kind is rendered as such instead of panicking, so
	// a malformed diff produces a readable failure rather than aborting the
	// test binary.
	summarizeDiff := func(diff ruleGroupDiff) string {
		switch diff.Kind {
		case ruleGroupDiffKindAdd:
			return fmt.Sprintf("add: %s", diff.Desired.Name)
		case ruleGroupDiffKindRemove:
			return fmt.Sprintf("remove: %s", diff.Actual.Name)
		case ruleGroupDiffKindUpdate:
			return fmt.Sprintf("update: %s", diff.Desired.Name)
		default:
			return fmt.Sprintf("unknown kind %q", diff.Kind)
		}
	}

	for namespace, expectedDiffs := range expected {
		actualDiffs, ok := actual[namespace]
		require.True(t, ok, "no diffs returned for namespace %q", namespace)

		require.Equal(t, len(expectedDiffs), len(actualDiffs), "number of diffs for namespace %q", namespace)

		for i, expectedDiff := range expectedDiffs {
			actualDiff := actualDiffs[i]

			if expectedDiff.Kind == actualDiff.Kind &&
				equalRuleGroups(expectedDiff.Desired, actualDiff.Desired) &&
				equalRuleGroups(expectedDiff.Actual, actualDiff.Actual) {
				continue
			}

			t.Errorf("namespace %q, diff %d:\nexpected diff: %s\nactual diff: %s",
				namespace, i, summarizeDiff(expectedDiff), summarizeDiff(actualDiff))
		}
	}
}
Loading

0 comments on commit 3d62816

Please sign in to comment.