-
Notifications
You must be signed in to change notification settings - Fork 487
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for configuring Grafana Mimir via PrometheusRule CRDs (#2604)
* Initial polling of rules from Grafana Cloud Client copied from cortextool * Clean up CortexClient and rename to Mimir - Use newer ruler API urls - Remove usage of ioutil - Rename Cortex to Mimir * Initial implementation pulling from CRDs * Add diffing algorithm to update rules * Apply diffs to Mimir * Rewrite reconciliation to use k8s informer pattern * Only update rule groups that actually change * Move arguments into a separate file * Split implementation into smaller files * Add namespace informer - Also filter resources in the informer to avoid unnecessary events and allocations * Map PrometheusRule 1:1 to Mimir rule namespaces Only namespaces matching the expected naming convention are reconciled * Implement the DebugComponent interface * Remove outdated comment * Backfill tests for core diffing algorithm * Add configurable namespace prefix - This allows multiple agents to manage groups of namespaces without conflicting * Rename arguments.go to types.go * Simplify event handler to use shared implementation * Add integration tests for event pipeline * Simplify MimirClient - Remove methods copied from cortextool that are unused here * Remove logrus from mimir client Use go-kit/log instead * Remove redundant default setting * Remove extra logging from MimirClient * Replace default argument value setting * Retry event processing up to 5 times * Add metrics for the workqueue and k8s client - These are namespaced `prometheus_sd_kubernetes` because the registration is global. 
* Add metrics for mimir client and event pipeline * Address PR feedback - Use snake case in river tags - Export minimum api from packages - Re-use existing config type for mimir client - Implement component health - Set defaults in Unmarshal method - Remove exports type - Use river blocks where appropriate - Rename to mimir.rules.kubernetes - Initialize with config during construction Co-authored-by: Robert Fratto <[email protected]> * Fix linter * Fix linter * Fix race condition in tests * Fix linter * Add explanation of the different yaml packages used * Use hashicorp/go-multierror instead of dskit * Implement event handler interface separately - The component no longer implements the event handler interface * Remove remote write config from mimir client - This is a GEM-only feature and rarely used. We can add it back in the future if needed. * Add CHANGELOG and docs * Rename component directory to match naming * Define a type for the namespace-grouped rule groups * Apply suggestions from code review * Replace metric descriptions with a table Co-authored-by: Robert Fratto <[email protected]> Co-authored-by: Karen Germond <[email protected]>
- Loading branch information
1 parent
308955c
commit 3d62816
Showing
18 changed files
with
1,837 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
package rules | ||
|
||
import "fmt" | ||
|
||
type DebugInfo struct { | ||
Error string `river:"error,attr,optional"` | ||
PrometheusRules []DebugK8sPrometheusRule `river:"prometheus_rule,block,optional"` | ||
MimirRuleNamespaces []DebugMimirNamespace `river:"mimir_rule_namespace,block,optional"` | ||
} | ||
|
||
// DebugK8sPrometheusRule summarizes a single PrometheusRule resource for
// debug output.
type DebugK8sPrometheusRule struct {
	// Namespace is the Kubernetes namespace the resource was listed from.
	Namespace string `river:"namespace,attr"`
	// Name is the name of the PrometheusRule resource.
	Name string `river:"name,attr"`
	// UID is the resource UID rendered as a string.
	UID string `river:"uid,attr"`
	// NumRuleGroups is the number of rule groups in the resource's spec.
	NumRuleGroups int `river:"num_rule_groups,attr"`
}
|
||
// DebugMimirNamespace summarizes a single Mimir rule namespace for debug
// output.
type DebugMimirNamespace struct {
	// Name is the name of the Mimir rule namespace.
	Name string `river:"name,attr"`
	// NumRuleGroups is the number of rule groups recorded for the namespace.
	NumRuleGroups int `river:"num_rule_groups,attr"`
}
|
||
func (c *Component) DebugInfo() interface{} { | ||
var output DebugInfo | ||
for ns := range c.currentState { | ||
if !isManagedMimirNamespace(c.args.MimirNameSpacePrefix, ns) { | ||
continue | ||
} | ||
|
||
output.MimirRuleNamespaces = append(output.MimirRuleNamespaces, DebugMimirNamespace{ | ||
Name: ns, | ||
NumRuleGroups: len(c.currentState[ns]), | ||
}) | ||
} | ||
|
||
// This should load from the informer cache, so it shouldn't fail under normal circumstances. | ||
managedK8sNamespaces, err := c.namespaceLister.List(c.namespaceSelector) | ||
if err != nil { | ||
return DebugInfo{ | ||
Error: fmt.Sprintf("failed to list namespaces: %v", err), | ||
} | ||
} | ||
|
||
for _, n := range managedK8sNamespaces { | ||
// This should load from the informer cache, so it shouldn't fail under normal circumstances. | ||
rules, err := c.ruleLister.PrometheusRules(n.Name).List(c.ruleSelector) | ||
if err != nil { | ||
return DebugInfo{ | ||
Error: fmt.Sprintf("failed to list rules: %v", err), | ||
} | ||
} | ||
|
||
for _, r := range rules { | ||
output.PrometheusRules = append(output.PrometheusRules, DebugK8sPrometheusRule{ | ||
Namespace: n.Name, | ||
Name: r.Name, | ||
UID: string(r.UID), | ||
NumRuleGroups: len(r.Spec.Groups), | ||
}) | ||
} | ||
} | ||
|
||
return output | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
package rules | ||
|
||
import ( | ||
"bytes" | ||
|
||
"github.com/prometheus/prometheus/model/rulefmt" | ||
"gopkg.in/yaml.v3" // Used for prometheus rulefmt compatibility instead of gopkg.in/yaml.v2 | ||
) | ||
|
||
// ruleGroupDiffKind names the kind of change a ruleGroupDiff describes.
type ruleGroupDiffKind string

const (
	// ruleGroupDiffKindAdd marks a rule group that is desired but not present.
	ruleGroupDiffKindAdd ruleGroupDiffKind = "add"
	// ruleGroupDiffKindRemove marks a rule group that is present but not desired.
	ruleGroupDiffKindRemove ruleGroupDiffKind = "remove"
	// ruleGroupDiffKindUpdate marks a rule group present on both sides with
	// differing contents.
	ruleGroupDiffKindUpdate ruleGroupDiffKind = "update"
)
|
||
type ruleGroupDiff struct { | ||
Kind ruleGroupDiffKind | ||
Actual rulefmt.RuleGroup | ||
Desired rulefmt.RuleGroup | ||
} | ||
|
||
type ruleGroupsByNamespace map[string][]rulefmt.RuleGroup | ||
type ruleGroupDiffsByNamespace map[string][]ruleGroupDiff | ||
|
||
func diffRuleState(desired, actual ruleGroupsByNamespace) ruleGroupDiffsByNamespace { | ||
seenNamespaces := map[string]bool{} | ||
|
||
diff := make(ruleGroupDiffsByNamespace) | ||
|
||
for namespace, desiredRuleGroups := range desired { | ||
seenNamespaces[namespace] = true | ||
|
||
actualRuleGroups := actual[namespace] | ||
subDiff := diffRuleNamespaceState(desiredRuleGroups, actualRuleGroups) | ||
|
||
if len(subDiff) == 0 { | ||
continue | ||
} | ||
|
||
diff[namespace] = subDiff | ||
} | ||
|
||
for namespace, actualRuleGroups := range actual { | ||
if seenNamespaces[namespace] { | ||
continue | ||
} | ||
|
||
subDiff := diffRuleNamespaceState(nil, actualRuleGroups) | ||
|
||
diff[namespace] = subDiff | ||
} | ||
|
||
return diff | ||
} | ||
|
||
func diffRuleNamespaceState(desired []rulefmt.RuleGroup, actual []rulefmt.RuleGroup) []ruleGroupDiff { | ||
var diff []ruleGroupDiff | ||
|
||
seenGroups := map[string]bool{} | ||
|
||
desiredGroups: | ||
for _, desiredRuleGroup := range desired { | ||
seenGroups[desiredRuleGroup.Name] = true | ||
|
||
for _, actualRuleGroup := range actual { | ||
if desiredRuleGroup.Name == actualRuleGroup.Name { | ||
if equalRuleGroups(desiredRuleGroup, actualRuleGroup) { | ||
continue desiredGroups | ||
} | ||
|
||
diff = append(diff, ruleGroupDiff{ | ||
Kind: ruleGroupDiffKindUpdate, | ||
Actual: actualRuleGroup, | ||
Desired: desiredRuleGroup, | ||
}) | ||
continue desiredGroups | ||
} | ||
} | ||
|
||
diff = append(diff, ruleGroupDiff{ | ||
Kind: ruleGroupDiffKindAdd, | ||
Desired: desiredRuleGroup, | ||
}) | ||
} | ||
|
||
for _, actualRuleGroup := range actual { | ||
if seenGroups[actualRuleGroup.Name] { | ||
continue | ||
} | ||
|
||
diff = append(diff, ruleGroupDiff{ | ||
Kind: ruleGroupDiffKindRemove, | ||
Actual: actualRuleGroup, | ||
}) | ||
} | ||
|
||
return diff | ||
} | ||
|
||
func equalRuleGroups(a, b rulefmt.RuleGroup) bool { | ||
aBuf, err := yaml.Marshal(a) | ||
if err != nil { | ||
return false | ||
} | ||
bBuf, err := yaml.Marshal(b) | ||
if err != nil { | ||
return false | ||
} | ||
|
||
return bytes.Equal(aBuf, bBuf) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
package rules | ||
|
||
import ( | ||
"fmt" | ||
"testing" | ||
|
||
"github.com/prometheus/prometheus/model/rulefmt" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func parseRuleGroups(t *testing.T, buf []byte) []rulefmt.RuleGroup { | ||
t.Helper() | ||
|
||
groups, errs := rulefmt.Parse(buf) | ||
require.Empty(t, errs) | ||
|
||
return groups.Groups | ||
} | ||
|
||
func TestDiffRuleState(t *testing.T) { | ||
ruleGroupsA := parseRuleGroups(t, []byte(` | ||
groups: | ||
- name: rule-group-a | ||
interval: 1m | ||
rules: | ||
- record: rule_a | ||
expr: 1 | ||
`)) | ||
|
||
ruleGroupsAModified := parseRuleGroups(t, []byte(` | ||
groups: | ||
- name: rule-group-a | ||
interval: 1m | ||
rules: | ||
- record: rule_a | ||
expr: 3 | ||
`)) | ||
|
||
managedNamespace := "agent/namespace/name/12345678-1234-1234-1234-123456789012" | ||
|
||
type testCase struct { | ||
name string | ||
desired map[string][]rulefmt.RuleGroup | ||
actual map[string][]rulefmt.RuleGroup | ||
expected map[string][]ruleGroupDiff | ||
} | ||
|
||
testCases := []testCase{ | ||
{ | ||
name: "empty sets", | ||
desired: map[string][]rulefmt.RuleGroup{}, | ||
actual: map[string][]rulefmt.RuleGroup{}, | ||
expected: map[string][]ruleGroupDiff{}, | ||
}, | ||
{ | ||
name: "add rule group", | ||
desired: map[string][]rulefmt.RuleGroup{ | ||
managedNamespace: ruleGroupsA, | ||
}, | ||
actual: map[string][]rulefmt.RuleGroup{}, | ||
expected: map[string][]ruleGroupDiff{ | ||
managedNamespace: { | ||
{ | ||
Kind: ruleGroupDiffKindAdd, | ||
Desired: ruleGroupsA[0], | ||
}, | ||
}, | ||
}, | ||
}, | ||
{ | ||
name: "remove rule group", | ||
desired: map[string][]rulefmt.RuleGroup{}, | ||
actual: map[string][]rulefmt.RuleGroup{ | ||
managedNamespace: ruleGroupsA, | ||
}, | ||
expected: map[string][]ruleGroupDiff{ | ||
managedNamespace: { | ||
{ | ||
Kind: ruleGroupDiffKindRemove, | ||
Actual: ruleGroupsA[0], | ||
}, | ||
}, | ||
}, | ||
}, | ||
{ | ||
name: "update rule group", | ||
desired: map[string][]rulefmt.RuleGroup{ | ||
managedNamespace: ruleGroupsA, | ||
}, | ||
actual: map[string][]rulefmt.RuleGroup{ | ||
managedNamespace: ruleGroupsAModified, | ||
}, | ||
expected: map[string][]ruleGroupDiff{ | ||
managedNamespace: { | ||
{ | ||
Kind: ruleGroupDiffKindUpdate, | ||
Desired: ruleGroupsA[0], | ||
Actual: ruleGroupsAModified[0], | ||
}, | ||
}, | ||
}, | ||
}, | ||
{ | ||
name: "unchanged rule groups", | ||
desired: map[string][]rulefmt.RuleGroup{ | ||
managedNamespace: ruleGroupsA, | ||
}, | ||
actual: map[string][]rulefmt.RuleGroup{ | ||
managedNamespace: ruleGroupsA, | ||
}, | ||
expected: map[string][]ruleGroupDiff{}, | ||
}, | ||
} | ||
|
||
for _, tc := range testCases { | ||
t.Run(tc.name, func(t *testing.T) { | ||
actual := diffRuleState(tc.desired, tc.actual) | ||
requireEqualRuleDiffs(t, tc.expected, actual) | ||
}) | ||
} | ||
} | ||
|
||
func requireEqualRuleDiffs(t *testing.T, expected, actual map[string][]ruleGroupDiff) { | ||
require.Equal(t, len(expected), len(actual)) | ||
|
||
var summarizeDiff = func(diff ruleGroupDiff) string { | ||
switch diff.Kind { | ||
case ruleGroupDiffKindAdd: | ||
return fmt.Sprintf("add: %s", diff.Desired.Name) | ||
case ruleGroupDiffKindRemove: | ||
return fmt.Sprintf("remove: %s", diff.Actual.Name) | ||
case ruleGroupDiffKindUpdate: | ||
return fmt.Sprintf("update: %s", diff.Desired.Name) | ||
} | ||
panic("unreachable") | ||
} | ||
|
||
for namespace, expectedDiffs := range expected { | ||
actualDiffs, ok := actual[namespace] | ||
require.True(t, ok) | ||
|
||
require.Equal(t, len(expectedDiffs), len(actualDiffs)) | ||
|
||
for i, expectedDiff := range expectedDiffs { | ||
actualDiff := actualDiffs[i] | ||
|
||
if expectedDiff.Kind != actualDiff.Kind || | ||
!equalRuleGroups(expectedDiff.Desired, actualDiff.Desired) || | ||
!equalRuleGroups(expectedDiff.Actual, actualDiff.Actual) { | ||
|
||
t.Logf("expected diff: %s", summarizeDiff(expectedDiff)) | ||
t.Logf("actual diff: %s", summarizeDiff(actualDiff)) | ||
t.Fail() | ||
} | ||
} | ||
} | ||
} |
Oops, something went wrong.