Skip to content

Commit

Permalink
policy: rework policy/backend metrics interface.
Browse files Browse the repository at this point in the history
Simplify the policy-backend metrics collection interface,
reducing it to a single GetMetrics() call and a returned
Metrics interface which simply implements the collector-
like Describe() and Collect() interfaces. Update policy
implementations accordingly.

Signed-off-by: Krisztian Litkey <[email protected]>
  • Loading branch information
klihub committed Nov 11, 2024
1 parent 095de2c commit ea333a0
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 69 deletions.
31 changes: 13 additions & 18 deletions cmd/plugins/balloons/policy/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,14 +91,7 @@ type BalloonMetrics struct {
ContainerReqMilliCpus int
}

// DescribeMetrics generates policy-specific prometheus metrics data
// descriptors.
func (p *balloons) DescribeMetrics() []*prometheus.Desc {
return descriptors
}

// PollMetrics provides policy metrics for monitoring.
func (p *balloons) PollMetrics() policy.Metrics {
func (p *balloons) GetMetrics() policy.Metrics {
policyMetrics := &Metrics{}
policyMetrics.Balloons = make([]*BalloonMetrics, len(p.balloons))
for index, bln := range p.balloons {
Expand Down Expand Up @@ -150,16 +143,19 @@ func (p *balloons) PollMetrics() policy.Metrics {
return policyMetrics
}

// CollectMetrics generates prometheus metrics from cached/polled
// policy-specific metrics data.
func (p *balloons) CollectMetrics(m policy.Metrics) ([]prometheus.Metric, error) {
metrics, ok := m.(*Metrics)
if !ok {
return nil, balloonsError("type mismatch in balloons metrics")
func (m *Metrics) Describe(ch chan<- *prometheus.Desc) {
for _, d := range descriptors {
ch <- d
}
}

func (m *Metrics) Collect(ch chan<- prometheus.Metric) {
if m == nil {
return
}
promMetrics := make([]prometheus.Metric, len(metrics.Balloons))
for index, bm := range metrics.Balloons {
promMetrics[index] = prometheus.MustNewConstMetric(

for _, bm := range m.Balloons {
ch <- prometheus.MustNewConstMetric(
descriptors[balloonsDesc],
prometheus.GaugeValue,
float64(bm.Cpus.Size()),
Expand All @@ -185,5 +181,4 @@ func (p *balloons) CollectMetrics(m policy.Metrics) ([]prometheus.Metric, error)
bm.ContainerNames,
strconv.Itoa(bm.ContainerReqMilliCpus))
}
return promMetrics, nil
}
26 changes: 13 additions & 13 deletions cmd/plugins/template/policy/template-policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,19 +116,9 @@ func (p *policy) HandleEvent(e *events.Policy) (bool, error) {
return true, nil
}

// DescribeMetrics generates policy-specific prometheus metrics data descriptors.
func (p *policy) DescribeMetrics() []*prometheus.Desc {
return nil
}

// PollMetrics provides policy metrics for monitoring.
func (p *policy) PollMetrics() policyapi.Metrics {
return nil
}

// CollectMetrics generates prometheus metrics from cached/polled policy-specific metrics data.
func (p *policy) CollectMetrics(policyapi.Metrics) ([]prometheus.Metric, error) {
return nil, nil
// GetMetrics returns the policy-specific metrics collector.
func (p *policy) GetMetrics() policyapi.Metrics {
return &NoMetrics{}
}

// GetTopologyZones returns the policy/pool data for 'topology zone' CRDs.
Expand All @@ -145,3 +135,13 @@ func (p *policy) ExportResourceData(c cache.Container) map[string]string {
func (p *policy) initialize() error {
return nil
}

type NoMetrics struct{}

func (*NoMetrics) Describe(chan<- *prometheus.Desc) {
return
}

func (*NoMetrics) Collect(chan<- prometheus.Metric) {
return
}
39 changes: 22 additions & 17 deletions cmd/plugins/topology-aware/policy/topology-aware-policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@ import (
"fmt"

"github.com/containers/nri-plugins/pkg/utils/cpuset"
"k8s.io/apimachinery/pkg/api/resource"

"github.com/prometheus/client_golang/prometheus"
"k8s.io/apimachinery/pkg/api/resource"

cfgapi "github.com/containers/nri-plugins/pkg/apis/config/v1alpha1/resmgr/policy/topologyaware"
"github.com/containers/nri-plugins/pkg/cpuallocator"
Expand Down Expand Up @@ -68,6 +67,7 @@ type policy struct {
cpuAllocator cpuallocator.CPUAllocator // CPU allocator used by the policy
memAllocator *libmem.Allocator
coldstartOff bool // coldstart forced off (have movable PMEM zones)
metrics *TopologyAwareMetrics
}

var opt = &cfgapi.Config{}
Expand Down Expand Up @@ -115,6 +115,8 @@ func (p *policy) Setup(opts *policyapi.BackendOptions) error {
return policyError("failed to initialize %s policy: %w", PolicyName, err)
}

p.metrics = p.NewTopologyAwareMetrics()

log.Info("***** default CPU priority is %s", defaultPrio)

return nil
Expand Down Expand Up @@ -306,21 +308,6 @@ func (p *policy) HandleEvent(e *events.Policy) (bool, error) {
return false, nil
}

// DescribeMetrics generates policy-specific prometheus metrics data descriptors.
func (p *policy) DescribeMetrics() []*prometheus.Desc {
return nil
}

// PollMetrics provides policy metrics for monitoring.
func (p *policy) PollMetrics() policyapi.Metrics {
return nil
}

// CollectMetrics generates prometheus metrics from cached/polled policy-specific metrics data.
func (p *policy) CollectMetrics(policyapi.Metrics) ([]prometheus.Metric, error) {
return nil, nil
}

// GetTopologyZones returns the policy/pool data for 'topology zone' CRDs.
func (p *policy) GetTopologyZones() []*policyapi.TopologyZone {
zones := []*policyapi.TopologyZone{}
Expand Down Expand Up @@ -435,6 +422,24 @@ func (p *policy) ExportResourceData(c cache.Container) map[string]string {
return data
}

func (p *policy) GetMetrics() policyapi.Metrics {
return p.metrics
}

func (p *policy) NewTopologyAwareMetrics() *TopologyAwareMetrics {
return &TopologyAwareMetrics{}
}

type TopologyAwareMetrics struct{}

func (*TopologyAwareMetrics) Describe(ch chan<- *prometheus.Desc) {
return
}

func (*TopologyAwareMetrics) Collect(ch chan<- prometheus.Metric) {
return
}

// reallocateResources reallocates the given containers using the given pool hints
func (p *policy) reallocateResources(containers []cache.Container, pools map[string]string) error {
errs := []error{}
Expand Down
16 changes: 2 additions & 14 deletions pkg/resmgr/policy/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,21 +35,9 @@ func (c *PolicyCollector) register() error {
}

func (c *PolicyCollector) Describe(ch chan<- *prometheus.Desc) {
for _, d := range c.policy.active.DescribeMetrics() {
ch <- d
}
c.policy.active.GetMetrics().Describe(ch)
}

func (c *PolicyCollector) Collect(ch chan<- prometheus.Metric) {
polled := c.policy.active.PollMetrics()

collected, err := c.policy.active.CollectMetrics(polled)
if err != nil {
log.Error("failed to collect metrics: %v", err)
return
}

for _, m := range collected {
ch <- m
}
c.policy.active.GetMetrics().Collect(ch)
}
14 changes: 7 additions & 7 deletions pkg/resmgr/policy/policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,8 @@ type Backend interface {
HandleEvent(*events.Policy) (bool, error)
// ExportResourceData provides resource data to export for the container.
ExportResourceData(cache.Container) map[string]string
// DescribeMetrics generates policy-specific prometheus metrics data descriptors.
DescribeMetrics() []*prometheus.Desc
// PollMetrics provides policy metrics for monitoring.
PollMetrics() Metrics
// CollectMetrics generates prometheus metrics from cached/polled policy-specific metrics data.
CollectMetrics(Metrics) ([]prometheus.Metric, error)
// GetMetrics returns the policy-specific metrics collector.
GetMetrics() Metrics
// GetTopologyZones returns the policy/pool data for 'topology zone' CRDs.
GetTopologyZones() []*TopologyZone
}
Expand Down Expand Up @@ -151,7 +147,11 @@ type Policy interface {
GetTopologyZones() []*TopologyZone
}

type Metrics interface{}
// Metrics is the interface we expect policy-specific metrics to implement.
type Metrics interface {
Describe(chan<- *prometheus.Desc)
Collect(chan<- prometheus.Metric)
}

// Node resource topology resource and attribute names.
// XXX TODO(klihub): We'll probably need to add similar unified consts
Expand Down

0 comments on commit ea333a0

Please sign in to comment.