Skip to content

Commit

Permalink
Merge pull request #1284 from flanksource/metrics-debug
Browse files Browse the repository at this point in the history
chore: improve metrics troubleshooting
  • Loading branch information
moshloop authored Oct 2, 2023
2 parents fe33c51 + 9d227e9 commit 8fbe884
Show file tree
Hide file tree
Showing 6 changed files with 257 additions and 74 deletions.
2 changes: 1 addition & 1 deletion api/context/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ func New(client *kommons.Client, kubernetes kubernetes.Interface, db *gorm.DB, c
}

// IsDebug reports whether debug output should be produced for this context.
// It is true when ctx.Canary.IsDebug() reports true or when tracing is on, so
// turning on trace also turns on debug output.
func (ctx *Context) IsDebug() bool {
	return ctx.Canary.IsDebug() || ctx.IsTrace()
}

func (ctx *Context) IsTrace() bool {
Expand Down
169 changes: 101 additions & 68 deletions checks/metrics.go
Original file line number Diff line number Diff line change
@@ -1,43 +1,96 @@
package checks

import (
"encoding/json"
"fmt"
"sort"
"strconv"
"strings"
"time"

"github.com/flanksource/canary-checker/api/context"
"github.com/flanksource/canary-checker/api/external"
v1 "github.com/flanksource/canary-checker/api/v1"
"github.com/flanksource/canary-checker/pkg"
"github.com/flanksource/commons/logger"
"github.com/prometheus/client_golang/prometheus"
)

var collectorMap = make(map[string]prometheus.Collector)

func addPrometheusMetric(name, metricType string, labelNames []string) prometheus.Collector {
func getOrAddPrometheusMetric(name, metricType string, labelNames []string) (prometheus.Collector, error) {
key := name + metricType + strings.Join(labelNames, ",")
if collector, exists := collectorMap[key]; exists {
return collector, nil
}
var collector prometheus.Collector
switch metricType {
case "histogram":
collector = prometheus.NewHistogramVec(
prometheus.HistogramOpts{Name: name},
labelNames,
)
prometheus.HistogramOpts{Name: name}, labelNames)
case "counter":
collector = prometheus.NewCounterVec(
prometheus.CounterOpts{Name: name},
labelNames,
)
prometheus.CounterOpts{Name: name}, labelNames)
case "gauge":
collector = prometheus.NewGaugeVec(
prometheus.GaugeOpts{Name: name},
labelNames,
)
prometheus.GaugeOpts{Name: name}, labelNames)
default:
return nil
return nil, fmt.Errorf("unknown metric type %s", metricType)
}

collectorMap[name] = collector
prometheus.MustRegister(collector)
return collector
collectorMap[key] = collector
return collector, prometheus.Register(collector)
}

// getWithEnvironment returns ctx.New(...) seeded with the template environment
// for a single check result. The environment has three top-level keys:
//
//	result - r.Data
//	canary - name, namespace, labels and persisted id of r.Canary
//	check  - name, id, description, labels, endpoint and duration of r.Check
func getWithEnvironment(ctx *context.Context, r *pkg.CheckResult) *context.Context {
	canaryEnv := map[string]any{
		"name":      r.Canary.GetName(),
		"namespace": r.Canary.GetNamespace(),
		"labels":    r.Canary.GetLabels(),
		"id":        r.Canary.GetPersistedID(),
	}

	checkEnv := map[string]any{
		"name":        r.Check.GetName(),
		"id":          r.Canary.GetCheckID(r.Check.GetName()),
		"description": r.Check.GetDescription(),
		"labels":      r.Check.GetLabels(),
		"endpoint":    r.Check.GetEndpoint(),
		// NOTE(review): GetDuration() is treated as a millisecond count here — confirm.
		"duration": time.Duration(r.GetDuration()) * time.Millisecond,
	}

	env := map[string]any{
		"result": r.Data,
		"canary": canaryEnv,
		"check":  checkEnv,
	}
	return ctx.New(env)
}

// getLabels resolves the labels declared on metric into concrete values.
//
// A label's static Value is used unless ValueExpr is set, in which case the
// expression is evaluated with template against ctx. It returns the
// name -> value map together with the label names in sorted order, or an error
// naming the label and expression that could not be evaluated.
func getLabels(ctx *context.Context, metric external.Metrics) (map[string]string, []string, error) {
	labels := make(map[string]string, len(metric.Labels))
	names := make([]string, 0, len(metric.Labels))
	for _, label := range metric.Labels {
		val := label.Value
		if label.ValueExpr != "" {
			var err error
			val, err = template(ctx, v1.Template{Expression: label.ValueExpr})
			if err != nil {
				return nil, nil, fmt.Errorf("templating label %s with expression %q: %w", label.Name, label.ValueExpr, err)
			}
		}
		labels[label.Name] = val
		names = append(names, label.Name)
	}
	// Sorted so that the same label set always yields the same name list.
	sort.Strings(names)
	return labels, names, nil
}

// getLabelString renders labels as "{k1=v1, k2=v2}" for log output.
//
// Keys are written in sorted order so the same label set always produces the
// same string (map iteration order is otherwise random). A nil or empty map
// renders as "{}".
func getLabelString(labels map[string]string) string {
	keys := make([]string, 0, len(labels))
	for k := range labels {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	var b strings.Builder
	b.WriteByte('{')
	for i, k := range keys {
		if i > 0 {
			b.WriteString(", ")
		}
		b.WriteString(k)
		b.WriteByte('=')
		b.WriteString(labels[k])
	}
	b.WriteByte('}')
	return b.String()
}

func exportCheckMetrics(ctx *context.Context, results pkg.Results) {
Expand All @@ -51,77 +104,57 @@ func exportCheckMetrics(ctx *context.Context, results pkg.Results) {
continue
}

var collector prometheus.Collector
var exists bool
if collector, exists = collectorMap[spec.Name]; !exists {
collector = addPrometheusMetric(spec.Name, spec.Type, spec.Labels.Names())
if collector == nil {
logger.Errorf("Invalid type for check.metrics %s for check[%s]", spec.Type, r.Check.GetName())
continue
}
}
ctx = getWithEnvironment(ctx, r)

// Convert result Data into JSON for templating
var rData map[string]any
resultBytes, err := json.Marshal(r.Data)
if err != nil {
logger.Errorf("Error converting check result data into json: %v", err)
continue
}
if err := json.Unmarshal(resultBytes, &rData); err != nil {
logger.Errorf("Error converting check result data into json: %v", err)
var err error
var labels map[string]string
var labelNames []string
if labels, labelNames, err = getLabels(ctx, spec); err != nil {
r.ErrorMessage(err)
continue
}

tplValue := v1.Template{Expression: spec.Value}
templateInput := map[string]any{
"result": rData,
"check": map[string]any{
"name": r.Check.GetName(),
"description": r.Check.GetDescription(),
"labels": r.Check.GetLabels(),
"endpoint": r.Check.GetEndpoint(),
"duration": r.GetDuration(),
},
}

valRaw, err := template(ctx.New(templateInput), tplValue)
if err != nil {
logger.Errorf("Error templating value for check.metrics template %s for check[%s]: %v", spec.Value, r.Check.GetName(), err)
var collector prometheus.Collector
if collector, err = getOrAddPrometheusMetric(spec.Name, spec.Type, labelNames); err != nil {
r.ErrorMessage(err)
continue
}
val, err := strconv.ParseFloat(valRaw, 64)
if err != nil {
logger.Errorf("Error converting value %s to float for check.metrics template %s for check[%s]: %v", valRaw, spec.Value, r.Check.GetName(), err)

var val float64
if val, err = getMetricValue(ctx, spec); err != nil {
r.ErrorMessage(err)
continue
}

var orderedLabelVals []string
for _, label := range spec.Labels {
val := label.Value
if label.ValueExpr != "" {
var err error
val, err = template(ctx.New(templateInput), v1.Template{Expression: label.ValueExpr})
if err != nil {
logger.Errorf("Error templating label %s:%s for check.metrics for check[%s]: %v", label.Name, label.ValueExpr, r.Check.GetName(), err)
}
}
orderedLabelVals = append(orderedLabelVals, val)
if ctx.IsDebug() {
ctx.Debugf("%s%v=%0.3f", spec.Name, getLabelString(labels), val)
}

switch collector := collector.(type) {
case *prometheus.HistogramVec:
collector.WithLabelValues(orderedLabelVals...).Observe(val)
collector.With(labels).Observe(val)
case *prometheus.GaugeVec:
collector.WithLabelValues(orderedLabelVals...).Set(val)
collector.With(labels).Set(val)
case *prometheus.CounterVec:
if val <= 0 {
continue
}
collector.WithLabelValues(orderedLabelVals...).Add(val)
default:
logger.Errorf("Got unknown type for check.metrics %T", collector)
collector.With(labels).Add(val)
}
}
}
}

// getMetricValue evaluates the metric's value expression against ctx and
// parses the rendered text as a float64.
//
// Errors name the metric and the expression so a failure can be traced back to
// a specific check.metrics entry. The underlying template or parse error is
// wrapped, so it stays reachable through errors.Is / errors.As.
func getMetricValue(ctx *context.Context, spec external.Metrics) (float64, error) {
	valRaw, err := template(ctx, v1.Template{Expression: spec.Value})
	if err != nil {
		return 0, fmt.Errorf("metric %s: evaluating value expression %q: %w", spec.Name, spec.Value, err)
	}

	val, err := strconv.ParseFloat(valRaw, 64)
	if err != nil {
		// %q keeps an empty or whitespace-only result visible in the message.
		return 0, fmt.Errorf("metric %s: value %q from expression %q is not a number: %w", spec.Name, valRaw, spec.Value, err)
	}
	return val, nil
}
41 changes: 41 additions & 0 deletions fixtures/minimal/metrics-multiple.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
apiVersion: canaries.flanksource.com/v1
kind: Canary
metadata:
name: exchange-rates
annotations:
trace: "true"
spec:
schedule: "every 30 @minutes"
http:
- name: exchange-rates
url: https://api.frankfurter.app/latest?from=USD&to=GBP,EUR,ILS
metrics:
- name: exchange_rate
type: gauge
value: result.json.rates.GBP
labels:
- name: "from"
value: "USD"
- name: to
value: GBP

- name: exchange_rate
type: gauge
value: result.json.rates.EUR
labels:
- name: "from"
value: "USD"
- name: to
value: EUR

- name: exchange_rate
type: gauge
value: result.json.rates.ILS
labels:
- name: "from"
value: "USD"
- name: to
value: ILS
- name: exchange_rate_api
type: histogram
value: result.elapsed.getMilliseconds()
26 changes: 26 additions & 0 deletions fixtures/minimal/metrics.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
apiVersion: canaries.flanksource.com/v1
kind: Canary
metadata:
name: http-pass-single
annotations:
trace: "true"
spec:
interval: 30
http:
- name: http-minimal-check
url: https://httpbin.demo.aws.flanksource.com/status/200
metrics:
- name: httpbin_count
type: counter
value: "1"
labels:
- name: check_name
valueExpr: check.name
- name: code
valueExpr: result.code
- name: httpbin_2xx_duration
type: counter
value: result.elapsed.getMilliseconds()
labels:
- name: check_name
valueExpr: check.name
13 changes: 12 additions & 1 deletion pkg/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,18 @@ type Metric struct {
}

// String renders the metric as "name{k1=v1, k2=v2}=value".
//
// The "{...}" section is omitted when the metric has no labels, and the value
// is truncated to an integer. Labels are written in map iteration order, so
// the order of multiple labels is not stable between calls.
func (m Metric) String() string {
	labels := ""
	if len(m.Labels) > 0 {
		labels = "{"
		for k, v := range m.Labels {
			if labels != "{" {
				labels += ", "
			}
			labels += fmt.Sprintf("%s=%s", k, v)
		}
		labels += "}"
	}
	return fmt.Sprintf("%s%s=%d", m.Name, labels, int(m.Value))
}

func (e Endpoint) GetEndpoint() string {
Expand Down
Loading

0 comments on commit 8fbe884

Please sign in to comment.