diff --git a/api/context/context.go b/api/context/context.go index 2324a369b..0d2dde715 100644 --- a/api/context/context.go +++ b/api/context/context.go @@ -212,7 +212,7 @@ func New(client *kommons.Client, kubernetes kubernetes.Interface, db *gorm.DB, c } func (ctx *Context) IsDebug() bool { - return ctx.Canary.IsDebug() + return ctx.Canary.IsDebug() || ctx.IsTrace() } func (ctx *Context) IsTrace() bool { diff --git a/checks/metrics.go b/checks/metrics.go index b320f949e..81220710b 100644 --- a/checks/metrics.go +++ b/checks/metrics.go @@ -1,43 +1,96 @@ package checks import ( - "encoding/json" + "fmt" + "sort" "strconv" + "strings" + "time" "github.com/flanksource/canary-checker/api/context" + "github.com/flanksource/canary-checker/api/external" v1 "github.com/flanksource/canary-checker/api/v1" "github.com/flanksource/canary-checker/pkg" - "github.com/flanksource/commons/logger" "github.com/prometheus/client_golang/prometheus" ) var collectorMap = make(map[string]prometheus.Collector) -func addPrometheusMetric(name, metricType string, labelNames []string) prometheus.Collector { +func getOrAddPrometheusMetric(name, metricType string, labelNames []string) (prometheus.Collector, error) { + key := name + metricType + strings.Join(labelNames, ",") + if collector, exists := collectorMap[key]; exists { + return collector, nil + } var collector prometheus.Collector switch metricType { case "histogram": collector = prometheus.NewHistogramVec( - prometheus.HistogramOpts{Name: name}, - labelNames, - ) + prometheus.HistogramOpts{Name: name}, labelNames) case "counter": collector = prometheus.NewCounterVec( - prometheus.CounterOpts{Name: name}, - labelNames, - ) + prometheus.CounterOpts{Name: name}, labelNames) case "gauge": collector = prometheus.NewGaugeVec( - prometheus.GaugeOpts{Name: name}, - labelNames, - ) + prometheus.GaugeOpts{Name: name}, labelNames) default: - return nil + return nil, fmt.Errorf("unknown metric type %s", metricType) } - collectorMap[name] = collector - prometheus.MustRegister(collector) - return collector + collectorMap[key] = collector + return collector, prometheus.Register(collector) +} + +func getWithEnvironment(ctx *context.Context, r *pkg.CheckResult) *context.Context { + templateInput := map[string]any{ + "result": r.Data, + "canary": map[string]any{ + "name": r.Canary.GetName(), + "namespace": r.Canary.GetNamespace(), + "labels": r.Canary.GetLabels(), + "id": r.Canary.GetPersistedID(), + }, + "check": map[string]any{ + "name": r.Check.GetName(), + "id": r.Canary.GetCheckID(r.Check.GetName()), + "description": r.Check.GetDescription(), + "labels": r.Check.GetLabels(), + "endpoint": r.Check.GetEndpoint(), + "duration": time.Millisecond * time.Duration(r.GetDuration()), + }, + } + return ctx.New(templateInput) +} + +func getLabels(ctx *context.Context, metric external.Metrics) (map[string]string, []string, error) { + var labels = make(map[string]string) + var names = []string{} + for _, label := range metric.Labels { + val := label.Value + if label.ValueExpr != "" { + var err error + val, err = template(ctx, v1.Template{Expression: label.ValueExpr}) + if err != nil { + return nil, nil, err + } + } + labels[label.Name] = val + names = append(names, label.Name) + } + sort.Strings(names) + return labels, names, nil +} + +func getLabelString(labels map[string]string) string { + s := "{" + for k, v := range labels { + if s != "{" { + s += ", " + } + s += fmt.Sprintf("%s=%s", k, v) + } + s += "}" + + return s } func exportCheckMetrics(ctx *context.Context, results pkg.Results) { @@ -51,77 +104,57 @@ func exportCheckMetrics(ctx *context.Context, results pkg.Results) { continue } - var collector prometheus.Collector - var exists bool - if collector, exists = collectorMap[spec.Name]; !exists { - collector = addPrometheusMetric(spec.Name, spec.Type, spec.Labels.Names()) - if collector == nil { - logger.Errorf("Invalid type for check.metrics %s for check[%s]", spec.Type, r.Check.GetName()) - continue - } - } + ctx = getWithEnvironment(ctx, r) - // Convert result Data into JSON for templating - var rData map[string]any - resultBytes, err := json.Marshal(r.Data) - if err != nil { - logger.Errorf("Error converting check result data into json: %v", err) - continue - } - if err := json.Unmarshal(resultBytes, &rData); err != nil { - logger.Errorf("Error converting check result data into json: %v", err) + var err error + var labels map[string]string + var labelNames []string + if labels, labelNames, err = getLabels(ctx, spec); err != nil { + r.ErrorMessage(err) continue } - tplValue := v1.Template{Expression: spec.Value} - templateInput := map[string]any{ - "result": rData, - "check": map[string]any{ - "name": r.Check.GetName(), - "description": r.Check.GetDescription(), - "labels": r.Check.GetLabels(), - "endpoint": r.Check.GetEndpoint(), - "duration": r.GetDuration(), - }, - } - - valRaw, err := template(ctx.New(templateInput), tplValue) - if err != nil { - logger.Errorf("Error templating value for check.metrics template %s for check[%s]: %v", spec.Value, r.Check.GetName(), err) + var collector prometheus.Collector + if collector, err = getOrAddPrometheusMetric(spec.Name, spec.Type, labelNames); err != nil { + r.ErrorMessage(err) continue } - val, err := strconv.ParseFloat(valRaw, 64) - if err != nil { - logger.Errorf("Error converting value %s to float for check.metrics template %s for check[%s]: %v", valRaw, spec.Value, r.Check.GetName(), err) + + var val float64 + if val, err = getMetricValue(ctx, spec); err != nil { + r.ErrorMessage(err) continue } - var orderedLabelVals []string - for _, label := range spec.Labels { - val := label.Value - if label.ValueExpr != "" { - var err error - val, err = template(ctx.New(templateInput), v1.Template{Expression: label.ValueExpr}) - if err != nil { - logger.Errorf("Error templating label %s:%s for check.metrics for check[%s]: %v", label.Name, label.ValueExpr, r.Check.GetName(), err) - } - } - orderedLabelVals = append(orderedLabelVals, val) + if ctx.IsDebug() { + ctx.Debugf("%s%v=%0.3f", spec.Name, getLabelString(labels), val) } switch collector := collector.(type) { case *prometheus.HistogramVec: - collector.WithLabelValues(orderedLabelVals...).Observe(val) + collector.With(labels).Observe(val) case *prometheus.GaugeVec: - collector.WithLabelValues(orderedLabelVals...).Set(val) + collector.With(labels).Set(val) case *prometheus.CounterVec: if val <= 0 { continue } - collector.WithLabelValues(orderedLabelVals...).Add(val) - default: - logger.Errorf("Got unknown type for check.metrics %T", collector) + collector.With(labels).Add(val) } } } } + +func getMetricValue(ctx *context.Context, spec external.Metrics) (float64, error) { + tplValue := v1.Template{Expression: spec.Value} + + valRaw, err := template(ctx, tplValue) + if err != nil { + return 0, err + } + val, err := strconv.ParseFloat(valRaw, 64) + if err != nil { + return 0, fmt.Errorf("%s is not a number", valRaw) + } + return val, nil +} diff --git a/fixtures/minimal/metrics-multiple.yaml b/fixtures/minimal/metrics-multiple.yaml new file mode 100644 index 000000000..d8fdd9ef5 --- /dev/null +++ b/fixtures/minimal/metrics-multiple.yaml @@ -0,0 +1,41 @@ +apiVersion: canaries.flanksource.com/v1 +kind: Canary +metadata: + name: exchange-rates + annotations: + trace: "true" +spec: + schedule: "every 30 @minutes" + http: + - name: exchange-rates + url: https://api.frankfurter.app/latest?from=USD&to=GBP,EUR,ILS + metrics: + - name: exchange_rate + type: gauge + value: result.json.rates.GBP + labels: + - name: "from" + value: "USD" + - name: to + value: GBP + + - name: exchange_rate + type: gauge + value: result.json.rates.EUR + labels: + - name: "from" + value: "USD" + - name: to + value: EUR + + - name: exchange_rate + type: gauge + value: result.json.rates.ILS + labels: + - name: "from" + value: "USD" + - name: to + value: ILS + - name: exchange_rate_api + type: histogram + value: result.elapsed.getMilliseconds() diff --git a/fixtures/minimal/metrics.yaml b/fixtures/minimal/metrics.yaml new file mode 100644 index 000000000..84d4b0e21 --- /dev/null +++ b/fixtures/minimal/metrics.yaml @@ -0,0 +1,26 @@ +apiVersion: canaries.flanksource.com/v1 +kind: Canary +metadata: + name: http-pass-single + annotations: + trace: "true" +spec: + interval: 30 + http: + - name: http-minimal-check + url: https://httpbin.demo.aws.flanksource.com/status/200 + metrics: + - name: httpbin_count + type: counter + value: "1" + labels: + - name: check_name + valueExpr: check.name + - name: code + valueExpr: result.code + - name: httpbin_2xx_duration + type: counter + value: result.elapsed.getMilliseconds() + labels: + - name: check_name + valueExpr: check.name diff --git a/pkg/api.go b/pkg/api.go index 2062c4f4f..29bb3c81f 100644 --- a/pkg/api.go +++ b/pkg/api.go @@ -524,7 +524,18 @@ type Metric struct { } func (m Metric) String() string { - return fmt.Sprintf("%s=%d", m.Name, int(m.Value)) + labels := "" + if len(m.Labels) > 0 { + labels = "{" + for k, v := range m.Labels { + if labels != "{" { + labels += ", " + } + labels += fmt.Sprintf("%s=%s", k, v) + } + labels += "}" + } + return fmt.Sprintf("%s%s=%d", m.Name, labels, int(m.Value)) } func (e Endpoint) GetEndpoint() string { diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 9f78977cd..1b091b857 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -1,7 +1,7 @@ package metrics import ( - "strconv" + "fmt" "time" "github.com/asecurityteam/rolling" @@ -18,6 +18,10 @@ var ( GaugeType pkg.MetricType = "gauge" HistogramType pkg.MetricType = "histogram" + CustomGauges map[string]*prometheus.GaugeVec + CustomCounters map[string]*prometheus.CounterVec + CustomHistograms map[string]*prometheus.HistogramVec + OpsCount = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "canary_check_count", @@ -99,6 +103,9 @@ var latencies = cmap.New() func init() { prometheus.MustRegister(Gauge, CanaryCheckInfo, OpsCount, OpsSuccessCount, OpsFailedCount, RequestLatency, GenericGauge, GenericCounter, GenericHistogram) + CustomCounters = make(map[string]*prometheus.CounterVec) + CustomGauges = make(map[string]*prometheus.GaugeVec) + CustomHistograms = make(map[string]*prometheus.HistogramVec) } func RemoveCheck(checks v1.Canary) { @@ -199,11 +206,20 @@ func Record(canary v1.Canary, result *pkg.CheckResult) (_uptime pkg.Uptime, _lat for _, m := range result.Metrics { switch m.Type { case CounterType: - GenericCounter.WithLabelValues(checkType, endpoint, m.Name, strconv.Itoa(int(m.Value)), canaryNamespace, owner, severity, key, name).Inc() + if err := getOrCreateCounter(m); err != nil { + result.ErrorMessage(fmt.Errorf("cannot create counter %s with labels %v", m.Name, m.Labels)) + } + case GaugeType: - GenericGauge.WithLabelValues(checkType, endpoint, m.Name, canaryNamespace, owner, severity, key, name).Set(m.Value) + getOrCreateGauge(m) + if err := getOrCreateGauge(m); err != nil { + result.ErrorMessage(fmt.Errorf("cannot create gauge %s with labels %v", m.Name, m.Labels)) + } + case HistogramType: - GenericHistogram.WithLabelValues(checkType, endpoint, m.Name, canaryNamespace, owner, severity, key, name).Observe(m.Value) + if err := getOrCreateHistogram(m); err != nil { + result.ErrorMessage(fmt.Errorf("cannot create histogram %s with labels %v", m.Name, m.Labels)) + } } } } else { @@ -222,6 +238,62 @@ func Record(canary v1.Canary, result *pkg.CheckResult) (_uptime pkg.Uptime, _lat return _uptime, _latency } +func getOrCreateGauge(m pkg.Metric) (e any) { + defer func() { + e = recover() + }() + + var gauge *prometheus.GaugeVec + var ok bool + if gauge, ok = CustomGauges[m.Name]; !ok { + gauge = prometheus.V2.NewGaugeVec(prometheus.GaugeVecOpts{ + GaugeOpts: prometheus.GaugeOpts{ + Name: m.Name, + }, + }) + CustomGauges[m.Name] = gauge + } + + gauge.With(m.Labels).Set(m.Value) + return nil +} + +func getOrCreateCounter(m pkg.Metric) (e any) { + defer func() { + e = recover() + }() + var counter *prometheus.CounterVec + var ok bool + if counter, ok = CustomCounters[m.Name]; !ok { + counter = prometheus.V2.NewCounterVec(prometheus.CounterVecOpts{ + CounterOpts: prometheus.CounterOpts{ + Name: m.Name, + }, + }) + CustomCounters[m.Name] = counter + } + counter.With(m.Labels).Add(m.Value) + return nil +} + +func getOrCreateHistogram(m pkg.Metric) (e any) { + defer func() { + e = recover() + }() + var histogram *prometheus.HistogramVec + var ok bool + if histogram, ok = CustomHistograms[m.Name]; !ok { + histogram = prometheus.V2.NewHistogramVec(prometheus.HistogramVecOpts{ + HistogramOpts: prometheus.HistogramOpts{ + Name: m.Name, + }, + }) + CustomHistograms[m.Name] = histogram + } + histogram.With(m.Labels).Observe(m.Value) + return nil +} + func FillLatencies(checkKey string, duration string, latency *pkg.Latency) error { if runner.Prometheus == nil || duration == "" { return nil