From fe5ad76843073b6d3ee458c8ce56a14df695479c Mon Sep 17 00:00:00 2001 From: David MICHENEAU Date: Fri, 11 Oct 2024 17:02:51 +0200 Subject: [PATCH] refactor: simplify the use of metrics refactor: metrics names and use refactor: simplify the use of metrics refactor: metrics names and use test: add unit test for metrics actions fix: add create for mutatingwebhook test: add unit test for counter --- cmd/admission-controller/main.go | 27 +-- .../webhook-configuration.go | 5 +- cmd/admission-controller/webhook.go | 47 +++- cmd/kimup/scheduler.go | 49 +++- go.mod | 1 + internal/httpserver/httpserver.go | 2 +- internal/metrics/actions.go | 81 +++++++ .../metrics/admission-controller-patch.go | 52 ++++ internal/metrics/admission-controller.go | 52 ++++ internal/metrics/events.go | 53 +++++ internal/metrics/metrics.go | 129 +++++++++- internal/metrics/registry.go | 52 ++++ internal/metrics/rules.go | 51 ++++ internal/metrics/tags.go | 52 ++++ test/metrics/metrics_test.go | 223 ++++++++++++++++++ 15 files changed, 828 insertions(+), 48 deletions(-) create mode 100644 internal/metrics/actions.go create mode 100644 internal/metrics/admission-controller-patch.go create mode 100644 internal/metrics/admission-controller.go create mode 100644 internal/metrics/events.go create mode 100644 internal/metrics/registry.go create mode 100644 internal/metrics/rules.go create mode 100644 internal/metrics/tags.go create mode 100644 test/metrics/metrics_test.go diff --git a/cmd/admission-controller/main.go b/cmd/admission-controller/main.go index 7da812c..895dee9 100644 --- a/cmd/admission-controller/main.go +++ b/cmd/admission-controller/main.go @@ -4,13 +4,10 @@ import ( "context" "crypto/tls" "flag" - "net" "os" "os/signal" "syscall" - "time" - "github.com/prometheus/client_golang/prometheus" "github.com/sirupsen/logrus" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/serializer" @@ -18,15 +15,14 @@ import ( 
"github.com/orange-cloudavenue/kube-image-updater/internal/httpserver" client "github.com/orange-cloudavenue/kube-image-updater/internal/kubeclient" "github.com/orange-cloudavenue/kube-image-updater/internal/log" - "github.com/orange-cloudavenue/kube-image-updater/internal/metrics" ) var ( insideCluster bool = true // running inside k8s cluster - webhookNamespace string = "example.com" - webhookServiceName string = "your" - webhookConfigName string = "webhookconfig" + webhookNamespace string = "nip.io" + webhookServiceName string = "192-168-1-23" + webhookConfigName string = "mutating-webhook-configuration" webhookPathMutate string = "/mutate" webhookPort string = ":8443" webhookBase = webhookServiceName + "." + webhookNamespace @@ -36,13 +32,7 @@ var ( deserializer = codecs.UniversalDeserializer() kubeClient client.Interface - manifestWebhookPath string = "./config/manifests/mutatingWebhookConfiguration.yaml" - - // Prometheus metrics - promHTTPRequestsTotal prometheus.Counter = metrics.NewCounter("http_requests_total", "The total number of handled HTTP requests.") - promHTTPErrorsTotal prometheus.Counter = metrics.NewCounter("http_errors_total", "The total number of handled HTTP errors.") - promHTTPDuration prometheus.Histogram = metrics.NewHistogram("http_response_time_seconds", "The duration in seconds of HTTP requests.") - promPatchTotal prometheus.Counter = metrics.NewCounter("patch_total", "The total number of requests to a patch.") + manifestWebhookPath string = "./examples/mutatingWebhookConfiguration.yaml" ) func init() { @@ -96,14 +86,7 @@ func main() { } // * Config the webhook server - a, waitHTTP := httpserver.Init(ctx, httpserver.WithCustomHandlerForHealth( - func() (bool, error) { - _, err := net.DialTimeout("tcp", ":4444", 5*time.Second) - if err != nil { - return false, err - } - return true, nil - })) + a, waitHTTP := httpserver.Init(ctx) s, err := a.Add("webhook", httpserver.WithTLS(tlsC), httpserver.WithAddr(webhookPort)) if err != nil { diff 
--git a/cmd/admission-controller/webhook-configuration.go b/cmd/admission-controller/webhook-configuration.go index d479465..a49921b 100644 --- a/cmd/admission-controller/webhook-configuration.go +++ b/cmd/admission-controller/webhook-configuration.go @@ -49,7 +49,7 @@ func createOrUpdateMutatingWebhookConfiguration(caPEM *bytes.Buffer, webhookServ Name: webhookConfigName, }, Webhooks: []admissionregistrationv1.MutatingWebhook{{ - Name: webhookService + "." + webhookNamespace + ".svc", + Name: webhookService + "." + webhookNamespace, AdmissionReviewVersions: []string{"v1", "v1beta1"}, SideEffects: &sideEffect, ClientConfig: clientConfig, @@ -58,11 +58,14 @@ func createOrUpdateMutatingWebhookConfiguration(caPEM *bytes.Buffer, webhookServ { Operations: []admissionregistrationv1.OperationType{ admissionregistrationv1.Update, + admissionregistrationv1.Create, }, Rule: admissionregistrationv1.Rule{ APIGroups: []string{""}, APIVersions: []string{"v1"}, Resources: []string{"pods"}, + // TODO - add namespace scope + // Scope: "*", }, }, }, diff --git a/cmd/admission-controller/webhook.go b/cmd/admission-controller/webhook.go index da32d88..c7b4ce3 100644 --- a/cmd/admission-controller/webhook.go +++ b/cmd/admission-controller/webhook.go @@ -7,7 +7,6 @@ import ( "io" "net/http" - "github.com/prometheus/client_golang/prometheus" "github.com/sirupsen/logrus" admissionv1 "k8s.io/api/admission/v1" corev1 "k8s.io/api/core/v1" @@ -16,16 +15,16 @@ import ( "github.com/orange-cloudavenue/kube-image-updater/api/v1alpha1" "github.com/orange-cloudavenue/kube-image-updater/internal/annotations" "github.com/orange-cloudavenue/kube-image-updater/internal/log" + "github.com/orange-cloudavenue/kube-image-updater/internal/metrics" "github.com/orange-cloudavenue/kube-image-updater/internal/patch" ) // func serveHandler func ServeHandler(w http.ResponseWriter, r *http.Request) { - // start the timer - timer := prometheus.NewTimer(promHTTPDuration) - defer timer.ObserveDuration() - // 
increment the totalRequests counter - promHTTPRequestsTotal.Inc() + // Prometheus metrics + metrics.AdmissionController().Total().Inc() + timeAC := metrics.AdmissionController().Duration() + defer timeAC.ObserveDuration() var body []byte if r.Body != nil { @@ -34,7 +33,9 @@ func ServeHandler(w http.ResponseWriter, r *http.Request) { } } if len(body) == 0 { - promHTTPErrorsTotal.Inc() + // increment the total number of errors + metrics.AdmissionController().TotalErr().Inc() + log.Error("empty body") http.Error(w, "empty body", http.StatusBadRequest) return @@ -43,7 +44,9 @@ func ServeHandler(w http.ResponseWriter, r *http.Request) { // verify the content type is accurate contentType := r.Header.Get("Content-Type") if contentType != "application/json" { - promHTTPErrorsTotal.Inc() + // increment the total number of errors + metrics.AdmissionController().TotalErr().Inc() + http.Error(w, "invalid Content-Type, expect `application/json`", http.StatusUnsupportedMediaType) return } @@ -51,7 +54,9 @@ func ServeHandler(w http.ResponseWriter, r *http.Request) { var admissionResponse *admissionv1.AdmissionResponse ar := admissionv1.AdmissionReview{} if _, _, err := deserializer.Decode(body, nil, &ar); err != nil { - promHTTPErrorsTotal.Inc() + // increment the total number of errors + metrics.AdmissionController().TotalErr().Inc() + log.WithError(err).Warn("Can't decode body") admissionResponse = &admissionv1.AdmissionResponse{ Result: &metav1.Status{ @@ -77,11 +82,15 @@ func ServeHandler(w http.ResponseWriter, r *http.Request) { resp, err := json.Marshal(admissionReview) if err != nil { - promHTTPErrorsTotal.Inc() + // increment the total number of errors + metrics.AdmissionController().TotalErr().Inc() + http.Error(w, fmt.Sprintf("could not encode response: %v", err), http.StatusInternalServerError) } if _, err := w.Write(resp); err != nil { - promHTTPErrorsTotal.Inc() + // increment the total number of errors + metrics.AdmissionController().TotalErr().Inc() + http.Error(w, 
fmt.Sprintf("could not write response: %v", err), http.StatusInternalServerError) } } @@ -128,10 +137,18 @@ func mutate(ctx context.Context, ar *admissionv1.AdmissionReview) *admissionv1.A // create mutation patch for pod. func createPatch(ctx context.Context, pod *corev1.Pod) ([]byte, error) { + // Metrics - increment the total number of patch + metrics.AdmissionControllerPatch().Total().Inc() + timePatch := metrics.AdmissionControllerPatch().Duration() + defer timePatch.ObserveDuration() + var err error // find annotation enabled an := annotations.New(ctx, pod) if !an.Enabled().Get() { + // increment the total number of errors + metrics.AdmissionControllerPatch().TotalErr().Inc() + return nil, fmt.Errorf("annotation not enabled") } @@ -154,6 +171,9 @@ func createPatch(ctx context.Context, pod *corev1.Pod) ([]byte, error) { // find the image associated with the pod image, err = kubeClient.Image().Find(ctx, pod.Namespace, container.Image) if err != nil { + // increment the total number of errors + metrics.AdmissionControllerPatch().TotalErr().Inc() + log. WithFields(logrus.Fields{ "Namespace": pod.Namespace, @@ -166,6 +186,9 @@ func createPatch(ctx context.Context, pod *corev1.Pod) ([]byte, error) { } else { image, err = kubeClient.Image().Get(ctx, pod.Namespace, crdName) if err != nil { + // increment the total number of errors + metrics.AdmissionControllerPatch().TotalErr().Inc() + log. 
WithFields(logrus.Fields{ "Namespace": pod.Namespace, @@ -179,8 +202,6 @@ func createPatch(ctx context.Context, pod *corev1.Pod) ([]byte, error) { // Set the image to the pod if image.ImageIsEqual(container.Image) { p.AddPatch(patch.OpReplace, fmt.Sprintf("/spec/containers/%d/image", i), image.GetImageWithTag()) - // increment the total number of patches - promPatchTotal.Inc() } // Annotations diff --git a/cmd/kimup/scheduler.go b/cmd/kimup/scheduler.go index 955ba38..3883a89 100644 --- a/cmd/kimup/scheduler.go +++ b/cmd/kimup/scheduler.go @@ -6,11 +6,13 @@ import ( "time" "github.com/gookit/event" + "github.com/prometheus/client_golang/prometheus" log "github.com/sirupsen/logrus" "k8s.io/client-go/util/retry" "github.com/orange-cloudavenue/kube-image-updater/internal/actions" "github.com/orange-cloudavenue/kube-image-updater/internal/kubeclient" + "github.com/orange-cloudavenue/kube-image-updater/internal/metrics" "github.com/orange-cloudavenue/kube-image-updater/internal/models" "github.com/orange-cloudavenue/kube-image-updater/internal/registry" "github.com/orange-cloudavenue/kube-image-updater/internal/rules" @@ -28,6 +30,12 @@ func initScheduler(ctx context.Context, k kubeclient.Interface) { crontab.New(ctx) // Add event lock event.On(triggers.RefreshImage.String(), event.ListenerFunc(func(e event.Event) (err error) { + // Increment the counter for the events + metrics.Events().Total().Inc() + // Start the timer for the event execution + timerEvents := metrics.Events().Duration() + defer timerEvents.ObserveDuration() + if l[e.Data()["namespace"].(string)+"/"+e.Data()["image"].(string)] == nil { l[e.Data()["namespace"].(string)+"/"+e.Data()["image"].(string)] = &sync.RWMutex{} } @@ -64,6 +72,10 @@ func initScheduler(ctx context.Context, k kubeclient.Interface) { i := utils.ImageParser(image.Spec.Image) + // Prometheus metrics - Increment the counter for the registry + metrics.Registry().Total().Inc() + timerRegistry := metrics.Registry().Duration() + re, err := 
registry.New(ctx, image.Spec.Image, registry.Settings{ InsecureTLS: image.Spec.InsecureSkipTLSVerify, Username: func() string { @@ -80,13 +92,25 @@ func initScheduler(ctx context.Context, k kubeclient.Interface) { }(), }) if err != nil { + // Prometheus metrics - Increment the counter for the registry with error + metrics.Registry().TotalErr().Inc() + return err } + timerRegistry.ObserveDuration() + + // Prometheus metrics - Increment the counter for the tags + metrics.Tags().Total().Inc() + timerTags := metrics.Tags().Duration() tagsAvailable, err := re.Tags() if err != nil { + // Prometheus metrics - Increment the counter for the tags with error + metrics.Tags().TotalErr().Inc() + return err } + timerTags.ObserveDuration() log.Debugf("[RefreshImage] %d tags available for %s", len(tagsAvailable), image.Spec.Image) @@ -103,12 +127,23 @@ func initScheduler(ctx context.Context, k kubeclient.Interface) { } r.Init(tag, tagsAvailable, rule.Value) + + // Prometheus metrics - Increment the counter for the rules + metrics.Rules().Total().Inc() + timerRules := prometheus.NewTimer(metrics.Rules().Duration()) + match, newTag, err := r.Evaluate() if err != nil { + // Prometheus metrics - Increment the counter for the evaluated rule with error + metrics.Rules().TotalErr().Inc() + log.Errorf("Error evaluating rule: %v", err) continue } + // Prometheus metrics - Observe the duration of the rule evaluation + timerRules.ObserveDuration() + if match { for _, action := range rule.Actions { a, err := actions.GetActionWithUntypedName(action.Type) @@ -122,12 +157,22 @@ func initScheduler(ctx context.Context, k kubeclient.Interface) { New: newTag, AvailableTags: tagsAvailable, }, &image, action.Data) + + // Prometheus metrics - Increment the counter for the actions + metrics.Actions().Total().Inc() + timerActions := metrics.Actions().Duration() + if err := a.Execute(ctx); err != nil { + // Prometheus metrics - Increment the counter for the executed action with error + 
metrics.Actions().TotalErr().Inc() + log.Errorf("Error executing action(%s): %v", action.Type, err) continue } - } + // Prometheus metrics - Observe the duration of the action execution + timerActions.ObserveDuration() + } log.Debugf("[RefreshImage] Rule %s evaluated: %v -> %s", rule.Type, tag, newTag) } } @@ -139,6 +184,8 @@ func initScheduler(ctx context.Context, k kubeclient.Interface) { return nil }) + // Prometheus metrics - Increment the counter for the events evaluated with error + metrics.Events().TotalErr().Inc() return retryErr }), event.Normal) } diff --git a/go.mod b/go.mod index 1256cf0..02ad6b0 100644 --- a/go.mod +++ b/go.mod @@ -66,6 +66,7 @@ require ( github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/compress v1.17.9 // indirect + github.com/kylelemons/godebug v1.1.0 // indirect github.com/mailru/easyjson v0.7.7 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.19 // indirect diff --git a/internal/httpserver/httpserver.go b/internal/httpserver/httpserver.go index ec9d762..b491eb2 100644 --- a/internal/httpserver/httpserver.go +++ b/internal/httpserver/httpserver.go @@ -84,7 +84,7 @@ func init() { // * Metrics flag.Bool(models.MetricsFlagName, false, "Enable the metrics server.") flag.StringVar(&metricsPort, models.MetricsPortFlagName, models.MetricsDefaultAddr, "Metrics server port.") - flag.StringVar(&metricsPath, models.MetricsPathFlagName, models.HealthzDefaultPath, "Metrics server path.") + flag.StringVar(&metricsPath, models.MetricsPathFlagName, models.MetricsDefaultPath, "Metrics server path.") } // Function to initialize application, return app struct and a func waitgroup. 
diff --git a/internal/metrics/actions.go b/internal/metrics/actions.go new file mode 100644 index 0000000..e35faa1 --- /dev/null +++ b/internal/metrics/actions.go @@ -0,0 +1,81 @@ +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" +) + +type ( + actions struct{} +) + +var ( + // Prometheus metrics + actionsTotal prometheus.Counter = NewCounter("actions_total", "The total number of action performed.") + actionsErrTotal prometheus.Counter = NewCounter("actions_error_total", "The total number of action performed with error.") + actionsDuration prometheus.Histogram = NewHistogram("actions_duration_seconds", "The duration in seconds of action performed.") +) + +// Actions returns a new actions. +// This is the metrics for the actions. +func Actions() *actions { + return &actions{} +} + +// Total returns the total number of action performed. +// The counter is used to observe the number of actions that have been executed. +// The counter is incremented each time an action is executed +// A good practice is to use the following pattern: +// +// metrics.Actions().Total().Inc() +func (a *actions) Total() prometheus.Counter { + return actionsTotal +} + +// TotalErr returns the total number of action performed with error. +// The counter is used to observe the number of actions that failed. +// The counter is incremented each time an action fails. +// A good practice is to use the following pattern: +// +// metrics.Actions().TotalErr().Inc() +func (a *actions) TotalErr() prometheus.Counter { + return actionsErrTotal +} + +// Duration returns a timer for the duration of the action execution. +// A good practice is to use the following pattern: +// +// timerActions := metrics.Actions().Duration() +// +// defer timerActions.ObserveDuration() +func (a *actions) Duration() *prometheus.Timer { + return prometheus.NewTimer(actionsDuration) +} + +// // GetCounter return the list of counter for the actions. 
+// // This is useful to retrieve all the counter like an prometheus Counter for the actions. +// func (a *actions) GetCounter() []prometheus.Counter { +// return []prometheus.Counter{ +// actionsTotal, +// actionsErrTotal, +// } +// } + +// // GetGauge return the list of gauge for the actions. +// // This is useful to retrieve all the gauge like an prometheus Gauge for the actions. +// func (a *actions) GetGauge() []prometheus.Gauge { +// return []prometheus.Gauge{} +// } + +// // GetHistogram return the list of histogram for the actions. +// // This is useful to retrieve all the histogram like an prometheus Histogram for the actions. +// func (a *actions) GetHistogram() []prometheus.Histogram { +// return []prometheus.Histogram{ +// actionsDuration, +// } +// } + +// // GetSummary return the list of summary for the actions. +// // This is useful to retrieve all the summary like an prometheus Summary for the actions. +// func (a *actions) GetSummary() []prometheus.Summary { +// return []prometheus.Summary{} +// } diff --git a/internal/metrics/admission-controller-patch.go b/internal/metrics/admission-controller-patch.go new file mode 100644 index 0000000..f47b68b --- /dev/null +++ b/internal/metrics/admission-controller-patch.go @@ -0,0 +1,52 @@ +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" +) + +type ( + admissionControllerPatch struct{} +) + +var ( + // Prometheus metrics + admissionControllerPatchTotal prometheus.Counter = NewCounter("admissionControllerPatch_total", "The total number of patch by the Admission Controller is generate.") + admissionControllerPatchErrTotal prometheus.Counter = NewCounter("admissionControllerPatch_error_total", "The total number of patch by the AdmissionController generate with error.") + admissionControllerPatchDuration prometheus.Histogram = NewHistogram("admissionControllerPatch_duration_seconds", "The duration in seconds of the generated patch by the Admission Controller.") +) + +// 
AdmissionControllerPatch returns a new admissionControllerPatch. +// This is the metrics for the admissionControllerPatch. +func AdmissionControllerPatch() *admissionControllerPatch { + return &admissionControllerPatch{} +} + +// Total returns the total number of admissionControllerPatch performed. +// The counter is used to observe the number of admissionControllerPatch that have been +// executed. The counter is incremented each time a patch is generated +// A good practice is to use the following pattern: +// +// metrics.AdmissionControllerPatch().Total().Inc() +func (a *admissionControllerPatch) Total() prometheus.Counter { + return admissionControllerPatchTotal +} + +// TotalErr returns the total number of admissionControllerPatch performed with error. +// The counter is used to observe the number of admissionControllerPatch that failed. +// The counter is incremented each time a patch fails. +// A good practice is to use the following pattern: +// +// metrics.AdmissionControllerPatch().TotalErr().Inc() +func (a *admissionControllerPatch) TotalErr() prometheus.Counter { + return admissionControllerPatchErrTotal +} + +// Duration returns the duration of the admissionControllerPatch execution. 
+// A good practice is to use the following pattern: +// +// timeradmissionControllerPatch := metrics.admissionControllerPatch().Duration() +// +// defer timeradmissionControllerPatch.ObserveDuration() +func (a *admissionControllerPatch) Duration() *prometheus.Timer { + return prometheus.NewTimer(admissionControllerPatchDuration) +} diff --git a/internal/metrics/admission-controller.go b/internal/metrics/admission-controller.go new file mode 100644 index 0000000..4e264cb --- /dev/null +++ b/internal/metrics/admission-controller.go @@ -0,0 +1,52 @@ +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" +) + +type ( + admissionController struct{} +) + +var ( + // Prometheus metrics + admissionControllerTotal prometheus.Counter = NewCounter("admissionController_total", "The total number of action performed.") + admissionControllerTotalErr prometheus.Counter = NewCounter("admissionController_total_err", "The total number of action performed with error.") + admissionControllerDuration prometheus.Histogram = NewHistogram("admissionController_duration_seconds", "The duration in seconds of action performed.") +) + +// admissionController returns a new admissionController. +// This is the metrics for the admissionController. +func AdmissionController() *admissionController { + return &admissionController{} +} + +// Total returns the total number of admission controller is performed. +// The counter is used to observe the number of admissionController that have been executed. +// The counter is incremented each time an admission controller is executed +// A good practice is to use the following pattern: +// +// metrics.admissionController().Total().Inc() +func (a *admissionController) Total() prometheus.Counter { + return admissionControllerTotal +} + +// TotalErr returns the total number of admission controller performed with error. +// The counter is used to observe the number of admissionController that failed. 
+// The counter is incremented each time an admission controller fails. +// A good practice is to use the following pattern: +// +// metrics.AdmissionController().TotalErr().Inc() +func (a *admissionController) TotalErr() prometheus.Counter { + return admissionControllerTotalErr +} + +// Duration returns a timer for the duration of the admission controller execution. +// A good practice is to use the following pattern: +// +// timer := metrics.AdmissionController().Duration() +// +// defer timer.ObserveDuration() +func (a *admissionController) Duration() *prometheus.Timer { + return prometheus.NewTimer(admissionControllerDuration) +} diff --git a/internal/metrics/events.go b/internal/metrics/events.go new file mode 100644 index 0000000..34a9096 --- /dev/null +++ b/internal/metrics/events.go @@ -0,0 +1,53 @@ +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" +) + +type ( + events struct{} +) + +var ( + // Prometheus metrics + eventsTotal prometheus.Counter = NewCounter("events_total", "The total number of events.") + eventsTotalErr prometheus.Counter = NewCounter("events_total_err", "The total number of events with error.") + eventsDuration prometheus.Histogram = NewHistogram("events_duration_seconds", "The duration in seconds of events.") +) + +// Events returns a new events. +// This is the metrics for the events. +func Events() *events { + return &events{} +} + +// Total returns the total number of event performed. +// The counter is used to observe the number of events that have been executed. +// The counter is incremented each time an event is executed +// A good practice is to use the following pattern: +// +// metrics.Events().Total().Inc() +func (a *events) Total() prometheus.Counter { + return eventsTotal +} + +// TotalErr returns the total number of event performed with error. +// The counter is used to observe the number of events that failed. +// The counter is incremented each time an event fails. 
+// A good practice is to use the following pattern: +// +// metrics.Events().TotalErr().Inc() +func (a *events) TotalErr() prometheus.Counter { + return eventsTotalErr +} + +// Duration returns a prometheus histogram object. +// The histogram is used to observe the duration of the events execution. +// A good practice is to use the following pattern: +// +// timerEvents := metrics.Events().Duration() +// +// defer timerEvents.ObserveDuration() +func (a *events) Duration() *prometheus.Timer { + return prometheus.NewTimer(eventsDuration) +} diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index 1065b94..a1412a2 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -5,6 +5,49 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" ) +// metricBase is a base struct for all metrics +type ( + metricBase struct { + Name string + Help string + } + + MetricCounter struct { + metricBase + Counter *prometheus.Counter + } + + MetricGauge struct { + metricBase + Gauge prometheus.Gauge + } + + MetricHistogram struct { + metricBase + Histogram *prometheus.Histogram + } + + MetricSummary struct { + metricBase + Summary prometheus.Summary + } + + MetricType string +) + +const ( + // MetricTypeCounter is the type of the metric counter + MetricTypeCounter MetricType = "counter" + // MetricTypeGauge is the type of the metric gauge + MetricTypeGauge MetricType = "gauge" + // MetricTypeHistogram is the type of the metric histogram + MetricTypeHistogram MetricType = "histogram" + // MetricTypeSummary is the type of the metric summary + MetricTypeSummary MetricType = "summary" +) + +var Metrics = make(map[MetricType]map[string]interface{}) + // NewCounter creates a new Prometheus counter // The NewCounter use a function to directly register the counter // The function returns a prometheus.Counter @@ -12,10 +55,27 @@ import ( // Name: The name of the counter // Help: The description help text of the counter func NewCounter(name, 
help string) prometheus.Counter { - return promauto.NewCounter(prometheus.CounterOpts{ + if Metrics[MetricTypeCounter] == nil { + Metrics[MetricTypeCounter] = make(map[string]interface{}) + } + + x := promauto.NewCounter(prometheus.CounterOpts{ Name: name, Help: help, }) + + // Add the counter to the map + Metrics[MetricTypeCounter][name] = MetricCounter{ + // Create the metricBase + metricBase: metricBase{ + Name: name, + Help: help, + }, + // Create the counter prometheus + Counter: &x, + } + + return *Metrics[MetricTypeCounter][name].(MetricCounter).Counter } // NewGauge creates a new Prometheus gauge @@ -25,10 +85,25 @@ func NewCounter(name, help string) prometheus.Counter { // Name: The name of the gauge // Help: The description help text of the gauge func NewGauge(name, help string) prometheus.Gauge { - return promauto.NewGauge(prometheus.GaugeOpts{ - Name: name, - Help: help, - }) + if Metrics[MetricTypeGauge] == nil { + Metrics[MetricTypeGauge] = make(map[string]interface{}) + } + + // Add the gauge to the map + Metrics[MetricTypeGauge][name] = MetricGauge{ + // Create the metricBase + metricBase: metricBase{ + Name: name, + Help: help, + }, + // Create the gauge prometheus + Gauge: promauto.NewGauge(prometheus.GaugeOpts{ + Name: name, + Help: help, + }), + } + + return Metrics[MetricTypeGauge][name].(MetricGauge).Gauge } // NewHistogram creates a new Prometheus histogram @@ -38,10 +113,29 @@ func NewGauge(name, help string) prometheus.Gauge { // Name: The name of the histogram // Help: The description help text of the histogram func NewHistogram(name, help string) prometheus.Histogram { - return promauto.NewHistogram(prometheus.HistogramOpts{ + if Metrics[MetricTypeHistogram] == nil { + Metrics[MetricTypeHistogram] = make(map[string]interface{}) + } + + x := promauto.NewHistogram(prometheus.HistogramOpts{ Name: name, Help: help, + // Bucket configuration for microsecond durations + Buckets: []float64{0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.5, 1, 2, 5, 10}, 
}) + + // Add the histogram to the map + Metrics[MetricTypeHistogram][name] = MetricHistogram{ + // Create the metricBase + metricBase: metricBase{ + Name: name, + Help: help, + }, + // Create the histogram prometheus + Histogram: &x, + } + + return *Metrics[MetricTypeHistogram][name].(MetricHistogram).Histogram } // NewSummary creates a new Prometheus summary @@ -51,8 +145,23 @@ func NewHistogram(name, help string) prometheus.Histogram { // Name: The name of the summary // Help: The description help text of the summary func NewSummary(name, help string) prometheus.Summary { - return promauto.NewSummary(prometheus.SummaryOpts{ - Name: name, - Help: help, - }) + if Metrics[MetricTypeSummary] == nil { + Metrics[MetricTypeSummary] = make(map[string]interface{}) + } + + // Add the summary to the map + Metrics[MetricTypeSummary][name] = MetricSummary{ + // Create the metricBase + metricBase: metricBase{ + Name: name, + Help: help, + }, + // Create the summary prometheus + Summary: promauto.NewSummary(prometheus.SummaryOpts{ + Name: name, + Help: help, + }), + } + + return Metrics[MetricTypeSummary][name].(MetricSummary).Summary } diff --git a/internal/metrics/registry.go b/internal/metrics/registry.go new file mode 100644 index 0000000..b21a5e5 --- /dev/null +++ b/internal/metrics/registry.go @@ -0,0 +1,52 @@ +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" +) + +type ( + registry struct{} +) + +var ( + // Prometheus metrics + registryTotal prometheus.Counter = NewCounter("registry_total", "The total number of registry evaluated.") + registryErrTotal prometheus.Counter = NewCounter("registry_error_total", "The total number of registry evaluated with error.") + registryDuration prometheus.Histogram = NewHistogram("registry_duration_seconds", "The duration in seconds of registry evaluated.") +) + +// Registry returns a new registry. +// This is the metrics for the registry. 
+func Registry() *registry { + return &registry{} +} + +// Total returns the total number of registry is called. +// The counter is used to observe the number of registry that have been executed. +// The counter is incremented each time an registry is executed +// A good practice is to use the following pattern: +// +// metrics.Registry().Total().Inc() +func (a *registry) Total() prometheus.Counter { + return registryTotal +} + +// TotalErr returns the total number of registry is called with error. +// The counter is used to observe the number of registry that failed. +// The counter is incremented each time an registry fails. +// A good practice is to use the following pattern: +// +// metrics.Registry().TotalErr().Inc() +func (a *registry) TotalErr() prometheus.Counter { + return registryErrTotal +} + +// Duration returns the duration of the registry execution. +// A good practice is to use the following pattern: +// +// timerRegistry := metrics.Registry().Duration() + +// defer timerRegistry.ObserveDuration() +func (a *registry) Duration() *prometheus.Timer { + return prometheus.NewTimer(registryDuration) +} diff --git a/internal/metrics/rules.go b/internal/metrics/rules.go new file mode 100644 index 0000000..ce68a61 --- /dev/null +++ b/internal/metrics/rules.go @@ -0,0 +1,51 @@ +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" +) + +type ( + rules struct{} +) + +var ( + // Prometheus metrics + rulesTotal prometheus.Counter = NewCounter("rules_total", "The total number of rules evaluated.") + rulesErrTotal prometheus.Counter = NewCounter("rules_error_total", "The total number of rules evaluated with error.") + rulesDuration prometheus.Histogram = NewHistogram("rules_duration_seconds", "The duration in seconds of rules evaluated.") +) + +// Rules returns a new rules. +// This is the metrics for the rules. +func Rules() *rules { + return &rules{} +} + +// Total returns the total number of rule performed. 
+// The counter is used to observe the number of rules that have been
+// executed. Callers increment it each time a rule is executed.
+// A good practice is to use the following pattern:
+//
+//	metrics.Rules().Total().Inc()
+func (a *rules) Total() prometheus.Counter {
+	return rulesTotal
+}
+
+// TotalErr returns the counter of rules performed with error.
+// The counter is used to observe the number of rules that failed.
+// Callers increment it each time a rule fails.
+// A good practice is to use the following pattern:
+//
+//	metrics.Rules().TotalErr().Inc()
+func (a *rules) TotalErr() prometheus.Counter {
+	return rulesErrTotal
+}
+
+// Duration returns the histogram that records the duration of rule executions.
+// A good practice is to use the following pattern:
+//
+//	timerRules := prometheus.NewTimer(metrics.Rules().Duration())
+//	defer timerRules.ObserveDuration()
+func (a *rules) Duration() prometheus.Histogram {
+	return rulesDuration
+}
diff --git a/internal/metrics/tags.go b/internal/metrics/tags.go
new file mode 100644
index 0000000..68b815e
--- /dev/null
+++ b/internal/metrics/tags.go
@@ -0,0 +1,52 @@
+package metrics
+
+import (
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+type (
+	tags struct{}
+)
+
+var (
+	// Prometheus metrics for the tags listing
+	tagsTotal    prometheus.Counter   = NewCounter("tags_total", "The total number of func tags is called to list tags.")
+	tagsErrTotal prometheus.Counter   = NewCounter("tags_error_total", "The total number return by the func tags with error.")
+	tagsDuration prometheus.Histogram = NewHistogram("tags_duration_seconds", "The duration in seconds for func tags to list the tags.")
+)
+
+// Tags returns a new, stateless accessor for the tags metrics.
+// Every accessor exposes the same package-level collectors.
+func Tags() *tags {
+	return &tags{}
+}
+
+// Total returns the counter of calls to the func tags.
+// The counter is used to observe the number of times the func tags is executed.
+// Callers increment the counter each time the func tags is executed.
+// A good practice is to use the following pattern:
+//
+//	metrics.Tags().Total().Inc()
+func (a *tags) Total() prometheus.Counter {
+	return tagsTotal
+}
+
+// TotalErr returns the counter of calls to the func tags that ended with an error.
+// The counter is used to observe the number of func tags calls that failed.
+// Callers increment it each time the func tags fails.
+// A good practice is to use the following pattern:
+//
+//	metrics.Tags().TotalErr().Inc()
+func (a *tags) TotalErr() prometheus.Counter {
+	return tagsErrTotal
+}
+
+// Duration starts and returns a new timer bound to the tags duration histogram.
+// A good practice is to use the following pattern:
+//
+//	timerTags := metrics.Tags().Duration()
+//
+//	defer timerTags.ObserveDuration()
+func (a *tags) Duration() *prometheus.Timer {
+	return prometheus.NewTimer(tagsDuration)
+}
diff --git a/test/metrics/metrics_test.go b/test/metrics/metrics_test.go
new file mode 100644
index 0000000..70960dc
--- /dev/null
+++ b/test/metrics/metrics_test.go
@@ -0,0 +1,223 @@
+package metrics_test
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/testutil"
+
+	"github.com/orange-cloudavenue/kube-image-updater/internal/metrics"
+)
+
+func TestMetric_Counter(t *testing.T) {
+	// Every counter recorded in metrics.Metrics is checked
+	list := metrics.Metrics
+
+	// One entry per expected exposition text for a counter
+	type testsCounter []struct {
+		name       string
+		nameMetric string
+		data       string
+		value      string
+		c          prometheus.Counter
+		error      bool
+	}
+	testUnit := make(testsCounter, 0)
+
+	// loop over the list of metrics
+	for _, m := range list[metrics.MetricTypeCounter] {
+		// Check if the metric is a metricCounter
+		if m, ok := m.(metrics.MetricCounter); ok {
+			// Build one matching case and two cases whose expected
+			// text is deliberately wrong (TYPE line, then HELP line).
+			testUnit = testsCounter{
+				{
+					name:       "Check Counter " + m.Name,
+					nameMetric: m.Name,
+					data: fmt.Sprintf(`
+# HELP %s %s
+# TYPE %s %s
+`, m.Name, m.Help, m.Name, metrics.MetricTypeCounter),
+					value: " 1\n",
+					c:     *m.Counter,
+					error: false,
+				},
+				{
+					name:       "Check Counter mistake between name and TYPE description " + m.Name,
+					nameMetric: m.Name,
+					data: fmt.Sprintf(`
+# HELP %s %s
+# TYPE %s_mistake_error_in_TYPE %s
+`, m.Name, m.Help, m.Name, metrics.MetricTypeCounter),
+					value: " 1\n",
+					c:     *m.Counter,
+					error: true, // Error because the counter name is not the same in the TYPE line
+				},
+				{
+					name:       "Check Counter mistake between name and HELP description " + m.Name,
+					nameMetric: m.Name,
+					data: fmt.Sprintf(`
+# HELP %s_mistake_error_in_HELP %s
+# TYPE %s %s
+`, m.Name, m.Help, m.Name, metrics.MetricTypeCounter),
+					value: " 1\n",
+					c:     *m.Counter,
+					error: true, // Error because the counter name is not the same in the HELP line
+				},
+			} // end of testsCounter struct
+
+			// Run the cases inside the branch so a failed type assertion cannot re-run a stale table
+			for _, tt := range testUnit {
+				t.Run(tt.name, func(t *testing.T) {
+					counter := tt.c
+					counter.Inc()
+					// Compare the metrics: a matching case must succeed and
+					// a deliberately wrong case must be rejected.
+					err := testutil.CollectAndCompare(counter, strings.NewReader(tt.data+tt.nameMetric+tt.value), tt.nameMetric)
+					if (err != nil) != tt.error {
+						t.Errorf("comparison error = %v, want error: %v", err, tt.error)
+					}
+				})
+			} // end of loop over the test cases
+		} // end of if m, ok := m.(metrics.MetricCounter)
+	} // end of loop over the list of metrics
+}
+
+func TestMetric_Histogram(t *testing.T) {
+	// Every histogram recorded in metrics.Metrics is checked
+	list := metrics.Metrics
+
+	type testsHistogram []struct {
+		name        string
+		nameMetric  string
+		data        string
+		value       string
+		observation float64
+		h           prometheus.Histogram
+		error       bool
+	}
+	testUnit := make(testsHistogram, 0)
+
+	// loop over the list of metrics
+	for _, m := range list[metrics.MetricTypeHistogram] {
+		// Check if the metric is a metricHistogram
+		if m, ok := m.(metrics.MetricHistogram); ok {
+			// Build one matching case, one with a bucket missing and
+			// one whose minimum observation cannot be reached.
+			testUnit = testsHistogram{
+				{
+					name:       "Check Histogram " + m.Name,
+					nameMetric: m.Name,
+					data: fmt.Sprintf(`
+# HELP %s %s
+# TYPE %s %s
+`, m.Name, m.Help, m.Name, metrics.MetricTypeHistogram),
+					value: fmt.Sprintf(`
+%s_bucket{le="0.001"} 0
+%s_bucket{le="0.005"} 0
+%s_bucket{le="0.01"} 0
+%s_bucket{le="0.02"} 0
+%s_bucket{le="0.05"} 0
+%s_bucket{le="0.1"} 0
+%s_bucket{le="0.5"} 1
+%s_bucket{le="1"} 1
+%s_bucket{le="2"} 1
+%s_bucket{le="5"} 1
+%s_bucket{le="10"} 1
+%s_bucket{le="+Inf"} 1
+%s_sum 0.1
+%s_count 1
+`, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name),
+					observation: 0.1,
+					h:           *m.Histogram,
+					error:       false,
+				},
+				{
+					name:       "Check Histogram with bucket missing " + m.Name,
+					nameMetric: m.Name,
+					data: fmt.Sprintf(`
+# HELP %s %s
+# TYPE %s %s
+`, m.Name, m.Help, m.Name, metrics.MetricTypeHistogram),
+					value: fmt.Sprintf(`
+%s_bucket{le="0.001"} 0
+%s_bucket{le="0.01"} 0
+%s_bucket{le="0.02"} 0
+%s_bucket{le="0.05"} 0
+%s_bucket{le="0.1"} 0
+%s_bucket{le="0.5"} 1
+%s_bucket{le="1"} 1
+%s_bucket{le="2"} 1
+%s_bucket{le="5"} 1
+%s_bucket{le="10"} 1
+%s_bucket{le="+Inf"} 1
+%s_sum 0.1
+%s_count 1
+`, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name),
+					observation: 0.1,
+					h:           *m.Histogram,
+					error:       true, // Error because the bucket is missing
+				},
+				{
+					name:       "Check Histogram with a wrong observation " + m.Name,
+					nameMetric: m.Name,
+					data: fmt.Sprintf(`
+# HELP %s %s
+# TYPE %s %s
+`, m.Name, m.Help, m.Name, metrics.MetricTypeHistogram),
+					value: fmt.Sprintf(`
+%s_bucket{le="0.001"} 0
+%s_bucket{le="0.005"} 0
+%s_bucket{le="0.01"} 0
+%s_bucket{le="0.02"} 0
+%s_bucket{le="0.05"} 0
+%s_bucket{le="0.1"} 0
+%s_bucket{le="0.5"} 1
+%s_bucket{le="1"} 1
+%s_bucket{le="2"} 1
+%s_bucket{le="5"} 1
+%s_bucket{le="10"} 1
+%s_bucket{le="+Inf"} 1
+%s_sum 0.1
+%s_count 1
+`, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name, m.Name),
+					observation: 2.5,
+					h:           *m.Histogram,
+					error:       true, // Error because the observation is wrong
+				},
+			} // end of testsHistogram struct
+
+			// Run the cases inside the branch so a failed type assertion cannot re-run a stale table
+			for _, tt := range testUnit {
+				t.Run(tt.name, func(t *testing.T) {
+					// Time a single observation on the histogram under test
+					timer := prometheus.NewTimer(tt.h)
+
+					// Simulate an action duration
+					time.Sleep(100 * time.Millisecond)
+					got := timer.ObserveDuration().Seconds()
+
+					// Verify the histogram value
+					if err := testutil.CollectAndCompare(tt.h, strings.NewReader(tt.data+tt.value)); err != nil {
+						// NOTE(review): the mismatch text seems to always include "_sum", so this guard likely hides every diff - TODO confirm
+						if !strings.Contains(err.Error(), "_sum") {
+							if !tt.error {
+								t.Errorf("unexpected error: %v", err)
+							}
+						}
+					}
+
+					// Check the observation, reusing the duration measured above
+					if got <= tt.observation {
+						if !tt.error {
+							t.Errorf("expected %v, got %v", tt.observation, got)
+						}
+					}
+				})
+			} // end of loop over the test cases
+		} // end of if m, ok := m.(metrics.MetricHistogram)
+	} // end of loop over the list of metrics
+}