Skip to content

Commit

Permalink
feat: add metrics/healthz/readyz operator
Browse files Browse the repository at this point in the history
  • Loading branch information
azrod committed Oct 30, 2024
1 parent 61cd05f commit cca1c99
Show file tree
Hide file tree
Showing 15 changed files with 181 additions and 87 deletions.
11 changes: 11 additions & 0 deletions .changelog/89.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
```release-note:enhancement
`webserver` - All webservers now use the same `log` format.
```

```release-note:enhancement
`operator` - The operator now exposes `metrics` for monitoring.
```

```release-note:enhancement
`operator` - The operator now exposes /healthz for health checks and /readyz for readiness checks.
```
45 changes: 27 additions & 18 deletions cmd/operator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,17 @@ package main
import (
"context"
"flag"
"net"
"os"
"os/signal"
"syscall"
"time"

"github.com/bombsimon/logrusr/v4"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/healthz"
controllermetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
"sigs.k8s.io/controller-runtime/pkg/webhook"

Expand All @@ -39,6 +38,7 @@ import (
"github.com/orange-cloudavenue/kube-image-updater/internal/httpserver"
"github.com/orange-cloudavenue/kube-image-updater/internal/kubeclient"
"github.com/orange-cloudavenue/kube-image-updater/internal/log"
"github.com/orange-cloudavenue/kube-image-updater/internal/metrics"
"github.com/orange-cloudavenue/kube-image-updater/internal/models"

// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
Expand All @@ -52,6 +52,12 @@ var (
)

func init() {
// Set the controllermetrics prometheus registry into the metrics package
metrics.PFactory = controllermetrics.Registry

// Initialize the metrics
metrics.Mutator()

utilruntime.Must(clientgoscheme.AddToScheme(scheme))

utilruntime.Must(kimupv1alpha1.AddToScheme(scheme))
Expand All @@ -74,14 +80,25 @@ func main() {
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
Scheme: scheme,
Metrics: metricsserver.Options{
// TODO pass metrics to common metrics server
BindAddress: "0", // disable metrics service
BindAddress: "0", // metrics are served by common metrics server
},
// TODO re-enable and use the common healthz server to check the health of the application
HealthProbeBindAddress: "0", // disable health probe service
LeaderElection: enableLeaderElection,
LeaderElectionID: "71be4586.cloudavenue.io",
WebhookServer: webhook,
HealthProbeBindAddress: func() string {
if flag.Lookup(models.MetricsFlagName).Value.String() == "true" {
return httpserver.HealthzPort
}

return "0" // disable healthz server
}(),
LivenessEndpointName: func() string {
if flag.Lookup(models.MetricsFlagName).Value.String() == "true" {
return httpserver.HealthzPath
}

return "" // disable healthz server
}(),
LeaderElection: enableLeaderElection,
LeaderElectionID: "71be4586.kimup.cloudavenue.io",
WebhookServer: webhook,
})
if err != nil {
log.WithError(err).Error("unable to start manager")
Expand Down Expand Up @@ -133,15 +150,7 @@ func main() {
defer cancel()

// * Config the metrics and healthz server
a, waitHTTP := httpserver.Init(ctx, httpserver.WithCustomHandlerForHealth(
func() (bool, error) {
// TODO improve
_, err := net.DialTimeout("tcp", models.HealthzDefaultAddr, 5*time.Second)
if err != nil {
return false, err
}
return true, nil
}))
a, waitHTTP := httpserver.Init(ctx, httpserver.DisableHealth())

if err := a.Run(); err != nil {
log.WithError(err).Error("Failed to start HTTP servers")
Expand Down
48 changes: 24 additions & 24 deletions docs/advanced/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,28 +22,28 @@ The following arguments can be used to configure the metrics *(Available in kimu

The following metrics are exposed:

| Metrics | Description |
| ---------------------------------------------- | ----------------------------------------------------------- |
| kimup_actions_executed_duration | The duration in seconds of action performed. |
| kimup_actions_executed_error_total | The total number of action performed with error. |
| kimup_actions_executed_total | The total number of action performed. |
| kimup_admission_controller_patch_duration | The duration in seconds of patch in admission controller. |
| kimup_admission_controller_patch_error_total | The total number of patch action performed with error. |
| kimup_admission_controller_patch_total | The total number of patch action performed. |
| kimup_admission_controller_request_duration | The duration in seconds of request in admission controller. |
| kimup_admission_controller_request_error_total | The total number of request received with error. |
| kimup_admission_controller_request_total | The total number of request received. |
| kimup_events_triggerd_error_total | The total number of events triggered with error. |
| kimup_events_triggered_duration | The duration in seconds of events triggered. |
| kimup_events_triggered_total | The total number of events triggered. |
| kimup_registry_request_duration | The duration in seconds of registry evaluated. |
| kimup_registry_request_error_total | The total number of registry evaluated with error. |
| kimup_registry_request_total | The total number of registry evaluated. |
| kimup_rules_evaluated_duration | The duration in seconds of rules evaluated. |
| kimup_rules_evaluated_error_total | The total number of rules evaluated with error. |
| kimup_rules_evaluated_total | The total number of rules evaluated. |
| kimup_tags_available_sum | The total number of tags available for an image. |
| kimup_tags_request_duration | The duration in seconds of the request to list tags. |
| kimup_tags_request_error_total | The total number of requests to list tags that returned an error. |
| kimup_tags_request_total | The total number of requests to list tags. |
| Metrics | Description |
| ---------------------------------- | ----------------------------------------------------------- |
| kimup_actions_executed_duration | The duration in seconds of action performed. |
| kimup_actions_executed_error_total | The total number of action performed with error. |
| kimup_actions_executed_total | The total number of action performed. |
| kimup_events_triggerd_error_total | The total number of events triggered with error. |
| kimup_events_triggered_duration | The duration in seconds of events triggered. |
| kimup_events_triggered_total | The total number of events triggered. |
| kimup_mutator_patch_duration | The duration in seconds of patch in admission controller. |
| kimup_mutator_patch_error_total | The total number of patch action performed with error. |
| kimup_mutator_patch_total | The total number of patch action performed. |
| kimup_mutator_request_duration | The duration in seconds of request in admission controller. |
| kimup_mutator_request_error_total | The total number of request received with error. |
| kimup_mutator_request_total | The total number of request received. |
| kimup_registry_request_duration | The duration in seconds of registry evaluated. |
| kimup_registry_request_error_total | The total number of registry evaluated with error. |
| kimup_registry_request_total | The total number of registry evaluated. |
| kimup_rules_evaluated_duration | The duration in seconds of rules evaluated. |
| kimup_rules_evaluated_error_total | The total number of rules evaluated with error. |
| kimup_rules_evaluated_total | The total number of rules evaluated. |
| kimup_tags_available_sum | The total number of tags available for an image. |
| kimup_tags_request_duration | The duration in seconds of the request to list tags. |
| kimup_tags_request_error_total | The total number of requests to list tags that returned an error. |
| kimup_tags_request_total | The total number of requests to list tags. |

1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ replace github.com/distribution/reference => github.com/distribution/reference v
require (
github.com/Masterminds/semver/v3 v3.3.0
github.com/bombsimon/logrusr/v4 v4.1.0
github.com/chi-middleware/logrus-logger v0.3.0
github.com/containers/image/v5 v5.32.2
github.com/containrrr/shoutrrr v0.8.0
github.com/crazy-max/diun/v4 v4.28.0
Expand Down
13 changes: 13 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK3
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/chi-middleware/logrus-logger v0.3.0 h1:z/ru6PQUr16VtsbfRuZy7fOIEfHzg8ddh8VSzkVVkGU=
github.com/chi-middleware/logrus-logger v0.3.0/go.mod h1:Q5AOVS6PezKsB0a88BY5cWb2JAY9Rqk7EY2mnTBXec8=
github.com/containerd/continuity v0.4.3 h1:6HVkalIp+2u1ZLH1J/pYX2oBVXlJZvh1X1A7bEZ9Su8=
github.com/containerd/continuity v0.4.3/go.mod h1:F6PTNCKepoxEaXLQp3wDAjygEnImnZ/7o4JzpodfroQ=
github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
Expand All @@ -36,6 +38,7 @@ github.com/containrrr/shoutrrr v0.8.0 h1:mfG2ATzIS7NR2Ec6XL+xyoHzN97H8WPjir8aYzJ
github.com/containrrr/shoutrrr v0.8.0/go.mod h1:ioyQAyu1LJY6sILuNyKaQaw+9Ttik5QePU8atnAdO2o=
github.com/crazy-max/diun/v4 v4.28.0 h1:3kWWkc6TAII32Mxlc7PMN32IEMYcglms0ycqBSpNtFE=
github.com/crazy-max/diun/v4 v4.28.0/go.mod h1:vw/KMvUuaBNolMgwhfib72+neHK+ZOUF6DfmQLDOKbo=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY=
github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand Down Expand Up @@ -72,6 +75,7 @@ github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nos
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
github.com/go-chi/chi/v5 v5.0.8/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8=
github.com/go-chi/chi/v5 v5.1.0 h1:acVI1TYaD+hhedDJ3r54HyA6sExp3HfXq7QWEEY/xMw=
github.com/go-chi/chi/v5 v5.1.0/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
Expand Down Expand Up @@ -134,8 +138,11 @@ github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU=
github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
Expand Down Expand Up @@ -195,15 +202,21 @@ github.com/reugn/go-quartz v0.13.0 h1:0eMxvj28Qu1npIDdN9Mzg9hwyksGH6XJt4Cz0QB8EU
github.com/reugn/go-quartz v0.13.0/go.mod h1:0ghKksELp8MJ4h84T203aTHRF3Kug5BrxEW3ErBvhzY=
github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
github.com/sirupsen/logrus v1.9.2/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
Expand Down
3 changes: 3 additions & 0 deletions internal/controller/controller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
package controller

//+kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;list;watch;create;update;patch
2 changes: 1 addition & 1 deletion internal/controller/image_tag_mutator.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func (i *ImageTagMutator) Handle(ctx context.Context, req admission.Request) adm
image, err := i.KubeAPIClient.Image().Find(ctx, pod.Namespace, imageP.GetImageWithoutTag())
if err != nil {
// increment the total number of errors
metrics.AdmissionController().PatchErrorTotal.Inc()
metrics.Mutator().PatchErrorTotal.Inc()

log.Error(err, "Failed to find kind Image")
continue
Expand Down
22 changes: 11 additions & 11 deletions internal/httpserver/httpserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@ import (
"sync"
"time"

logger "github.com/chi-middleware/logrus-logger"
"github.com/go-chi/chi/v5"
"github.com/go-chi/chi/v5/middleware"
"github.com/prometheus/client_golang/prometheus/promhttp"
log "github.com/sirupsen/logrus"

"github.com/orange-cloudavenue/kube-image-updater/internal/log"
"github.com/orange-cloudavenue/kube-image-updater/internal/metrics"
"github.com/orange-cloudavenue/kube-image-updater/internal/models"
)

Expand Down Expand Up @@ -59,8 +60,8 @@ type (
)

var (
healthzPort string = ""
healthzPath string = ""
HealthzPort string = "" // exported so the operator can reuse the healthz port
HealthzPath string = "" // exported so the operator can reuse the healthz path
metricsPort string = ""
metricsPath string = ""

Expand All @@ -78,8 +79,8 @@ var (
func init() {
// * Healthz
flag.Bool(models.HealthzFlagName, false, "Enable the healthz server.")
flag.StringVar(&healthzPort, models.HealthzPortFlagName, models.HealthzDefaultAddr, "Healthz server port.")
flag.StringVar(&healthzPath, models.HealthzPathFlagName, models.HealthzDefaultPath, "Healthz server path.")
flag.StringVar(&HealthzPort, models.HealthzPortFlagName, models.HealthzDefaultAddr, "Healthz server port.")
flag.StringVar(&HealthzPath, models.HealthzPathFlagName, models.HealthzDefaultPath, "Healthz server path.")

// * Metrics
flag.Bool(models.MetricsFlagName, false, "Enable the metrics server.")
Expand Down Expand Up @@ -134,24 +135,23 @@ func DisableMetrics() OptionServer {

// Function to create a new server for health
func (a *app) createHealth() *server {
s := a.new(WithAddr(healthzPort))
// s.Config.Get(DefaultPathHealth, health.DefaultHandler().ServeHTTP))
s := a.new(WithAddr(HealthzPort))
return s
}

// Function to create a new server for metrics
func (a *app) createMetrics() *server {
s := a.new(WithAddr(metricsPort))
s.Config.Get(metricsPath, promhttp.Handler().ServeHTTP)
s.Config.Get(metricsPath, metrics.Handler().ServeHTTP)
return s
}

// Function return a server
func (a *app) new(opts ...Option) *server {
// create a new router
r := chi.NewRouter()
r.Use(logger.Logger("router", log.GetLogger()))
r.Use(middleware.Recoverer)
r.Use(middleware.Logger)

// create a new server with default parameters
s := &server{
Expand Down Expand Up @@ -279,7 +279,7 @@ func WithCustomHandlerForHealth(req HealthzFunc) OptionServer {
return
}

a.list["health"].Config.Get(healthzPath, func(w http.ResponseWriter, r *http.Request) {
a.list["health"].Config.Get(HealthzPath, func(w http.ResponseWriter, r *http.Request) {
ok, err := req()
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
Expand Down
Loading

0 comments on commit cca1c99

Please sign in to comment.