Skip to content

Commit

Permalink
chore: optimize metrics creation and add documentation (#66)
Browse files Browse the repository at this point in the history
Co-authored-by: dmicheneau <[email protected]>
  • Loading branch information
azrod and dmicheneau authored Oct 22, 2024
1 parent 0df440a commit d55cd0f
Show file tree
Hide file tree
Showing 29 changed files with 859 additions and 431 deletions.
2 changes: 1 addition & 1 deletion .github/changelog/generate-changelog.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ if [ -z "$PREVIOUS_CHANGELOG" ]
then
echo "Unable to locate previous changelog contents."
exit 1
fi
fi

CHANGELOG=$($(go env GOPATH)/bin/changelog-build -this-release $TARGET_SHA \
-last-release $PREVIOUS_RELEASE_SHA \
Expand Down
25 changes: 25 additions & 0 deletions .github/workflows/go-generate.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Code generation check workflow.
name: go-generate

# This GitHub action runs `go generate ./...` for each pull request that touches docs/ or tools/
# and fails if the generated output differs from what is committed.
on:
pull_request:
paths:
- 'docs/**'
- 'tools/**'

jobs:
generate:
name: Generate
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version-file: 'go.mod'
- run: go generate ./...
- name: git diff
run: |
git diff --compact-summary --exit-code || \
(echo; echo "Unexpected difference in directories after code generation. Run 'go generate ./...' command and commit."; exit 1)
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
name: 'Force pkg.go.dev release sync'

on:
push:
tags:
- 'v[0-9]+.[0-9]+.[0-9]+'
- '**/v[0-9]+.[0-9]+.[0-9]+'
release:
types: [published]

jobs:
build:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ jobs:
runs-on:
group: Default
steps:
- uses: creekorful/[email protected]
- uses: creekorful/[email protected]
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: Unit tests

on:
pull_request_target:
pull_request:
workflow_dispatch:

permissions:
Expand Down
17 changes: 15 additions & 2 deletions .github/workflows/new-release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,23 @@ jobs:
- name: Run Go unit tests
run: |
go test ./...
generate:
needs: [pre-check]
name: Generate
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version-file: 'go.mod'
- run: go generate ./...
- name: git diff
run: |
git diff --compact-summary --exit-code || \
(echo; echo "Unexpected difference in directories after code generation. Run 'go generate ./...' command and commit."; exit 1)
# * Step 2: Create a new tag
tag:
needs: [golangci-lint, pre-check, tag-already-exist, testsunit]
needs: [golangci-lint, pre-check, tag-already-exist, testsunit, generate]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ repos:
hooks:
- id: check-merge-conflict
- id: trailing-whitespace
- id: end-of-file-fixer
args: ["--markdown-linebreak-ext=md"]
- id: forbid-submodules

## GOLANG
Expand Down
4 changes: 4 additions & 0 deletions cmd/admission-controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/orange-cloudavenue/kube-image-updater/internal/httpserver"
client "github.com/orange-cloudavenue/kube-image-updater/internal/kubeclient"
"github.com/orange-cloudavenue/kube-image-updater/internal/log"
"github.com/orange-cloudavenue/kube-image-updater/internal/metrics"
)

var (
Expand All @@ -38,6 +39,9 @@ var (
)

func init() {
// Init Metrics
metrics.AdmissionController()

// webhook server running namespace (default to "default")
if os.Getenv("POD_NAMESPACE") != "" {
webhookNamespace = os.Getenv("POD_NAMESPACE")
Expand Down
24 changes: 12 additions & 12 deletions cmd/admission-controller/webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ import (
// ServeHandler handles admission review HTTP requests and records request metrics.
func ServeHandler(w http.ResponseWriter, r *http.Request) {
// Prometheus metrics
metrics.AdmissionController().Total().Inc()
timeAC := metrics.AdmissionController().Duration()
metrics.AdmissionController().RequestTotal.Inc()
timeAC := metrics.AdmissionController().RequestDuration.NewTimer()
defer timeAC.ObserveDuration()

var body []byte
Expand All @@ -35,7 +35,7 @@ func ServeHandler(w http.ResponseWriter, r *http.Request) {
}
if len(body) == 0 {
// increment the total number of errors
metrics.AdmissionController().TotalErr().Inc()
metrics.AdmissionController().RequestErrorTotal.Inc()

log.Error("empty body")
http.Error(w, "empty body", http.StatusBadRequest)
Expand All @@ -46,7 +46,7 @@ func ServeHandler(w http.ResponseWriter, r *http.Request) {
contentType := r.Header.Get("Content-Type")
if contentType != "application/json" {
// increment the total number of errors
metrics.AdmissionController().TotalErr().Inc()
metrics.AdmissionController().RequestErrorTotal.Inc()

http.Error(w, "invalid Content-Type, expect `application/json`", http.StatusUnsupportedMediaType)
return
Expand All @@ -56,7 +56,7 @@ func ServeHandler(w http.ResponseWriter, r *http.Request) {
ar := admissionv1.AdmissionReview{}
if _, _, err := deserializer.Decode(body, nil, &ar); err != nil {
// increment the total number of errors
metrics.AdmissionController().TotalErr().Inc()
metrics.AdmissionController().RequestErrorTotal.Inc()

log.WithError(err).Warn("Can't decode body")
admissionResponse = &admissionv1.AdmissionResponse{
Expand Down Expand Up @@ -84,13 +84,13 @@ func ServeHandler(w http.ResponseWriter, r *http.Request) {
resp, err := json.Marshal(admissionReview)
if err != nil {
// increment the total number of errors
metrics.AdmissionController().TotalErr().Inc()
metrics.AdmissionController().RequestErrorTotal.Inc()

http.Error(w, fmt.Sprintf("could not encode response: %v", err), http.StatusInternalServerError)
}
if _, err := w.Write(resp); err != nil {
// increment the total number of errors
metrics.AdmissionController().TotalErr().Inc()
metrics.AdmissionController().RequestErrorTotal.Inc()

http.Error(w, fmt.Sprintf("could not write response: %v", err), http.StatusInternalServerError)
}
Expand Down Expand Up @@ -139,16 +139,16 @@ func mutate(ctx context.Context, ar *admissionv1.AdmissionReview) *admissionv1.A
// create mutation patch for pod.
func createPatch(ctx context.Context, pod *corev1.Pod) ([]byte, error) {
// Metrics - increment the total number of patch
metrics.AdmissionControllerPatch().Total().Inc()
timePatch := metrics.AdmissionControllerPatch().Duration()
metrics.AdmissionController().PatchTotal.Inc()
timePatch := metrics.AdmissionController().PatchDuration.NewTimer()
defer timePatch.ObserveDuration()

var err error
// find annotation enabled
an := annotations.New(ctx, pod)
if !an.Enabled().Get() {
// increment the total number of errors
metrics.AdmissionControllerPatch().TotalErr().Inc()
metrics.AdmissionController().PatchErrorTotal.Inc()

return nil, fmt.Errorf("annotation not enabled")
}
Expand All @@ -175,7 +175,7 @@ func createPatch(ctx context.Context, pod *corev1.Pod) ([]byte, error) {
image, err = kubeClient.Image().Find(ctx, pod.Namespace, imageP.GetImageWithoutTag())
if err != nil {
// increment the total number of errors
metrics.AdmissionControllerPatch().TotalErr().Inc()
metrics.AdmissionController().PatchErrorTotal.Inc()

log.
WithFields(logrus.Fields{
Expand All @@ -191,7 +191,7 @@ func createPatch(ctx context.Context, pod *corev1.Pod) ([]byte, error) {
image, err = kubeClient.Image().Get(ctx, pod.Namespace, crdName)
if err != nil {
// increment the total number of errors
metrics.AdmissionControllerPatch().TotalErr().Inc()
metrics.AdmissionController().PatchErrorTotal.Inc()

log.
WithFields(logrus.Fields{
Expand Down
8 changes: 8 additions & 0 deletions cmd/kimup/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/orange-cloudavenue/kube-image-updater/internal/httpserver"
"github.com/orange-cloudavenue/kube-image-updater/internal/kubeclient"
"github.com/orange-cloudavenue/kube-image-updater/internal/log"
"github.com/orange-cloudavenue/kube-image-updater/internal/metrics"
"github.com/orange-cloudavenue/kube-image-updater/internal/models"
"github.com/orange-cloudavenue/kube-image-updater/internal/triggers"
)
Expand All @@ -24,6 +25,13 @@ var (
)

func init() {
// Initialize the metrics
metrics.Tags()
metrics.Events()
metrics.Actions()
metrics.Rules()
metrics.Registry()

// TODO add namespace scope
// Flag "loglevel" is set in log package
flag.Parse()
Expand Down
33 changes: 17 additions & 16 deletions cmd/kimup/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ func initScheduler(ctx context.Context, k kubeclient.Interface) {
// Add event lock
event.On(triggers.RefreshImage.String(), event.ListenerFunc(func(e event.Event) (err error) {
// Increment the counter for the events
metrics.Events().Total().Inc()
metrics.Events().TriggeredTotal.Inc()
// Start the timer for the event execution
timerEvents := metrics.Events().Duration()
timerEvents := metrics.Events().TriggeredDuration.NewTimer()
defer timerEvents.ObserveDuration()

if l[e.Data()["namespace"].(string)+"/"+e.Data()["image"].(string)] == nil {
Expand Down Expand Up @@ -72,8 +72,8 @@ func initScheduler(ctx context.Context, k kubeclient.Interface) {
i := utils.ImageParser(image.Spec.Image)

// Prometheus metrics - Increment the counter for the registry
metrics.Registry().Total().Inc()
timerRegistry := metrics.Registry().Duration()
metrics.Registry().RequestTotal.WithLabelValues(i.GetRegistry()).Inc()
timerRegistry := metrics.Registry().RequestDuration.NewTimer(i.GetRegistry())

re, err := registry.New(ctx, image.Spec.Image, registry.Settings{
InsecureTLS: image.Spec.InsecureSkipTLSVerify,
Expand All @@ -93,24 +93,25 @@ func initScheduler(ctx context.Context, k kubeclient.Interface) {
timerRegistry.ObserveDuration()
if err != nil {
// Prometheus metrics - Increment the counter for the registry with error
metrics.Registry().TotalErr().Inc()
metrics.Registry().RequestErrorTotal.WithLabelValues(i.GetRegistry()).Inc()

return err
}

// Prometheus metrics - Increment the counter for the tags
metrics.Tags().Total().Inc()
timerTags := metrics.Tags().Duration()
metrics.Tags().RequestTotal.Inc()
timerTags := metrics.Tags().RequestDuration.NewTimer()

tagsAvailable, err := re.Tags()
timerTags.ObserveDuration()
if err != nil {
// Prometheus metrics - Increment the counter for the tags with error
metrics.Tags().TotalErr().Inc()

metrics.Tags().RequestErrorTotal.Inc()
return err
}

metrics.Tags().AvailableSum.WithLabelValues(image.Spec.Image).Observe(float64(len(tagsAvailable)))

log.Debugf("[RefreshImage] %d tags available for %s", len(tagsAvailable), image.Spec.Image)

for _, rule := range image.Spec.Rules {
Expand All @@ -128,8 +129,8 @@ func initScheduler(ctx context.Context, k kubeclient.Interface) {
r.Init(tag, tagsAvailable, rule.Value)

// Prometheus metrics - Increment the counter for the rules
metrics.Rules().Total().Inc()
timerRules := metrics.Rules().Duration()
metrics.Rules().EvaluatedTotal.Inc()
timerRules := metrics.Rules().EvaluatedDuration.NewTimer()

match, newTag, err := r.Evaluate()

Expand All @@ -138,7 +139,7 @@ func initScheduler(ctx context.Context, k kubeclient.Interface) {

if err != nil {
// Prometheus metrics - Increment the counter for the evaluated rule with error
metrics.Rules().TotalErr().Inc()
metrics.Rules().EvaluatedErrorTotal.Inc()

log.Errorf("Error evaluating rule: %v", err)
continue
Expand All @@ -159,8 +160,8 @@ func initScheduler(ctx context.Context, k kubeclient.Interface) {
}, &image, action.Data)

// Prometheus metrics - Increment the counter for the actions
metrics.Actions().Total().Inc()
timerActions := metrics.Actions().Duration()
metrics.Actions().ExecutedTotal.Inc()
timerActions := metrics.Actions().ExecutedDuration.NewTimer()

err = a.Execute(ctx)

Expand All @@ -169,7 +170,7 @@ func initScheduler(ctx context.Context, k kubeclient.Interface) {

if err != nil {
// Prometheus metrics - Increment the counter for the executed action with error
metrics.Actions().TotalErr().Inc()
metrics.Actions().ExecutedErrorTotal.Inc()

log.Errorf("Error executing action(%s): %v", action.Type, err)
continue
Expand All @@ -183,7 +184,7 @@ func initScheduler(ctx context.Context, k kubeclient.Interface) {
})

// Prometheus metrics - Increment the counter for the events evaluated with error
metrics.Events().TotalErr().Inc()
metrics.Events().TriggerdErrorTotal.Inc()
return retryErr
}), event.Normal)
}
49 changes: 49 additions & 0 deletions docs/advanced/metrics.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
---
hide:
- toc
---

# Metrics

kimup exposes metrics to monitor its performance. The metrics are exposed in the Prometheus format and can be scraped by Prometheus or any other monitoring tool that supports the Prometheus exposition format.

## Settings

The following arguments can be used to configure the metrics *(Available in kimup-operator, kimup-controller and kimup-admission-controller)*:

| Flag | Default | Description |
| -------------- | -------- | ------------------------- |
| --metrics | false | Enable metrics collection |
| --metrics-port | :9080 | Port to expose metrics on |
| --metrics-path | /metrics | Path to expose metrics on |


## Metrics

The following metrics are exposed:

| Metrics | Description |
| ---------------------------------------------- | ----------------------------------------------------------- |
| kimup_actions_executed_duration | The duration in seconds of action performed. |
| kimup_actions_executed_error_total | The total number of action performed with error. |
| kimup_actions_executed_total | The total number of action performed. |
| kimup_admission_controller_patch_duration | The duration in seconds of patch in admission controller. |
| kimup_admission_controller_patch_error_total | The total number of patch action performed with error. |
| kimup_admission_controller_patch_total | The total number of patch action performed. |
| kimup_admission_controller_request_duration | The duration in seconds of request in admission controller. |
| kimup_admission_controller_request_error_total | The total number of request received with error. |
| kimup_admission_controller_request_total | The total number of request received. |
| kimup_events_triggerd_error_total | The total number of events triggered with error. |
| kimup_events_triggered_duration | The duration in seconds of events triggered. |
| kimup_events_triggered_total | The total number of events triggered. |
| kimup_registry_request_duration | The duration in seconds of registry requests. |
| kimup_registry_request_error_total | The total number of registry requests that returned an error. |
| kimup_registry_request_total | The total number of registry requests. |
| kimup_rules_evaluated_duration | The duration in seconds of rules evaluated. |
| kimup_rules_evaluated_error_total | The total number of rules evaluated with error. |
| kimup_rules_evaluated_total | The total number of rules evaluated. |
| kimup_tags_available_sum | The total number of tags available for an image. |
| kimup_tags_request_duration | The duration in seconds of the request to list tags. |
| kimup_tags_request_error_total | The total number of requests to list tags that returned an error. |
| kimup_tags_request_total | The total number of requests to list tags. |

Loading

0 comments on commit d55cd0f

Please sign in to comment.