diff --git a/api/kuik/v1alpha1/cachedimage_utils.go b/api/kuik/v1alpha1/cachedimage_utils.go index cd37905b..6475ead6 100644 --- a/api/kuik/v1alpha1/cachedimage_utils.go +++ b/api/kuik/v1alpha1/cachedimage_utils.go @@ -2,6 +2,7 @@ package v1alpha1 import ( "context" + "strings" "github.com/distribution/reference" "github.com/enix/kube-image-keeper/internal/registry" @@ -20,6 +21,15 @@ func (r *CachedImage) Repository() (reference.Named, error) { return named, nil } +func (r *CachedImage) Upstream() (string, error) { + named, err := r.Repository() + if err != nil { + return "", err + } + + return reference.Domain(named), nil +} + func (r *CachedImage) GetPullSecrets(apiReader client.Reader) ([]corev1.Secret, error) { named, err := r.Repository() if err != nil { @@ -39,3 +49,17 @@ func (r *CachedImage) GetPullSecrets(apiReader client.Reader) ([]corev1.Secret, return pullSecrets, nil } + +func CachedImageNameFromSourceImage(sourceImage string) (string, error) { + ref, err := reference.ParseAnyReference(sourceImage) + if err != nil { + return "", err + } + + sanitizedName := registry.SanitizeName(ref.String()) + if !strings.Contains(sourceImage, ":") { + sanitizedName += "-latest" + } + + return sanitizedName, nil +} diff --git a/cmd/cache/main.go b/cmd/cache/main.go index 2798398b..d4f44129 100644 --- a/cmd/cache/main.go +++ b/cmd/cache/main.go @@ -154,7 +154,6 @@ func main() { kuikController.SetLeader(true) }() - kuikController.ProbeAddr = probeAddr kuikController.RegisterMetrics(mgr.GetClient()) setupLog.Info("starting manager") diff --git a/internal/controller/collector.go b/internal/controller/collector.go index f3b971a8..d61408e1 100644 --- a/internal/controller/collector.go +++ b/internal/controller/collector.go @@ -4,9 +4,13 @@ import ( "context" "strconv" - kuikv1alpha1 "github.com/enix/kube-image-keeper/api/kuik/v1alpha1" + "github.com/enix/kube-image-keeper/api/kuik/v1alpha1" + "github.com/enix/kube-image-keeper/internal/controller/core" kuikMetrics "github.com/enix/kube-image-keeper/internal/metrics" + "github.com/enix/kube-image-keeper/internal/registry" "github.com/prometheus/client_golang/prometheus" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/metrics" @@ -14,9 +18,16 @@ import ( const subsystem = "controller" -var ProbeAddr = "" - var ( + ImageCachingRequest = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: kuikMetrics.Namespace, + Subsystem: subsystem, + Name: "image_caching_request", + Help: "Number of request to cache an image", + }, + []string{"successful", "upstream_registry"}, + ) ImagePutInCache = prometheus.NewCounter( prometheus.CounterOpts{ Namespace: kuikMetrics.Namespace, @@ -39,7 +50,7 @@ var ( Name: "is_leader", Help: "Whether or not this replica is a leader. 1 if it is, 0 otherwise.", }) - up = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Up = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ Namespace: kuikMetrics.Namespace, Subsystem: subsystem, Name: "up", @@ -53,52 +64,154 @@ var ( cachedImagesMetric = prometheus.BuildFQName(kuikMetrics.Namespace, subsystem, "cached_images") cachedImagesHelp = "Number of images expected to be cached" - cachedImagesDesc = prometheus.NewDesc(cachedImagesMetric, cachedImagesHelp, []string{"cached", "expiring"}, nil) + cachedImagesDesc = prometheus.NewDesc(cachedImagesMetric, cachedImagesHelp, []string{"status", "cached", "expiring"}, nil) + + repositoriesMetric = prometheus.BuildFQName(kuikMetrics.Namespace, subsystem, "repositories") + repositoriesHelp = "Number of repositories" + repositoriesDesc = prometheus.NewDesc(repositoriesMetric, repositoriesHelp, []string{"status"}, nil) + + containersWithCachedImageMetric = prometheus.BuildFQName(kuikMetrics.Namespace, subsystem, "containers_with_cached_image") + containersWithCachedImageHelp = "Number of containers that have been rewritten to use a cached image" + containersWithCachedImageDesc = prometheus.NewDesc(containersWithCachedImageMetric, containersWithCachedImageHelp, []string{"status", "cached"}, nil) ) func RegisterMetrics(client client.Client) { // Register custom metrics with the global prometheus registry metrics.Registry.MustRegister( + ImageCachingRequest, ImagePutInCache, ImageRemovedFromCache, kuikMetrics.NewInfo(subsystem), isLeader, - up, + Up, &ControllerCollector{ Client: client, }, ) } -func cachedImagesWithLabelValues(gaugeVec *prometheus.GaugeVec, cachedImage *kuikv1alpha1.CachedImage) prometheus.Gauge { - return gaugeVec.WithLabelValues(strconv.FormatBool(cachedImage.Status.IsCached), strconv.FormatBool(cachedImage.Spec.ExpiresAt != nil)) -} - type ControllerCollector struct { client.Client } func (c *ControllerCollector) Describe(ch chan<- *prometheus.Desc) { ch <- cachedImagesDesc + ch <- repositoriesDesc + ch <- containersWithCachedImageDesc } func (c *ControllerCollector) Collect(ch chan<- prometheus.Metric) { - cachedImageList := &kuikv1alpha1.CachedImageList{} - if err := c.List(context.Background(), cachedImageList); err == nil { - cachedImageGaugeVec := prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: cachedImagesMetric, - Help: cachedImagesHelp, - }, - []string{"cached", "expiring"}, - ) - for _, cachedImage := range cachedImageList.Items { - cachedImagesWithLabelValues(cachedImageGaugeVec, &cachedImage).Inc() - } - cachedImageGaugeVec.Collect(ch) + if cachedImagesGaugeVec, err := c.getCachedImagesMetric(); err == nil { + cachedImagesGaugeVec.Collect(ch) } else { - log.FromContext(context.TODO()).Error(err, "could not collect "+cachedImagesMetric+" metric") + log.FromContext(context.Background()).Error(err, "could not collect "+cachedImagesMetric+" metric") } + + if repositoriesGaugeVec, err := c.getRepositoriesMetric(); err == nil { + repositoriesGaugeVec.Collect(ch) + } else { + log.FromContext(context.Background()).Error(err, "could not collect "+repositoriesMetric+" metric") + } + + if containersWithCachedImageGaugeVec, err := c.getContainersWithCachedImageMetric(); err == nil { + containersWithCachedImageGaugeVec.Collect(ch) + } else { + log.FromContext(context.Background()).Error(err, "could not collect "+containersWithCachedImageMetric+" metric") + } +} + +func (c *ControllerCollector) getCachedImagesMetric() (*prometheus.GaugeVec, error) { + cachedImageList := &v1alpha1.CachedImageList{} + if err := c.List(context.Background(), cachedImageList); err != nil { + return nil, err + } + + cachedImagesGaugeVec := prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: cachedImagesMetric, + Help: cachedImagesHelp, + }, + []string{"status", "cached", "expiring"}, + ) + for _, cachedImage := range cachedImageList.Items { + cachedImagesGaugeVec. + WithLabelValues(cachedImage.Status.Phase, strconv.FormatBool(cachedImage.Status.IsCached), strconv.FormatBool(cachedImage.Spec.ExpiresAt != nil)). + Inc() + } + + return cachedImagesGaugeVec, nil +} + +func (c *ControllerCollector) getContainersWithCachedImageMetric() (*prometheus.GaugeVec, error) { + cachedImageList := &v1alpha1.CachedImageList{} + if err := c.List(context.Background(), cachedImageList); err != nil { + return nil, err + } + + cachedImages := map[string]v1alpha1.CachedImage{} + for _, cachedImage := range cachedImageList.Items { + cachedImages[cachedImage.Name] = cachedImage + } + + podList := &corev1.PodList{} + labelSelector := metav1.LabelSelector{ + MatchLabels: map[string]string{ + core.LabelManagedName: "true", + }, + } + selector, err := metav1.LabelSelectorAsSelector(&labelSelector) + if err != nil { + return nil, err + } + if err := c.List(context.Background(), podList, &client.ListOptions{LabelSelector: selector}); err != nil { + return nil, err + } + + containersWithCachedImageGaugeVec := prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: containersWithCachedImageMetric, + Help: containersWithCachedImageHelp, + }, + []string{"status", "cached"}, + ) + for _, pod := range podList.Items { + for _, container := range pod.Spec.Containers { + annotationKey := registry.ContainerAnnotationKey(container.Name, false) + if sourceImage, ok := pod.ObjectMeta.Annotations[annotationKey]; ok { + cachedImageName, err := v1alpha1.CachedImageNameFromSourceImage(sourceImage) + if err != nil { + return nil, err + } + if cachedImage, ok := cachedImages[cachedImageName]; ok { + containersWithCachedImageGaugeVec. + WithLabelValues(cachedImage.Status.Phase, strconv.FormatBool(cachedImage.Status.IsCached)). + Inc() + } + } + } + } + + return containersWithCachedImageGaugeVec, nil +} + +func (c *ControllerCollector) getRepositoriesMetric() (*prometheus.GaugeVec, error) { + repositoriesList := &v1alpha1.RepositoryList{} + if err := c.List(context.Background(), repositoriesList); err != nil { + return nil, err + } + + repositoriesGaugeVec := prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: repositoriesMetric, + Help: repositoriesHelp, + }, + []string{"status"}, + ) + for _, repository := range repositoriesList.Items { + repositoriesGaugeVec.WithLabelValues(repository.Status.Phase).Inc() + } + + return repositoriesGaugeVec, nil } func SetLeader(leader bool) { diff --git a/internal/controller/core/pod_controller.go b/internal/controller/core/pod_controller.go index 15839a6c..44de8fdf 100644 --- a/internal/controller/core/pod_controller.go +++ b/internal/controller/core/pod_controller.go @@ -3,7 +3,6 @@ package core import ( "context" _ "crypto/sha256" - "strings" "golang.org/x/exp/maps" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -11,7 +10,7 @@ import ( "k8s.io/apimachinery/pkg/selection" "k8s.io/apimachinery/pkg/types" - "github.com/distribution/reference" + "github.com/enix/kube-image-keeper/api/kuik/v1alpha1" kuikv1alpha1 "github.com/enix/kube-image-keeper/api/kuik/v1alpha1" "github.com/enix/kube-image-keeper/internal/registry" corev1 "k8s.io/api/core/v1" @@ -162,7 +161,13 @@ func (r *PodReconciler) podsWithDeletingCachedImages(ctx context.Context, obj cl } var podList corev1.PodList - podRequirements, _ := labels.NewRequirement(LabelManagedName, selection.Equals, []string{"true"}) + podRequirements, err := labels.NewRequirement(LabelManagedName, selection.Equals, []string{"true"}) + if err != nil { + // errors cannot be handled in a better way for now (see https://github.com/kubernetes-sigs/controller-runtime/issues/1996) + // maybe we don't need to enqueue all Pods related to this CachedImage but only those in the status UsedBy + log.Error(err, "could not list pods") + return nil + } selector := labels.NewSelector() selector = selector.Add(*podRequirements) if err := r.List(ctx, &podList, &client.ListOptions{ @@ -250,16 +255,11 @@ func desiredCachedImagesForContainers(ctx context.Context, containers []corev1.C } func cachedImageFromSourceImage(sourceImage string) (*kuikv1alpha1.CachedImage, error) { - ref, err := reference.ParseAnyReference(sourceImage) + sanitizedName, err := v1alpha1.CachedImageNameFromSourceImage(sourceImage) if err != nil { return nil, err } - sanitizedName := registry.SanitizeName(ref.String()) - if !strings.Contains(sourceImage, ":") { - sanitizedName += "-latest" - } - cachedImage := kuikv1alpha1.CachedImage{ TypeMeta: metav1.TypeMeta{APIVersion: kuikv1alpha1.GroupVersion.String(), Kind: "CachedImage"}, ObjectMeta: metav1.ObjectMeta{ diff --git a/internal/controller/kuik/cachedimage_controller.go b/internal/controller/kuik/cachedimage_controller.go index 8aa44153..6e8b3898 100644 --- a/internal/controller/kuik/cachedimage_controller.go +++ b/internal/controller/kuik/cachedimage_controller.go @@ -4,6 +4,7 @@ import ( "context" "crypto/x509" "net/http" + "strconv" "strings" "time" @@ -239,8 +240,14 @@ func (r *CachedImageReconciler) Reconcile(ctx context.Context, req ctrl.Request) putImageInCache = false } if putImageInCache { + upstream, err := cachedImage.Upstream() + if err != nil { + return ctrl.Result{}, err + } + r.Recorder.Eventf(&cachedImage, "Normal", "Caching", "Start caching image %s", cachedImage.Spec.SourceImage) err = r.cacheImage(&cachedImage) + kuikController.ImageCachingRequest.WithLabelValues(strconv.FormatBool(err == nil), upstream).Inc() if err != nil { log.Error(err, "failed to cache image") r.Recorder.Eventf(&cachedImage, "Warning", "CacheFailed", "Failed to cache image %s, reason: %s", cachedImage.Spec.SourceImage, err) @@ -401,9 +408,6 @@ func (r *CachedImageReconciler) SetupWithManager(mgr ctrl.Manager, maxConcurrent &corev1.Pod{}, handler.EnqueueRequestsFromMapFunc(r.cachedImagesRequestFromPod), builder.WithPredicates(predicate.Funcs{ - // GenericFunc: func(e event.GenericEvent) bool { - // return true - // }, DeleteFunc: func(e event.DeleteEvent) bool { pod := e.Object.(*corev1.Pod) var currentPod corev1.Pod