From 4e06b89e5680dac8ca535e2a0eb9c0b70deb8f15 Mon Sep 17 00:00:00 2001 From: Mateo Florido Date: Sun, 6 Oct 2024 23:29:24 -0500 Subject: [PATCH 1/6] Implement Refresh Worker Certificates Logic --- .../controllers/certificates_controller.go | 67 ++++++++++++- go.mod | 2 +- go.sum | 2 + pkg/ck8s/workload_cluster.go | 95 ++++++++++++++++++- 4 files changed, 158 insertions(+), 8 deletions(-) diff --git a/bootstrap/controllers/certificates_controller.go b/bootstrap/controllers/certificates_controller.go index 39ca2b98..6e313a60 100644 --- a/bootstrap/controllers/certificates_controller.go +++ b/bootstrap/controllers/certificates_controller.go @@ -149,7 +149,9 @@ func (r *CertificatesReconciler) Reconcile(ctx context.Context, req ctrl.Request return ctrl.Result{}, err } } else { - log.Info("worker nodes are not supported yet") + if err := r.refreshWorkerCertificates(ctx, scope); err != nil { + return ctrl.Result{}, err + } return ctrl.Result{}, nil } } @@ -188,7 +190,7 @@ func (r *CertificatesReconciler) refreshControlPlaneCertificates(ctx context.Con extraSANs := controlPlaneConfig.ExtraSANs extraSANs = append(extraSANs, controlPlaneEndpoint) - expirySecondsUnix, err := scope.Workload.RefreshCertificates(ctx, scope.Machine, *nodeToken, seconds, extraSANs) + expirySecondsUnix, err := scope.Workload.RefreshControlPlaneCertificates(ctx, scope.Machine, *nodeToken, seconds, extraSANs) if err != nil { r.recorder.Eventf( scope.Machine, @@ -245,3 +247,64 @@ func (r *CertificatesReconciler) updateExpiryDateAnnotation(ctx context.Context, return nil } + +func (r *CertificatesReconciler) refreshWorkerCertificates(ctx context.Context, scope *CertificatesScope) error { + nodeToken, err := token.LookupNodeToken(ctx, r.Client, util.ObjectKey(scope.Cluster), scope.Machine.Name) + if err != nil { + return fmt.Errorf("failed to lookup node token: %w", err) + } + + mAnnotations := scope.Machine.GetAnnotations() + + refreshAnnotation, ok := mAnnotations[bootstrapv1.CertificatesRefreshAnnotation] + if !ok { + return nil + } + + r.recorder.Eventf( + scope.Machine, + corev1.EventTypeNormal, + bootstrapv1.CertificatesRefreshInProgressEvent, + "Certificates refresh in progress. 
TTL: %s", refreshAnnotation, + ) + + seconds, err := utiltime.TTLToSeconds(refreshAnnotation) + if err != nil { + return fmt.Errorf("failed to parse expires-in annotation value: %w", err) + } + + expirySecondsUnix, err := scope.Workload.RefreshWorkerCertificates(ctx, scope.Machine, *nodeToken, seconds) + if err != nil { + r.recorder.Eventf( + scope.Machine, + corev1.EventTypeWarning, + bootstrapv1.CertificatesRefreshFailedEvent, + "Failed to refresh certificates: %v", err, + ) + return fmt.Errorf("failed to refresh certificates: %w", err) + } + + expiryTime := time.Unix(int64(expirySecondsUnix), 0) + + delete(mAnnotations, bootstrapv1.CertificatesRefreshAnnotation) + mAnnotations[bootstrapv1.MachineCertificatesExpiryDateAnnotation] = expiryTime.Format(time.RFC3339) + scope.Machine.SetAnnotations(mAnnotations) + if err := scope.Patcher.Patch(ctx, scope.Machine); err != nil { + return fmt.Errorf("failed to patch machine annotations: %w", err) + } + + r.recorder.Eventf( + scope.Machine, + corev1.EventTypeNormal, + bootstrapv1.CertificatesRefreshDoneEvent, + "Certificates refreshed, will expire at %s", expiryTime, + ) + + scope.Log.Info("Certificates refreshed", + "cluster", scope.Cluster.Name, + "machine", scope.Machine.Name, + "expiry", expiryTime.Format(time.RFC3339), + ) + + return nil +} diff --git a/go.mod b/go.mod index 52fd366d..e09f131e 100644 --- a/go.mod +++ b/go.mod @@ -119,7 +119,7 @@ require ( golang.org/x/mod v0.19.0 golang.org/x/net v0.23.0 // indirect golang.org/x/oauth2 v0.18.0 // indirect - golang.org/x/sync v0.6.0 // indirect + golang.org/x/sync v0.8.0 // indirect golang.org/x/sys v0.18.0 // indirect golang.org/x/term v0.18.0 // indirect golang.org/x/text v0.14.0 // indirect diff --git a/go.sum b/go.sum index bbb4cc19..9b2643a8 100644 --- a/go.sum +++ b/go.sum @@ -361,6 +361,8 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/pkg/ck8s/workload_cluster.go b/pkg/ck8s/workload_cluster.go index 769144f8..535436e9 100644 --- a/pkg/ck8s/workload_cluster.go +++ b/pkg/ck8s/workload_cluster.go @@ -10,6 +10,7 @@ import ( apiv1 "github.com/canonical/k8s-snap-api/api/v1" "github.com/pkg/errors" + "golang.org/x/sync/errgroup" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -242,7 +243,32 @@ func (w *Workload) GetCertificatesExpiryDate(ctx context.Context, machine *clust return response.ExpiryDate, nil } -func (w *Workload) RefreshCertificates(ctx context.Context, machine *clusterv1.Machine, nodeToken string, expirationSeconds int, extraSANs []string) (int, error) { +type ApproveWorkerCSRRequest struct { + Seed int `json:"seed"` +} + +type ApproveWorkerCSRResponse struct{} + +func (w *Workload) ApproveCertificates(ctx context.Context, machine 
*clusterv1.Machine, capiToken string, seed int) error { + request := ApproveWorkerCSRRequest{} + response := &ApproveWorkerCSRResponse{} + k8sdProxy, err := w.GetK8sdProxyForControlPlane(ctx, k8sdProxyOptions{}) + if err != nil { + return fmt.Errorf("failed to create k8sd proxy: %w", err) + } + + header := map[string][]string{ + "capi-auth-token": {w.authToken}, + } + + if err := w.doK8sdRequest(ctx, k8sdProxy, http.MethodPost, "1.0/x/capi/refresh-certs/approve", header, request, response); err != nil { + return fmt.Errorf("failed to approve certificates: %w", err) + } + + return nil +} + +func (w *Workload) refreshCertificatesPlan(ctx context.Context, machine *clusterv1.Machine, nodeToken string) (int, error) { planRequest := apiv1.ClusterAPICertificatesPlanRequest{} planResponse := &apiv1.ClusterAPICertificatesPlanResponse{} @@ -259,17 +285,76 @@ func (w *Workload) RefreshCertificates(ctx context.Context, machine *clusterv1.M return 0, fmt.Errorf("failed to refresh certificates: %w", err) } + return planResponse.Seed, nil +} + +func (w *Workload) refreshCertificatesRun(ctx context.Context, machine *clusterv1.Machine, nodeToken string, request *apiv1.ClusterAPICertificatesRunRequest) (int, error) { + runResponse := &apiv1.ClusterAPICertificatesRunResponse{} + header := map[string][]string{ + "node-token": {nodeToken}, + } + + k8sdProxy, err := w.GetK8sdProxyForMachine(ctx, machine) + if err != nil { + return 0, fmt.Errorf("failed to create k8sd proxy: %w", err) + } + + if err := w.doK8sdRequest(ctx, k8sdProxy, http.MethodPost, "1.0/x/capi/refresh-certs/run", header, request, runResponse); err != nil { + return 0, fmt.Errorf("failed to run refresh certificates: %w", err) + } + + return runResponse.ExpirationSeconds, nil +} + +func (w *Workload) RefreshWorkerCertificates(ctx context.Context, machine *clusterv1.Machine, nodeToken string, expirationSeconds int) (int, error) { + seed, err := w.refreshCertificatesPlan(ctx, machine, nodeToken) + if err != nil { + return 0, fmt.Errorf("failed to get refresh certificates plan: %w", err) + } + + request := apiv1.ClusterAPICertificatesRunRequest{ + Seed: seed, + ExpirationSeconds: expirationSeconds, + } + + var seconds int + + eg, ctx := errgroup.WithContext(ctx) + eg.Go(func() error { + seconds, err = w.refreshCertificatesRun(ctx, machine, nodeToken, &request) + return err + }) + + eg.Go(func() error { + return w.ApproveCertificates(ctx, machine, nodeToken, seed) + }) + + if err := eg.Wait(); err != nil { + return 0, fmt.Errorf("failed to refresh worker certificates: %w", err) + } + + return seconds, nil + +} + +func (w *Workload) RefreshControlPlaneCertificates(ctx context.Context, machine *clusterv1.Machine, nodeToken string, expirationSeconds int, extraSANs []string) (int, error) { + seed, err := w.refreshCertificatesPlan(ctx, machine, nodeToken) + if err != nil { + return 0, fmt.Errorf("failed to get refresh certificates plan: %w", err) + } + runRequest := apiv1.ClusterAPICertificatesRunRequest{ ExpirationSeconds: expirationSeconds, - Seed: planResponse.Seed, + Seed: seed, ExtraSANs: extraSANs, } - runResponse := &apiv1.ClusterAPICertificatesRunResponse{} - if err := w.doK8sdRequest(ctx, k8sdProxy, http.MethodPost, "1.0/x/capi/refresh-certs/run", header, runRequest, runResponse); err != nil { + + seconds, err := w.refreshCertificatesRun(ctx, machine, nodeToken, &runRequest) + if err != nil { return 0, fmt.Errorf("failed to run refresh certificates: %w", err) } - return runResponse.ExpirationSeconds, nil + return seconds, nil } func (w 
*Workload) RefreshMachine(ctx context.Context, machine *clusterv1.Machine, nodeToken string, upgradeOption string) (string, error) { From 6c139b3e278e2c6533a98e61633b5d903cf97019 Mon Sep 17 00:00:00 2001 From: Mateo Florido Date: Mon, 7 Oct 2024 09:10:14 -0500 Subject: [PATCH 2/6] Wrap Workload cluster errors --- pkg/ck8s/workload_cluster.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pkg/ck8s/workload_cluster.go b/pkg/ck8s/workload_cluster.go index 535436e9..5cf6de1d 100644 --- a/pkg/ck8s/workload_cluster.go +++ b/pkg/ck8s/workload_cluster.go @@ -322,11 +322,17 @@ func (w *Workload) RefreshWorkerCertificates(ctx context.Context, machine *clust eg, ctx := errgroup.WithContext(ctx) eg.Go(func() error { seconds, err = w.refreshCertificatesRun(ctx, machine, nodeToken, &request) - return err + if err != nil { + return fmt.Errorf("failed to run refresh certificates: %w", err) + } + return nil }) eg.Go(func() error { - return w.ApproveCertificates(ctx, machine, nodeToken, seed) + if err := w.ApproveCertificates(ctx, machine, nodeToken, seed); err != nil { + return fmt.Errorf("failed to approve certificates: %w", err) + } + return nil }) if err := eg.Wait(); err != nil { @@ -334,7 +340,6 @@ func (w *Workload) RefreshWorkerCertificates(ctx context.Context, machine *clust } return seconds, nil - } func (w *Workload) RefreshControlPlaneCertificates(ctx context.Context, machine *clusterv1.Machine, nodeToken string, expirationSeconds int, extraSANs []string) (int, error) { From 78f1b4881d7b850dc0f50ae68e390721bc576cba Mon Sep 17 00:00:00 2001 From: Mateo Florido Date: Tue, 15 Oct 2024 18:19:41 -0500 Subject: [PATCH 3/6] Address Code Review and Update Endpoint Structs --- go.mod | 4 +- go.sum | 6 +-- pkg/ck8s/workload_cluster.go | 72 +++++++++++++++++++----------------- 3 files changed, 42 insertions(+), 40 deletions(-) diff --git a/go.mod b/go.mod index e09f131e..eadc6dfb 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/canonical/cluster-api-k8s go 1.22.6 require ( - github.com/canonical/k8s-snap-api v1.0.8 + github.com/canonical/k8s-snap-api v1.0.10 github.com/go-logr/logr v1.4.1 github.com/google/uuid v1.4.0 github.com/onsi/ginkgo v1.16.5 @@ -119,7 +119,7 @@ require ( golang.org/x/mod v0.19.0 golang.org/x/net v0.23.0 // indirect golang.org/x/oauth2 v0.18.0 // indirect - golang.org/x/sync v0.8.0 // indirect + golang.org/x/sync v0.8.0 golang.org/x/sys v0.18.0 // indirect golang.org/x/term v0.18.0 // indirect golang.org/x/text v0.14.0 // indirect diff --git a/go.sum b/go.sum index 9b2643a8..61e4d9a6 100644 --- a/go.sum +++ b/go.sum @@ -29,8 +29,8 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= github.com/bwesterb/go-ristretto v1.2.0/go.mod h1:fUIoIZaG73pV5biE2Blr2xEzDoMj7NFEuV9ekS419A0= -github.com/canonical/k8s-snap-api v1.0.8 h1:W360Y4ulkAdCdQqbfQ7zXs3/Ty8SWENO3/Bzz8ZAEPE= -github.com/canonical/k8s-snap-api v1.0.8/go.mod h1:LDPoIYCeYnfgOFrwVPJ/4edGU264w7BB7g0GsVi36AY= +github.com/canonical/k8s-snap-api v1.0.10 h1:BoAw4Vr8mR8MWTKeZZxH5LmrF3JYGSZHDv+KEo5ifoU= +github.com/canonical/k8s-snap-api v1.0.10/go.mod h1:LDPoIYCeYnfgOFrwVPJ/4edGU264w7BB7g0GsVi36AY= github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= 
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= @@ -359,8 +359,6 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= -golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= diff --git a/pkg/ck8s/workload_cluster.go b/pkg/ck8s/workload_cluster.go index 5cf6de1d..68c034b0 100644 --- a/pkg/ck8s/workload_cluster.go +++ b/pkg/ck8s/workload_cluster.go @@ -228,9 +228,7 @@ func (w *Workload) GetCertificatesExpiryDate(ctx context.Context, machine *clust request := apiv1.CertificatesExpiryRequest{} response := &apiv1.CertificatesExpiryResponse{} - header := map[string][]string{ - "node-token": {nodeToken}, - } + header := w.newHeaderWithNodeToken(nodeToken) k8sdProxy, err := w.GetK8sdProxyForMachine(ctx, machine) if err != nil { return "", fmt.Errorf("failed to create k8sd proxy: %w", err) @@ -243,23 +241,15 @@ func (w *Workload) GetCertificatesExpiryDate(ctx context.Context, machine *clust return response.ExpiryDate, nil } -type ApproveWorkerCSRRequest struct { - Seed int `json:"seed"` -} - -type ApproveWorkerCSRResponse struct{} - -func (w *Workload) ApproveCertificates(ctx context.Context, machine *clusterv1.Machine, capiToken string, seed int) error { - request := ApproveWorkerCSRRequest{} - response := &ApproveWorkerCSRResponse{} +func (w *Workload) ApproveCertificates(ctx context.Context, machine *clusterv1.Machine, seed int) error { + request := apiv1.ClusterAPIApproveWorkerCSRRequest{} + response := &apiv1.ClusterAPIApproveWorkerCSRResponse{} k8sdProxy, err := w.GetK8sdProxyForControlPlane(ctx, k8sdProxyOptions{}) if err != nil { return fmt.Errorf("failed to create k8sd proxy: %w", err) } - header := map[string][]string{ - "capi-auth-token": {w.authToken}, - } + header := w.newHeaderWithCAPIAuthToken() if err := w.doK8sdRequest(ctx, k8sdProxy, http.MethodPost, "1.0/x/capi/refresh-certs/approve", header, request, response); err != nil { return fmt.Errorf("failed to approve certificates: %w", err) @@ -272,9 +262,7 @@ func (w *Workload) refreshCertificatesPlan(ctx context.Context, machine *cluster planRequest := apiv1.ClusterAPICertificatesPlanRequest{} planResponse := &apiv1.ClusterAPICertificatesPlanResponse{} - header := map[string][]string{ - "node-token": {nodeToken}, - } + header := w.newHeaderWithNodeToken(nodeToken) k8sdProxy, err := w.GetK8sdProxyForMachine(ctx, machine) if err != nil { @@ -290,9 +278,7 @@ func (w *Workload) refreshCertificatesPlan(ctx context.Context, machine *cluster func (w *Workload) refreshCertificatesRun(ctx context.Context, machine *clusterv1.Machine, nodeToken string, request *apiv1.ClusterAPICertificatesRunRequest) (int, error) { runResponse := &apiv1.ClusterAPICertificatesRunResponse{} - header := map[string][]string{ - "node-token": {nodeToken}, - } + header := w.newHeaderWithNodeToken(nodeToken) k8sdProxy, err := 
w.GetK8sdProxyForMachine(ctx, machine) if err != nil { @@ -306,6 +292,18 @@ func (w *Workload) refreshCertificatesRun(ctx context.Context, machine *clusterv return runResponse.ExpirationSeconds, nil } +// RefreshWorkerCertificates approves the worker node CSR and refreshes the certificates. +// The certificate approval process follows these steps: +// 1. The CAPI provider calls the /x/capi/refresh-certs/plan endpoint from the +// worker node, which generates the CSRs and creates the CertificateSigningRequest +// objects in the cluster. +// 2. The CAPI provider then calls the /x/capi/refresh-certs/plan endpoint with +// the seed. This endpoint waits until the CSR is approved and the certificate +// is signed. Note that this is a blocking call. +// 3. The CAPI provider calls the /x/capi/refresh-certs/approve endpoint from +// any control plane node to approve the CSRs. +// 4. The /x/capi/refresh-certs/plan endpoint completes and returns once the +// certificate is approved and signed. func (w *Workload) RefreshWorkerCertificates(ctx context.Context, machine *clusterv1.Machine, nodeToken string, expirationSeconds int) (int, error) { seed, err := w.refreshCertificatesPlan(ctx, machine, nodeToken) if err != nil { @@ -329,7 +327,7 @@ func (w *Workload) RefreshWorkerCertificates(ctx context.Context, machine *clust }) eg.Go(func() error { - if err := w.ApproveCertificates(ctx, machine, nodeToken, seed); err != nil { + if err := w.ApproveCertificates(ctx, machine, seed); err != nil { return fmt.Errorf("failed to approve certificates: %w", err) } return nil @@ -382,9 +380,7 @@ func (w *Workload) RefreshMachine(ctx context.Context, machine *clusterv1.Machin return "", fmt.Errorf("failed to create k8sd proxy: %w", err) } - header := map[string][]string{ - "node-token": {nodeToken}, - } + header := w.newHeaderWithNodeToken(nodeToken) if err := w.doK8sdRequest(ctx, k8sdProxy, http.MethodPost, "1.0/snap/refresh", header, request, response); err != nil { return "", fmt.Errorf("failed to refresh machine %s: %w", machine.Name, err) @@ -403,9 +399,7 @@ func (w *Workload) GetRefreshStatusForMachine(ctx context.Context, machine *clus return nil, fmt.Errorf("failed to create k8sd proxy: %w", err) } - header := map[string][]string{ - "node-token": {nodeToken}, - } + header := w.newHeaderWithNodeToken(nodeToken) if err := w.doK8sdRequest(ctx, k8sdProxy, http.MethodPost, "1.0/snap/refresh-status", header, request, response); err != nil { return nil, fmt.Errorf("failed to refresh machine %s: %w", machine.Name, err) @@ -438,9 +432,7 @@ func (w *Workload) requestJoinToken(ctx context.Context, name string, worker boo return "", fmt.Errorf("failed to create k8sd proxy: %w", err) } - header := map[string][]string{ - "capi-auth-token": {w.authToken}, - } + header := w.newHeaderWithCAPIAuthToken() if err := w.doK8sdRequest(ctx, k8sdProxy, http.MethodPost, "1.0/x/capi/generate-join-token", header, request, response); err != nil { return "", fmt.Errorf("failed to get join token: %w", err) @@ -467,9 +459,7 @@ func (w *Workload) RemoveMachineFromCluster(ctx context.Context, machine *cluste return fmt.Errorf("failed to create k8sd proxy: %w", err) } - header := map[string][]string{ - "capi-auth-token": {w.authToken}, - } + header := w.newHeaderWithCAPIAuthToken() if err := w.doK8sdRequest(ctx, k8sdProxy, http.MethodPost, "1.0/x/capi/remove-node", header, request, nil); err != nil { return fmt.Errorf("failed to remove %s from cluster: %w", machine.Name, err) @@ -525,6 +515,20 @@ func (w *Workload) doK8sdRequest(ctx 
context.Context, k8sdProxy *K8sdClient, met return nil } +// newHeaderWithCAPIAuthToken returns a map with the CAPI auth token as a header. +func (w *Workload) newHeaderWithCAPIAuthToken() map[string][]string { + return map[string][]string{ + "capi-auth-token": {w.authToken}, + } +} + +// newHeaderWithNodeToken returns a map with the node token as a header. +func (w *Workload) newHeaderWithNodeToken(nodeToken string) map[string][]string { + return map[string][]string{ + "node-token": {nodeToken}, + } +} + // UpdateAgentConditions is responsible for updating machine conditions reflecting the status of all the control plane // components. This operation is best effort, in the sense that in case // of problems in retrieving the pod status, it sets the condition to Unknown state without returning any error. From 3453cf8f495644c583366914cdc0e91567ad422a Mon Sep 17 00:00:00 2001 From: Mateo Florido Date: Tue, 15 Oct 2024 18:26:14 -0500 Subject: [PATCH 4/6] Remove errors --- pkg/ck8s/workload_cluster.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/ck8s/workload_cluster.go b/pkg/ck8s/workload_cluster.go index 66c011a6..c6468ad5 100644 --- a/pkg/ck8s/workload_cluster.go +++ b/pkg/ck8s/workload_cluster.go @@ -9,7 +9,6 @@ import ( "strings" apiv1 "github.com/canonical/k8s-snap-api/api/v1" - "github.com/pkg/errors" "golang.org/x/sync/errgroup" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" From 58875938ed8c63a851260f5f241e66d5a6ece58b Mon Sep 17 00:00:00 2001 From: Mateo Florido Date: Thu, 17 Oct 2024 19:01:17 -0500 Subject: [PATCH 5/6] Address code review --- bootstrap/controllers/certificates_controller.go | 5 ++--- go.mod | 2 +- go.sum | 4 ++-- pkg/ck8s/config_init.go | 9 +++++---- pkg/ck8s/workload_cluster.go | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/bootstrap/controllers/certificates_controller.go b/bootstrap/controllers/certificates_controller.go index 3ec9164b..a36b664c 100644 --- a/bootstrap/controllers/certificates_controller.go +++ b/bootstrap/controllers/certificates_controller.go @@ -146,13 +146,12 @@ func (r *CertificatesReconciler) Reconcile(ctx context.Context, req ctrl.Request if refreshCertificates { if configOwner.IsControlPlaneMachine() { if err := r.refreshControlPlaneCertificates(ctx, scope); err != nil { - return ctrl.Result{}, err + return ctrl.Result{}, fmt.Errorf("failed to refresh control plane certificates: %w", err) } } else { if err := r.refreshWorkerCertificates(ctx, scope); err != nil { - return ctrl.Result{}, err + return ctrl.Result{}, fmt.Errorf("failed to refresh worker certificates: %w", err) } - return ctrl.Result{}, nil } } diff --git a/go.mod b/go.mod index 7704a244..8940ca5e 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/canonical/cluster-api-k8s go 1.22.6 require ( - github.com/canonical/k8s-snap-api v1.0.10 + github.com/canonical/k8s-snap-api v1.0.11 github.com/go-logr/logr v1.4.1 github.com/google/uuid v1.4.0 github.com/onsi/ginkgo v1.16.5 diff --git a/go.sum b/go.sum index 61e4d9a6..a69bd9a4 100644 --- a/go.sum +++ b/go.sum @@ -29,8 +29,8 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= github.com/bwesterb/go-ristretto v1.2.0/go.mod h1:fUIoIZaG73pV5biE2Blr2xEzDoMj7NFEuV9ekS419A0= -github.com/canonical/k8s-snap-api v1.0.10 h1:BoAw4Vr8mR8MWTKeZZxH5LmrF3JYGSZHDv+KEo5ifoU= 
-github.com/canonical/k8s-snap-api v1.0.10/go.mod h1:LDPoIYCeYnfgOFrwVPJ/4edGU264w7BB7g0GsVi36AY= +github.com/canonical/k8s-snap-api v1.0.11 h1:nGtwrUQBLiaL3HUXFx2gb4kq6qVpl2yNwMwHVX0dEok= +github.com/canonical/k8s-snap-api v1.0.11/go.mod h1:LDPoIYCeYnfgOFrwVPJ/4edGU264w7BB7g0GsVi36AY= github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= diff --git a/pkg/ck8s/config_init.go b/pkg/ck8s/config_init.go index 2ceff580..b6458e64 100644 --- a/pkg/ck8s/config_init.go +++ b/pkg/ck8s/config_init.go @@ -5,6 +5,7 @@ import ( "strings" apiv1 "github.com/canonical/k8s-snap-api/api/v1" + apiv1_annotations "github.com/canonical/k8s-snap-api/api/v1/annotations" "k8s.io/utils/ptr" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" @@ -87,12 +88,12 @@ func GenerateInitControlPlaneConfig(cfg InitControlPlaneConfig) (apiv1.Bootstrap out.ClusterConfig.Annotations = map[string]string{} } - if _, ok := out.ClusterConfig.Annotations[apiv1.AnnotationSkipCleanupKubernetesNodeOnRemove]; !ok { - out.ClusterConfig.Annotations[apiv1.AnnotationSkipCleanupKubernetesNodeOnRemove] = "true" + if _, ok := out.ClusterConfig.Annotations[apiv1_annotations.AnnotationSkipCleanupKubernetesNodeOnRemove]; !ok { + out.ClusterConfig.Annotations[apiv1_annotations.AnnotationSkipCleanupKubernetesNodeOnRemove] = "true" } - if _, ok := out.ClusterConfig.Annotations[apiv1.AnnotationSkipStopServicesOnRemove]; !ok { - out.ClusterConfig.Annotations[apiv1.AnnotationSkipStopServicesOnRemove] = "true" + if _, ok := out.ClusterConfig.Annotations[apiv1_annotations.AnnotationSkipStopServicesOnRemove]; !ok { + out.ClusterConfig.Annotations[apiv1_annotations.AnnotationSkipStopServicesOnRemove] = "true" } // features diff --git a/pkg/ck8s/workload_cluster.go b/pkg/ck8s/workload_cluster.go index c6468ad5..ebec9057 100644 --- a/pkg/ck8s/workload_cluster.go +++ b/pkg/ck8s/workload_cluster.go @@ -264,12 +264,12 @@ func (w *Workload) refreshCertificatesRun(ctx context.Context, machine *clusterv // 1. The CAPI provider calls the /x/capi/refresh-certs/plan endpoint from the // worker node, which generates the CSRs and creates the CertificateSigningRequest // objects in the cluster. -// 2. The CAPI provider then calls the /x/capi/refresh-certs/plan endpoint with +// 2. The CAPI provider then calls the /x/capi/refresh-certs/run endpoint with // the seed. This endpoint waits until the CSR is approved and the certificate // is signed. Note that this is a blocking call. // 3. The CAPI provider calls the /x/capi/refresh-certs/approve endpoint from // any control plane node to approve the CSRs. -// 4. The /x/capi/refresh-certs/plan endpoint completes and returns once the +// 4. The /x/capi/refresh-certs/run endpoint completes and returns once the // certificate is approved and signed. 
func (w *Workload) RefreshWorkerCertificates(ctx context.Context, machine *clusterv1.Machine, nodeToken string, expirationSeconds int) (int, error) { seed, err := w.refreshCertificatesPlan(ctx, machine, nodeToken) From cdb76e0d0fbd17f771a0858c4cf16e5c0c32286d Mon Sep 17 00:00:00 2001 From: Mateo Florido Date: Thu, 24 Oct 2024 17:29:36 -0500 Subject: [PATCH 6/6] Refactor Controller Logic and Add Integration Tests --- .github/workflows/e2e.yaml | 1 + .../v1beta2/certificates_refresh_consts.go | 9 +- ...ootstrap.cluster.x-k8s.io_ck8sconfigs.yaml | 6 +- ....cluster.x-k8s.io_ck8sconfigtemplates.yaml | 6 +- .../controllers/certificates_controller.go | 250 ++++++++---------- c1.yaml | 103 ++++++++ ...ne.cluster.x-k8s.io_ck8scontrolplanes.yaml | 6 +- ...er.x-k8s.io_ck8scontrolplanetemplates.yaml | 8 +- kubeconfig | 20 ++ pkg/ck8s/workload_cluster.go | 4 +- test/e2e/config/ck8s-docker.yaml | 1 + test/e2e/helpers.go | 142 ++++++++++ test/e2e/refresh_certs_test.go | 139 ++++++++++ 13 files changed, 547 insertions(+), 148 deletions(-) create mode 100644 c1.yaml create mode 100644 kubeconfig create mode 100644 test/e2e/refresh_certs_test.go diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 2fbbd3a3..1a8faeb8 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -65,6 +65,7 @@ jobs: - "Workload cluster creation" - "Workload cluster scaling" - "Workload cluster upgrade" + - "Certificate Refresh" # TODO(ben): Remove once all tests are running stable. fail-fast: false steps: diff --git a/bootstrap/api/v1beta2/certificates_refresh_consts.go b/bootstrap/api/v1beta2/certificates_refresh_consts.go index ec53cd06..f4cbe39c 100644 --- a/bootstrap/api/v1beta2/certificates_refresh_consts.go +++ b/bootstrap/api/v1beta2/certificates_refresh_consts.go @@ -1,7 +1,14 @@ package v1beta2 const ( - CertificatesRefreshAnnotation = "v1beta2.k8sd.io/refresh-certificates" + CertificatesRefreshAnnotation = "v1beta2.k8sd.io/refresh-certificates" + CertificatesRefreshStatusAnnotation = "v1beta2.k8sd.io/refresh-certificates-status" +) + +const ( + CertificatesRefreshInProgressStatus = "in-progress" + CertificatesRefreshDoneStatus = "done" + CertificatesRefreshFailedStatus = "failed" ) const ( diff --git a/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigs.yaml b/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigs.yaml index b237b1aa..18ec8325 100644 --- a/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigs.yaml +++ b/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigs.yaml @@ -51,9 +51,6 @@ spec: items: type: string type: array - channel: - description: Channel is the channel to use for the snap install. - type: string bootstrapConfig: description: BootstrapConfig is the data to be passed to the bootstrap script. @@ -87,6 +84,9 @@ spec: - secret type: object type: object + channel: + description: Channel is the channel to use for the snap install. + type: string controlPlane: description: CK8sControlPlaneConfig is configuration for the control plane node. 
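The run/approve flow documented in RefreshWorkerCertificates above leans on errgroup semantics: the run endpoint blocks until the CSRs are signed, so the approve call has to happen concurrently, and errgroup.WithContext cancels the shared context as soon as either goroutine fails, which unblocks the other side. A minimal standalone sketch of that coordination pattern, with the two closures as stand-ins for the real k8sd requests (they are not the provider's actual API):

    package main

    import (
    	"context"
    	"errors"
    	"fmt"
    	"time"

    	"golang.org/x/sync/errgroup"
    )

    func refresh(ctx context.Context) error {
    	eg, ctx := errgroup.WithContext(ctx)
    	eg.Go(func() error {
    		// Stand-in for refreshCertificatesRun: blocks until approval happens
    		// or the shared context is canceled.
    		select {
    		case <-time.After(10 * time.Second):
    			return nil
    		case <-ctx.Done():
    			return ctx.Err()
    		}
    	})
    	eg.Go(func() error {
    		// Stand-in for ApproveCertificates: a failure here cancels the shared
    		// context, so the blocked run leg returns instead of hanging forever.
    		return errors.New("approval failed")
    	})
    	return eg.Wait()
    }

    func main() {
    	// Prints "approval failed" almost immediately, not after 10 seconds.
    	fmt.Println(refresh(context.Background()))
    }

Running the two legs sequentially would deadlock: per step 2 of the comment, the run call never returns until the approve call has been made from a control plane node.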
diff --git a/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigtemplates.yaml b/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigtemplates.yaml index c868cfd5..d9e2d8e7 100644 --- a/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigtemplates.yaml +++ b/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigtemplates.yaml @@ -58,9 +58,6 @@ spec: items: type: string type: array - channel: - description: Channel is the channel to use for the snap install. - type: string bootstrapConfig: description: BootstrapConfig is the data to be passed to the bootstrap script. @@ -94,6 +91,9 @@ spec: - secret type: object type: object + channel: + description: Channel is the channel to use for the snap install. + type: string controlPlane: description: CK8sControlPlaneConfig is configuration for the control plane node. diff --git a/bootstrap/controllers/certificates_controller.go b/bootstrap/controllers/certificates_controller.go index a36b664c..0142d3f0 100644 --- a/bootstrap/controllers/certificates_controller.go +++ b/bootstrap/controllers/certificates_controller.go @@ -27,18 +27,25 @@ import ( // CertificatesReconciler reconciles a Machine's certificates. type CertificatesReconciler struct { client.Client - Log logr.Logger - Scheme *runtime.Scheme - recorder record.EventRecorder - - K8sdDialTimeout time.Duration - + Log logr.Logger + Scheme *runtime.Scheme + recorder record.EventRecorder + K8sdDialTimeout time.Duration managementCluster ck8s.ManagementCluster } +type CertificatesScope struct { + Cluster *clusterv1.Cluster + Config *bootstrapv1.CK8sConfig + Log logr.Logger + Machine *clusterv1.Machine + Patcher *patch.Helper + Workload *ck8s.Workload +} + // SetupWithManager sets up the controller with the Manager. func (r *CertificatesReconciler) SetupWithManager(mgr ctrl.Manager) error { - if _, err := ctrl.NewControllerManagedBy(mgr).For(&clusterv1.Machine{}).Build(r); err != nil { + if err := ctrl.NewControllerManagedBy(mgr).For(&clusterv1.Machine{}).Complete(r); err != nil { return err } @@ -54,15 +61,6 @@ func (r *CertificatesReconciler) SetupWithManager(mgr ctrl.Manager) error { return nil } -type CertificatesScope struct { - Cluster *clusterv1.Cluster - Config *bootstrapv1.CK8sConfig - Log logr.Logger - Machine *clusterv1.Machine - Patcher *patch.Helper - Workload *ck8s.Workload -} - // +kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=ck8sconfigs,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=ck8sconfigs/status,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status;machines;machines/status,verbs=get;list;watch @@ -77,98 +75,133 @@ func (r *CertificatesReconciler) Reconcile(ctx context.Context, req ctrl.Request if apierrors.IsNotFound(err) { return ctrl.Result{}, nil } - // Error reading the object - requeue the request. return ctrl.Result{}, err } + if m.Status.NodeRef == nil { + // If the machine does not have a node ref, we requeue the request to retry. + return ctrl.Result{RequeueAfter: 5 * time.Second}, nil + } + if !m.ObjectMeta.DeletionTimestamp.IsZero() { // Machine is being deleted, return early. 
 		return ctrl.Result{}, nil
 	}
 
 	mAnnotations := m.GetAnnotations()
+	if mAnnotations == nil {
+		mAnnotations = map[string]string{}
+	}
 
 	var refreshCertificates, hasExpiryDateAnnotation bool
 	_, refreshCertificates = mAnnotations[bootstrapv1.CertificatesRefreshAnnotation]
 	_, hasExpiryDateAnnotation = mAnnotations[bootstrapv1.MachineCertificatesExpiryDateAnnotation]
+
+	if mAnnotations[bootstrapv1.CertificatesRefreshStatusAnnotation] == bootstrapv1.CertificatesRefreshInProgressStatus {
+		if !refreshCertificates {
+			// If a refresh is in progress but the refresh annotation is missing,
+			// clear the status.
+			delete(mAnnotations, bootstrapv1.CertificatesRefreshStatusAnnotation)
+			m.SetAnnotations(mAnnotations)
+			if err := r.Client.Update(ctx, m); err != nil {
+				return ctrl.Result{}, fmt.Errorf("failed to clear status annotation: %w", err)
+			}
+			return ctrl.Result{}, nil
+		}
+		log.Info("Certificates refresh already in progress",
+			"refreshStatus", bootstrapv1.CertificatesRefreshInProgressStatus,
+			"refreshAnnotation", mAnnotations[bootstrapv1.CertificatesRefreshAnnotation],
+		)
+		return ctrl.Result{}, nil
+	}
+
 	if !refreshCertificates && hasExpiryDateAnnotation {
 		// No need to refresh certificates or update expiry date, return early.
 		return ctrl.Result{}, nil
 	}
 
-	// Look up for the CK8sConfig.
+	scope, err := r.createScope(ctx, m, log)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+
+	if !hasExpiryDateAnnotation {
+		if err := r.updateExpiryDateAnnotation(ctx, scope); err != nil {
+			return ctrl.Result{}, err
+		}
+	}
+
+	if refreshCertificates {
+		if err := r.refreshCertificates(ctx, scope); err != nil {
+			// On error, mark the refresh as failed and requeue the request to retry.
+			mAnnotations[bootstrapv1.CertificatesRefreshStatusAnnotation] = bootstrapv1.CertificatesRefreshFailedStatus
+			m.SetAnnotations(mAnnotations)
+			if err := r.Client.Update(ctx, m); err != nil {
+				return ctrl.Result{}, fmt.Errorf("failed to set failed status annotation: %w", err)
+			}
+			return ctrl.Result{}, err
+		}
+	}
+
+	return ctrl.Result{}, nil
+}
+
+func (r *CertificatesReconciler) createScope(ctx context.Context, m *clusterv1.Machine, log logr.Logger) (*CertificatesScope, error) {
 	config := &bootstrapv1.CK8sConfig{}
 	if err := r.Client.Get(ctx, types.NamespacedName{Namespace: m.Namespace, Name: m.Spec.Bootstrap.ConfigRef.Name}, config); err != nil {
-		return ctrl.Result{}, err
+		return nil, fmt.Errorf("failed to get CK8sConfig: %w", err)
 	}
 
-	// Get the owner of the CK8sConfig to determine if it's a control plane or worker node.
 	configOwner, err := bsutil.GetConfigOwner(ctx, r.Client, config)
 	if err != nil {
-		log.Error(err, "Failed to get config owner")
-		return ctrl.Result{}, err
+		return nil, fmt.Errorf("failed to get config owner: %w", err)
 	}
 	if configOwner == nil {
-		return ctrl.Result{}, nil
+		return nil, fmt.Errorf("config owner not found")
 	}
 
 	cluster, err := util.GetClusterByName(ctx, r.Client, m.GetNamespace(), m.Spec.ClusterName)
 	if err != nil {
-		return ctrl.Result{}, err
+		return nil, fmt.Errorf("failed to get cluster: %w", err)
 	}
 
-	microclusterPort := config.Spec.ControlPlaneConfig.GetMicroclusterPort()
-	workload, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster), microclusterPort)
+	workload, err := r.managementCluster.GetWorkloadCluster(
+		ctx,
+		util.ObjectKey(cluster),
+		config.Spec.ControlPlaneConfig.GetMicroclusterPort(),
+	)
 	if err != nil {
-		return ctrl.Result{}, err
+		return nil, fmt.Errorf("failed to get workload cluster: %w", err)
 	}
 
 	patchHelper, err := patch.NewHelper(m, r.Client)
 	if err != nil {
-		return ctrl.Result{}, fmt.Errorf("failed to create patch helper for machine: %w", err)
+		return nil, fmt.Errorf("failed to create patch helper: %w", err)
 	}
 
-	scope := &CertificatesScope{
+	return &CertificatesScope{
 		Log:      log,
 		Machine:  m,
 		Config:   config,
 		Cluster:  cluster,
 		Patcher:  patchHelper,
 		Workload: workload,
-	}
-
-	if !hasExpiryDateAnnotation {
-		if err := r.updateExpiryDateAnnotation(ctx, scope); err != nil {
-			return ctrl.Result{}, err
-		}
-	}
-
-	if refreshCertificates {
-		if configOwner.IsControlPlaneMachine() {
-			if err := r.refreshControlPlaneCertificates(ctx, scope); err != nil {
-				return ctrl.Result{}, fmt.Errorf("failed to refresh control plane certificates: %w", err)
-			}
-		} else {
-			if err := r.refreshWorkerCertificates(ctx, scope); err != nil {
-				return ctrl.Result{}, fmt.Errorf("failed to refresh worker certificates: %w", err)
-			}
-		}
-	}
-
-	return ctrl.Result{}, nil
+	}, nil
 }
 
-func (r *CertificatesReconciler) refreshControlPlaneCertificates(ctx context.Context, scope *CertificatesScope) error {
+func (r *CertificatesReconciler) refreshCertificates(ctx context.Context, scope *CertificatesScope) error {
 	nodeToken, err := token.LookupNodeToken(ctx, r.Client, util.ObjectKey(scope.Cluster), scope.Machine.Name)
 	if err != nil {
 		return fmt.Errorf("failed to lookup node token: %w", err)
 	}
 
 	mAnnotations := scope.Machine.GetAnnotations()
-
 	refreshAnnotation, ok := mAnnotations[bootstrapv1.CertificatesRefreshAnnotation]
 	if !ok {
-		return nil
+		return fmt.Errorf("refresh annotation not found")
+	}
+
+	mAnnotations[bootstrapv1.CertificatesRefreshStatusAnnotation] = bootstrapv1.CertificatesRefreshInProgressStatus
+	scope.Machine.SetAnnotations(mAnnotations)
+	if err := scope.Patcher.Patch(ctx, scope.Machine); err != nil {
+		return fmt.Errorf("failed to set in-progress status: %w", err)
 	}
 
 	r.recorder.Eventf(
@@ -180,16 +213,31 @@ func (r *CertificatesReconciler) refreshControlPlaneCertificates(ctx context.Con
 	seconds, err := utiltime.TTLToSeconds(refreshAnnotation)
 	if err != nil {
-		return fmt.Errorf("failed to parse expires-in annotation value: %w", err)
+		return fmt.Errorf("failed to parse TTL: %w", err)
 	}
 
-	controlPlaneConfig := scope.Config.Spec.ControlPlaneConfig
-	controlPlaneEndpoint := scope.Cluster.Spec.ControlPlaneEndpoint.Host
-
-	extraSANs := controlPlaneConfig.ExtraSANs
-	extraSANs = append(extraSANs, controlPlaneEndpoint)
+	var expirySecondsUnix int
+	configOwner, err := bsutil.GetConfigOwner(ctx, r.Client, scope.Config)
+	if err != nil {
+		return fmt.Errorf("failed to get config owner: %w", err)
+	}
+	if configOwner == nil {
+		return fmt.Errorf("config owner not found")
+	}
+	if configOwner.IsControlPlaneMachine() {
+		var extraSANs []string
+		extraSANs = append(extraSANs, scope.Config.Spec.ControlPlaneConfig.ExtraSANs...)
+		extraSANs = append(extraSANs, scope.Cluster.Spec.ControlPlaneEndpoint.Host)
+		expirySecondsUnix, err = scope.Workload.RefreshControlPlaneCertificates(
+			ctx,
+			scope.Machine,
+			*nodeToken,
+			seconds,
+			extraSANs,
+		)
+	} else {
+		expirySecondsUnix, err = scope.Workload.RefreshWorkerCertificates(
+			ctx,
+			scope.Machine,
+			*nodeToken,
+			seconds,
+		)
+	}
 
-	expirySecondsUnix, err := scope.Workload.RefreshControlPlaneCertificates(ctx, scope.Machine, *nodeToken, seconds, extraSANs)
 	if err != nil {
 		r.recorder.Eventf(
 			scope.Machine,
 			corev1.EventTypeWarning,
 			bootstrapv1.CertificatesRefreshFailedEvent,
 			"Failed to refresh certificates: %v", err,
 		)
@@ -201,10 +249,11 @@
 	}
 
 	expiryTime := time.Unix(int64(expirySecondsUnix), 0)
-
 	delete(mAnnotations, bootstrapv1.CertificatesRefreshAnnotation)
+	mAnnotations[bootstrapv1.CertificatesRefreshStatusAnnotation] = bootstrapv1.CertificatesRefreshDoneStatus
 	mAnnotations[bootstrapv1.MachineCertificatesExpiryDateAnnotation] = expiryTime.Format(time.RFC3339)
 	scope.Machine.SetAnnotations(mAnnotations)
+
 	if err := scope.Patcher.Patch(ctx, scope.Machine); err != nil {
 		return fmt.Errorf("failed to patch machine annotations: %w", err)
 	}
@@ -231,82 +280,17 @@ func (r *CertificatesReconciler) updateExpiryDateAnnotation(ctx context.Context,
 		return fmt.Errorf("failed to lookup node token: %w", err)
 	}
 
-	mAnnotations := scope.Machine.GetAnnotations()
-	if mAnnotations == nil {
-		mAnnotations = map[string]string{}
-	}
-
 	expiryDateString, err := scope.Workload.GetCertificatesExpiryDate(ctx, scope.Machine, *nodeToken)
 	if err != nil {
 		return fmt.Errorf("failed to get certificates expiry date: %w", err)
 	}
 
-	mAnnotations[bootstrapv1.MachineCertificatesExpiryDateAnnotation] = expiryDateString
-	scope.Machine.SetAnnotations(mAnnotations)
-	if err := scope.Patcher.Patch(ctx, scope.Machine); err != nil {
-		return fmt.Errorf("failed to patch machine annotations: %w", err)
-	}
-
-	return nil
-}
-
-func (r *CertificatesReconciler) refreshWorkerCertificates(ctx context.Context, scope *CertificatesScope) error {
-	nodeToken, err := token.LookupNodeToken(ctx, r.Client, util.ObjectKey(scope.Cluster), scope.Machine.Name)
-	if err != nil {
-		return fmt.Errorf("failed to lookup node token: %w", err)
-	}
-
 	mAnnotations := scope.Machine.GetAnnotations()
-
-	refreshAnnotation, ok := mAnnotations[bootstrapv1.CertificatesRefreshAnnotation]
-	if !ok {
-		return nil
-	}
-
-	r.recorder.Eventf(
-		scope.Machine,
-		corev1.EventTypeNormal,
-		bootstrapv1.CertificatesRefreshInProgressEvent,
-		"Certificates refresh in progress. 
TTL: %s", refreshAnnotation, - ) - - seconds, err := utiltime.TTLToSeconds(refreshAnnotation) - if err != nil { - return fmt.Errorf("failed to parse expires-in annotation value: %w", err) - } - - expirySecondsUnix, err := scope.Workload.RefreshWorkerCertificates(ctx, scope.Machine, *nodeToken, seconds) - if err != nil { - r.recorder.Eventf( - scope.Machine, - corev1.EventTypeWarning, - bootstrapv1.CertificatesRefreshFailedEvent, - "Failed to refresh certificates: %v", err, - ) - return fmt.Errorf("failed to refresh certificates: %w", err) + if mAnnotations == nil { + mAnnotations = map[string]string{} } - expiryTime := time.Unix(int64(expirySecondsUnix), 0) - - delete(mAnnotations, bootstrapv1.CertificatesRefreshAnnotation) - mAnnotations[bootstrapv1.MachineCertificatesExpiryDateAnnotation] = expiryTime.Format(time.RFC3339) + mAnnotations[bootstrapv1.MachineCertificatesExpiryDateAnnotation] = expiryDateString scope.Machine.SetAnnotations(mAnnotations) - if err := scope.Patcher.Patch(ctx, scope.Machine); err != nil { - return fmt.Errorf("failed to patch machine annotations: %w", err) - } - - r.recorder.Eventf( - scope.Machine, - corev1.EventTypeNormal, - bootstrapv1.CertificatesRefreshDoneEvent, - "Certificates refreshed, will expire at %s", expiryTime, - ) - - scope.Log.Info("Certificates refreshed", - "cluster", scope.Cluster.Name, - "machine", scope.Machine.Name, - "expiry", expiryTime.Format(time.RFC3339), - ) - - return nil + return scope.Patcher.Patch(ctx, scope.Machine) } diff --git a/c1.yaml b/c1.yaml new file mode 100644 index 00000000..6f13045d --- /dev/null +++ b/c1.yaml @@ -0,0 +1,103 @@ +apiVersion: cluster.x-k8s.io/v1beta1 +kind: Cluster +metadata: + name: c1 + namespace: default +spec: + clusterNetwork: + pods: + cidrBlocks: + - 10.1.0.0/16 + serviceDomain: cluster.local + services: + cidrBlocks: + - 10.152.0.0/16 + controlPlaneRef: + apiVersion: controlplane.cluster.x-k8s.io/v1beta2 + kind: CK8sControlPlane + name: c1-control-plane + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: DockerCluster + name: c1 +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: DockerCluster +metadata: + name: c1 + namespace: default +spec: {} +--- +apiVersion: controlplane.cluster.x-k8s.io/v1beta2 +kind: CK8sControlPlane +metadata: + name: c1-control-plane + namespace: default +spec: + machineTemplate: + infrastructureTemplate: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: DockerMachineTemplate + name: c1-control-plane + replicas: 1 + spec: + airGapped: true + controlPlane: + extraKubeAPIServerArgs: + --anonymous-auth: "true" + version: v1.31.1 +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: DockerMachineTemplate +metadata: + name: c1-control-plane + namespace: default +spec: + template: + spec: + customImage: k8s-snap:dev +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachineDeployment +metadata: + name: c1-worker-md-0 + namespace: default +spec: + clusterName: c1 + replicas: 1 + selector: + matchLabels: + cluster.x-k8s.io/cluster-name: c1 + template: + spec: + bootstrap: + configRef: + apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 + kind: CK8sConfigTemplate + name: c1-md-0 + clusterName: c1 + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: DockerMachineTemplate + name: c1-md-0 + version: v1.31.1 +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: DockerMachineTemplate +metadata: + name: c1-md-0 + namespace: default +spec: + template: + spec: + customImage: 
k8s-snap:dev +--- +apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 +kind: CK8sConfigTemplate +metadata: + name: c1-md-0 + namespace: default +spec: + template: + spec: + airGapped: true diff --git a/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanes.yaml b/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanes.yaml index 7dc56812..3fcbaace 100644 --- a/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanes.yaml +++ b/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanes.yaml @@ -246,9 +246,6 @@ spec: items: type: string type: array - channel: - description: Channel is the channel to use for the snap install. - type: string bootstrapConfig: description: BootstrapConfig is the data to be passed to the bootstrap script. @@ -282,6 +279,9 @@ spec: - secret type: object type: object + channel: + description: Channel is the channel to use for the snap install. + type: string controlPlane: description: CK8sControlPlaneConfig is configuration for the control plane node. diff --git a/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanetemplates.yaml b/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanetemplates.yaml index ff6e0fcd..6491f93d 100644 --- a/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanetemplates.yaml +++ b/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanetemplates.yaml @@ -221,10 +221,6 @@ spec: items: type: string type: array - channel: - description: Channel is the channel to use for the snap - install. - type: string bootstrapConfig: description: BootstrapConfig is the data to be passed to the bootstrap script. @@ -258,6 +254,10 @@ spec: - secret type: object type: object + channel: + description: Channel is the channel to use for the snap + install. + type: string controlPlane: description: CK8sControlPlaneConfig is configuration for the control plane node. 
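Together with the status constants added in certificates_refresh_consts.go, the machine annotations now form a small observable state machine: the controller moves the refresh status through in-progress to done or failed, and records the certificate expiry as an RFC3339 timestamp. A hedged consumer-side sketch of reading that expiry annotation; refreshDue is illustrative and not part of this series:

    // Assumes bootstrapv1 is bootstrap/api/v1beta2 from this repository and
    // clusterv1 is sigs.k8s.io/cluster-api/api/v1beta1.
    func refreshDue(machine *clusterv1.Machine, window time.Duration) (bool, error) {
    	raw, ok := machine.GetAnnotations()[bootstrapv1.MachineCertificatesExpiryDateAnnotation]
    	if !ok {
    		// Expiry not recorded yet; the controller populates it on first reconcile.
    		return false, nil
    	}
    	expiry, err := time.Parse(time.RFC3339, raw)
    	if err != nil {
    		return false, fmt.Errorf("failed to parse expiry date annotation: %w", err)
    	}
    	return time.Until(expiry) < window, nil
    }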
diff --git a/kubeconfig b/kubeconfig new file mode 100644 index 00000000..1429eaed --- /dev/null +++ b/kubeconfig @@ -0,0 +1,20 @@ +apiVersion: v1 +clusters: +- cluster: + certificate-authority-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUM2akNDQWRLZ0F3SUJBZ0lCQURBTkJna3Foa2lHOXcwQkFRc0ZBREFWTVJNd0VRWURWUVFERXdwcmRXSmwKY201bGRHVnpNQjRYRFRJME1UQXlNekUxTXpZek1Gb1hEVE0wTVRBeU1URTFOREV6TUZvd0ZURVRNQkVHQTFVRQpBeE1LYTNWaVpYSnVaWFJsY3pDQ0FTSXdEUVlKS29aSWh2Y05BUUVCQlFBRGdnRVBBRENDQVFvQ2dnRUJBSnIvCjl6bVNaNlVtbE1wNXVyN2NqeHpMKy8vRm5SNkhMMkt6dnR3NUx0YlJ3K0kyR01aWmpkYUZjakpGNEpnTEhlejYKZ1Fnbm55LzhUTnd2ckwyTXcybHRMK1VmYWZzRFllQnRiZzhUSHIwVE9CVUFxWkd2cmxEbndzRnFzMU5HSlBEQQpUcUlvNExYTWQyS2tMS3g5UCt1UWdaNU5MdDBpTFFsL1dlRU9SNThPeVNOaDFua3pNVS9oYmJPYjBlWkc3S2lyCkhTY3gwaWNjNFhHd1VMSzVJUXZuR1NTVGlVZmh3cG92N3FUa0RZeWV4ZUxhWVNQVDZyVmhRZkxJNVBqcEtqNHIKM3dJRHV6eks4NEdNRkx6UXBmMUNQaW5mQjFzR3B1QXV1bHZzMlM4VDBieWxYc2llUTliZ3FHakdNczVreW0wUApPZ2dZUVVSYklBUVhNUHUwQ3cwQ0F3RUFBYU5GTUVNd0RnWURWUjBQQVFIL0JBUURBZ0trTUJJR0ExVWRFd0VCCi93UUlNQVlCQWY4Q0FRQXdIUVlEVlIwT0JCWUVGRTFJSlJHVkdrcldOODBOMTZpZFI1QTl3VGpaTUEwR0NTcUcKU0liM0RRRUJDd1VBQTRJQkFRQjRDSm1kYlB3WXpWMXlOUUpFbkFGNW9aa0NTa0IzR25uVVBKT1VsdnJtNm9yYQpENDRldHJDZitHL3JSLzMvaXN0bUtLY1BENzM4UUhlOHlGWkcwVDZLcVZYM09GZldqTFJZVUpPa2h2ZVc5d1g1Cm9FeWtqbEZ6ZGQ5cWp2Vk5BRjZEcDdCTXY2d0xjbUVMQXMweGRJQ0pFSWV4RWRUcHNQdDFnYnRnNVE5VzJOOVQKS0NZTEFEOEFKRlFRUFpyN2JJZlZycDVPNG5iOHhVSlBJYjNLOTd6cVlqUHNnSGpZVm9kSDAzTUpLV01UTDEydAp2WTB6dm9KWHREOHV1WVEyakJjYnNGWjJibTl6eDkrM1EvL2tlMVdNVE5paFJzQmdqdUtBSmQ5dU9vMGNLUUhDClV4Z3hrbENQQ1lIbUZTYmNJeU9vTm9IRzgyNjV0cXZKWjFFWmRUanAKLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo= + server: https://172.18.0.3:6443 + name: c1 +contexts: +- context: + cluster: c1 + user: c1-admin + name: c1-admin@c1 +current-context: c1-admin@c1 +kind: Config +preferences: {} +users: +- name: c1-admin + user: + client-certificate-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURFekNDQWZ1Z0F3SUJBZ0lJZHRMeUdaTUtrcGN3RFFZSktvWklodmNOQVFFTEJRQXdGVEVUTUJFR0ExVUUKQXhNS2EzVmlaWEp1WlhSbGN6QWVGdzB5TkRFd01qTXhOVE0yTXpCYUZ3MHlOVEV3TWpNeE5UUXhNekJhTURReApGekFWQmdOVkJBb1REbk41YzNSbGJUcHRZWE4wWlhKek1Sa3dGd1lEVlFRREV4QnJkV0psY201bGRHVnpMV0ZrCmJXbHVNSUlCSWpBTkJna3Foa2lHOXcwQkFRRUZBQU9DQVE4QU1JSUJDZ0tDQVFFQTBVL0V1TmluWDdZMURsRnYKSWxJaXg0QWFVMDhBdkh5WXIwQUtQcnNvRmtnTVNVQnlkcDZBQmdqZGRpWTZ3RUw1eitFN0NDZXpQN29nRlhRbApxVlVUWlhlRnljdVFhUzdBN001WUh3dVNNaHJuaHRwTHplY3hDT1hhODBXMEgxcFNQdnhwdXlxVTZsMEx4a3crCjBUZU15cjdRTlpUWmx6Q2lmdmlyOEduQU45ZGZQZVlmeWtKaDlkK3gyekxFdE1ZT1ozKzlnQjlsMUFzQmRMVjQKbnFHTVVWV1VWaEc3am9hMDlaYjh4UTc5dTJMamVNNkV0b2oyTHFOR1FONzJva1hZT1owaXI3RjhpYTcyeEFnRwpHVTQzZDBsa3owdTd1ZjNaaVJKUGhvQ0YwUHJBSFRDVmxpTWVXVHFLN0ZBV0p6UTcrcnFmS08rSng0SUFCNTN6CnRLZE1CUUlEQVFBQm8wZ3dSakFPQmdOVkhROEJBZjhFQkFNQ0JhQXdFd1lEVlIwbEJBd3dDZ1lJS3dZQkJRVUgKQXdJd0h3WURWUjBqQkJnd0ZvQVV6K0RXS29UTW1LYmozT2IxQ3RuSE1qZzk2cVF3RFFZSktvWklodmNOQVFFTApCUUFEZ2dFQkFFWEY5dXNDVGVxY29GTm9NNkM2MEdvaWVVNVhVR1VseHBYMndQMTFYSU5KZ0lkcXZWa0VFT2M3CmpmdG8wQXM2clJwazcyRmNCWnF3UzVaL2xxUEFlQUFudWpSUnhVWHdhck02NVhoY3h3UGJ3NHdTQjlsR0k5aGIKZGVlMS8zbGdIUEhGZEdRYk90aFJlN3phUitXWUMzRVFFWk02TWtNTVE5WEFEK25aOUhDSGIzSURqdWtwTUwxQwpQQ1FBUW1TUnpsa21zU1N2aCtsYjQ0cnJvK3NpU29kTDFiWTZwWnljODc0OTIrVytVOWxYOFVSZGhRVVZETGZrCmdqenc4ancvbmhmTGlhZmphZTV2K1M0eW5uc1Z4dTlMcEVTdTNGRDRIUS80Zk1QYWpDWTYrZjhmZFY1WWZoRmsKSmRyTnowMU8wZmcrdisrTWpBUm5hbk1QY2RqbEJROD0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo= + client-key-data: 
LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFb3dJQkFBS0NBUUVBMFUvRXVOaW5YN1kxRGxGdklsSWl4NEFhVTA4QXZIeVlyMEFLUHJzb0ZrZ01TVUJ5CmRwNkFCZ2pkZGlZNndFTDV6K0U3Q0NlelA3b2dGWFFscVZVVFpYZUZ5Y3VRYVM3QTdNNVlId3VTTWhybmh0cEwKemVjeENPWGE4MFcwSDFwU1B2eHB1eXFVNmwwTHhrdyswVGVNeXI3UU5aVFpsekNpZnZpcjhHbkFOOWRmUGVZZgp5a0poOWQreDJ6TEV0TVlPWjMrOWdCOWwxQXNCZExWNG5xR01VVldVVmhHN2pvYTA5WmI4eFE3OXUyTGplTTZFCnRvajJMcU5HUU43Mm9rWFlPWjBpcjdGOGlhNzJ4QWdHR1U0M2QwbGt6MHU3dWYzWmlSSlBob0NGMFByQUhUQ1YKbGlNZVdUcUs3RkFXSnpRNytycWZLTytKeDRJQUI1M3p0S2RNQlFJREFRQUJBb0lCQUZOYTAvblU5ZHFzNGpZZwpnZ1NGOXluQnZ6b2NCU3dLNW5jc0pxaFN1aVVkYmg4aDFqT2M3Vmo1NE0vemE0UXpaKzY4UUZrQURtYzdoUmxtCkZyVjNsdkRhaDJJVWVoOFpLbVZqeXo0WnlvKy9uU0piL3NJeDVwb25oMmxJQWZKV2N0aDRodGF0bjM0eHJjVzcKaVd5aUZhU01Gcml2M1hOVFAvTVdyc3BnSXRkWmFlLzlaNkk3SnFvdE00aWVYOXc5U1hGcWNCUUFTTHhQZGFDMgp3QlAzNEJSS1RmQ1BLMTU4Z09ma0pNSStNblhYOVgvNE52d056a09aVENQTVE0WFRSOGx2YXJvRENkT1VBUnpPCmovcWFFYkswanJIYUJGNzd6cWp3QXd0MW9SUllwWGFSOFl6ekorUmZUZWN2SWwrK3p2TXAxZDhhOWI5MGM3bjAKYkdjSTlZVUNnWUVBOWtnTktYeDdLajhXc1hDK29uWVJsZ0RSOHVvVUQ2MDJRRldWS3oxU0diSTEwdm93UFZwYgpNODBqK2dUUTVVdThLamhlSTBuTEpXK1FSY0RGU0pGbUUvekFKN0NGRGppRkQ4ZVpuMGVOeEFqYWZHRGt4WklaCi9OT09iNUp4UDR5RllaaXhBcDV6Zzd3R2E5UTEyalVRaXgvM3dGbjVkRFk3ZFZhS2hBR2NKUk1DZ1lFQTJaSSsKM0JRaVN4elQxVmplQzJkdC8wWnJPOWdWV3BsYlZQQkdqd042MG1JaERzQndpNWlmaDI2SVhqSFVObHBEa3FlZQozKytrVzhNNlIycklKV0VjcnR0NnVWeWlXQnFXMG9IVENzV2JwVFFXaEZHT3JOcklCTGkrMWZHWjBZMTgydTRxCkkySXo0Sk01NHVjTVcrbjNobGZzZFpjSWVpbWF0WmRub2JDOEpZY0NnWUIxTlhrK0kwWWlwdi9QMXM2RFZ4bXIKZ0J4ck5VZnk1Z2FKdTdGUWNOQ2Y5aFp6b0NwUktLMTdDOGh4ZHRWTUt5MFVFdHVLZzlZd3JOWEd2S1dua2JzNQpDL01QY0kwQ1paZStHTXBkNlpub2tDWWJSNm5ZOVYxMDIrSlA2eXdHaUlQNkhNY3hiZU9mOEY1Rjl5cmgvSnN3Cit6Zlh6WHBRK25aMXM0Z2NwdjkvRFFLQmdDTWlPYUppWWZOTk1XdjNVTXY0cE11ZlBIdktkaEJPU1hCYTZKbjUKanVEZ1ZjUkRFU21KdU9FdTJUaVV4VmNObG5IZFBZdWQ4Q1dkVGhEd1RtZXkyZVhtclZlM2ZNUExiMldJNzJLQwo3Skp0NmVEdUpxTUZKVjJWMDhOS28zTXliT0lScVo1VElsdDJpdGQ0UmdlbHJZSDFPc3IyVzdrV2ROUTBJZGo3CmFEYUxBb0dCQUl2cjFQWDB1L0pqc0xxNWZmaEc4YWJVaDU2MHBWa0ZvMWs4V2xQSlU5UGEwQ216RnBVMVQ0eCsKQlRKQ3pON0xrU0tKRFNoL01GbW9RUVFCL0ZEVEhPakZMcTM4WVZFd2x4YzJOQ25sa2VZY09vQk1wQ0xHeThFYQpSdEpiTDMyVFlHZmo1OUVJR2Q3bXhJLzd5SGVERmx3N2R0bzUzOUVIblFielRzcUwxeXN1Ci0tLS0tRU5EIFJTQSBQUklWQVRFIEtFWS0tLS0tCg== + diff --git a/pkg/ck8s/workload_cluster.go b/pkg/ck8s/workload_cluster.go index ebec9057..63c5f353 100644 --- a/pkg/ck8s/workload_cluster.go +++ b/pkg/ck8s/workload_cluster.go @@ -209,7 +209,9 @@ func (w *Workload) GetCertificatesExpiryDate(ctx context.Context, machine *clust } func (w *Workload) ApproveCertificates(ctx context.Context, machine *clusterv1.Machine, seed int) error { - request := apiv1.ClusterAPIApproveWorkerCSRRequest{} + request := apiv1.ClusterAPIApproveWorkerCSRRequest{ + Seed: seed, + } response := &apiv1.ClusterAPIApproveWorkerCSRResponse{} k8sdProxy, err := w.GetK8sdProxyForControlPlane(ctx, k8sdProxyOptions{}) if err != nil { diff --git a/test/e2e/config/ck8s-docker.yaml b/test/e2e/config/ck8s-docker.yaml index 8dba00e0..e2eb6722 100644 --- a/test/e2e/config/ck8s-docker.yaml +++ b/test/e2e/config/ck8s-docker.yaml @@ -105,6 +105,7 @@ intervals: default/wait-nodes-ready: ["10m", "10s"] default/wait-machine-remediation: ["5m", "10s"] default/wait-autoscaler: ["5m", "10s"] + default/wait-machine-refresh: ["5m", "10s"] node-drain/wait-deployment-available: ["3m", "10s"] node-drain/wait-control-plane: ["15m", "10s"] node-drain/wait-machine-deleted: ["2m", "10s"] diff --git a/test/e2e/helpers.go b/test/e2e/helpers.go index 42ad619a..7c771503 100644 --- a/test/e2e/helpers.go +++ 
b/test/e2e/helpers.go @@ -554,6 +554,148 @@ func WaitForControlPlaneAndMachinesReady(ctx context.Context, input WaitForContr }) } +type ApplyCertificateRefreshAndWaitInput struct { + Getter framework.Getter + Machine *clusterv1.Machine + ClusterProxy framework.ClusterProxy + TTL string + WaitForRefreshIntervals []interface{} +} + +func ApplyCertificateRefreshAndWait(ctx context.Context, input ApplyCertificateRefreshAndWaitInput) { + Expect(ctx).NotTo(BeNil()) + Expect(input.Machine).ToNot(BeNil()) + Expect(input.ClusterProxy).ToNot(BeNil()) + Expect(input.TTL).ToNot(BeEmpty()) + + mgmtClient := input.ClusterProxy.GetClient() + + patchHelper, err := patch.NewHelper(input.Machine, mgmtClient) + Expect(err).ToNot(HaveOccurred()) + + mAnnotations := input.Machine.GetAnnotations() + if mAnnotations == nil { + mAnnotations = map[string]string{} + } + + mAnnotations[bootstrapv1.CertificatesRefreshAnnotation] = input.TTL + input.Machine.SetAnnotations(mAnnotations) + err = patchHelper.Patch(ctx, input.Machine) + Expect(err).ToNot(HaveOccurred()) + + By("Waiting for certificates to be refreshed") + Eventually(func() (bool, error) { + machine := &clusterv1.Machine{} + if err := input.Getter.Get(ctx, client.ObjectKey{ + Namespace: input.Machine.Namespace, + Name: input.Machine.Name, + }, machine); err != nil { + return false, err + } + + mAnnotations := machine.GetAnnotations() + if mAnnotations == nil { + return false, nil + } + + status, ok := mAnnotations[bootstrapv1.CertificatesRefreshStatusAnnotation] + if !ok { + return false, nil + } + + if status == bootstrapv1.CertificatesRefreshFailedStatus { + return false, fmt.Errorf("certificates refresh failed for machine %s", machine.Name) + } + + return status == bootstrapv1.CertificatesRefreshDoneStatus, nil + }, input.WaitForRefreshIntervals...).Should(BeTrue(), "Certificates refresh failed for %s", input.Machine.Name) +} + +type ApplyCertificateRefreshForControlPlaneInput struct { + Lister framework.Lister + Getter framework.Getter + ClusterProxy framework.ClusterProxy + Cluster *clusterv1.Cluster + TTL string + WaitForRefreshIntervals []interface{} +} + +func ApplyCertificateRefreshForControlPlane(ctx context.Context, input ApplyCertificateRefreshForControlPlaneInput) { + Expect(ctx).NotTo(BeNil()) + Expect(input.ClusterProxy).ToNot(BeNil()) + Expect(input.Cluster).ToNot(BeNil()) + Expect(input.TTL).ToNot(BeEmpty()) + + By("Looking up control plane machines") + machineList := &clusterv1.MachineList{} + Eventually(func() error { + return input.Lister.List(ctx, machineList, + client.InNamespace(input.Cluster.Namespace), + client.MatchingLabels{ + clusterv1.ClusterNameLabel: input.Cluster.Name, + clusterv1.MachineControlPlaneLabel: "", + }) + }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), + "Failed to list control plane machines for cluster %q", input.Cluster.Name) + + for i := range machineList.Items { + machine := &machineList.Items[i] + By(fmt.Sprintf("Refreshing certificates for control plane machine: %s", machine.Name)) + ApplyCertificateRefreshAndWait(ctx, ApplyCertificateRefreshAndWaitInput{ + Getter: input.Getter, + Machine: machine, + ClusterProxy: input.ClusterProxy, + TTL: input.TTL, + WaitForRefreshIntervals: input.WaitForRefreshIntervals, + }) + } +} + +type ApplyCertificateRefreshForWorkerInput struct { + Lister framework.Lister + Getter framework.Getter + ClusterProxy framework.ClusterProxy + Cluster *clusterv1.Cluster + MachineDeployments []*clusterv1.MachineDeployment + TTL string + 
+
+type ApplyCertificateRefreshForControlPlaneInput struct {
+	Lister                  framework.Lister
+	Getter                  framework.Getter
+	ClusterProxy            framework.ClusterProxy
+	Cluster                 *clusterv1.Cluster
+	TTL                     string
+	WaitForRefreshIntervals []interface{}
+}
+
+func ApplyCertificateRefreshForControlPlane(ctx context.Context, input ApplyCertificateRefreshForControlPlaneInput) {
+	Expect(ctx).NotTo(BeNil())
+	Expect(input.ClusterProxy).ToNot(BeNil())
+	Expect(input.Cluster).ToNot(BeNil())
+	Expect(input.TTL).ToNot(BeEmpty())
+
+	By("Looking up control plane machines")
+	machineList := &clusterv1.MachineList{}
+	Eventually(func() error {
+		return input.Lister.List(ctx, machineList,
+			client.InNamespace(input.Cluster.Namespace),
+			client.MatchingLabels{
+				clusterv1.ClusterNameLabel:         input.Cluster.Name,
+				clusterv1.MachineControlPlaneLabel: "",
+			})
+	}, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(),
+		"Failed to list control plane machines for cluster %q", input.Cluster.Name)
+
+	for i := range machineList.Items {
+		machine := &machineList.Items[i]
+		By(fmt.Sprintf("Refreshing certificates for control plane machine: %s", machine.Name))
+		ApplyCertificateRefreshAndWait(ctx, ApplyCertificateRefreshAndWaitInput{
+			Getter:                  input.Getter,
+			Machine:                 machine,
+			ClusterProxy:            input.ClusterProxy,
+			TTL:                     input.TTL,
+			WaitForRefreshIntervals: input.WaitForRefreshIntervals,
+		})
+	}
+}
+
+type ApplyCertificateRefreshForWorkerInput struct {
+	Lister                  framework.Lister
+	Getter                  framework.Getter
+	ClusterProxy            framework.ClusterProxy
+	Cluster                 *clusterv1.Cluster
+	MachineDeployments     []*clusterv1.MachineDeployment
+	TTL                     string
+	WaitForRefreshIntervals []interface{}
+}
+
+func ApplyCertificateRefreshForWorker(ctx context.Context, input ApplyCertificateRefreshForWorkerInput) {
+	Expect(ctx).NotTo(BeNil())
+	Expect(input.ClusterProxy).ToNot(BeNil())
+	Expect(input.Cluster).ToNot(BeNil())
+	Expect(input.MachineDeployments).ToNot(BeNil())
+	Expect(input.TTL).ToNot(BeEmpty())
+
+	for _, md := range input.MachineDeployments {
+		By(fmt.Sprintf("Refreshing certificates for machines in deployment %s", md.Name))
+
+		inClustersNamespaceListOption := client.InNamespace(input.Cluster.Namespace)
+		matchClusterListOption := client.MatchingLabels{
+			clusterv1.ClusterNameLabel:           input.Cluster.Name,
+			clusterv1.MachineDeploymentNameLabel: md.Name,
+		}
+
+		machineList := &clusterv1.MachineList{}
+		Eventually(func() error {
+			return input.Lister.List(ctx, machineList, inClustersNamespaceListOption, matchClusterListOption)
+		}, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Couldn't list machines for deployment %q in the cluster %q", md.Name, input.Cluster.Name)
+
+		for i := range machineList.Items {
+			machine := &machineList.Items[i]
+			By(fmt.Sprintf("Refreshing certificates for worker machine: %s", machine.Name))
+			ApplyCertificateRefreshAndWait(ctx, ApplyCertificateRefreshAndWaitInput{
+				Getter:                  input.Getter,
+				Machine:                 machine,
+				ClusterProxy:            input.ClusterProxy,
+				TTL:                     input.TTL,
+				WaitForRefreshIntervals: input.WaitForRefreshIntervals,
+			})
+		}
+	}
+}
+
 type ApplyInPlaceUpgradeAndWaitInput struct {
 	Getter  framework.Getter
 	Machine *clusterv1.Machine
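The two wrappers above only fan machines out of label-selected lists; the actual trigger is the single TTL annotation written by ApplyCertificateRefreshAndWait. Outside the e2e suite, the same refresh can be requested against a management cluster with a plain controller-runtime client. A sketch, where the function and variable names are illustrative and only the annotation constant is taken from the helpers above:

    package refresh

    import (
    	"context"

    	bootstrapv1 "github.com/canonical/cluster-api-k8s/bootstrap/api/v1beta2"
    	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    	"sigs.k8s.io/controller-runtime/pkg/client"
    )

    // requestRefresh asks the certificates controller to refresh a machine's
    // certificates by stamping the TTL annotation, e.g. ttl = "1y".
    func requestRefresh(ctx context.Context, c client.Client, machine *clusterv1.Machine, ttl string) error {
    	orig := machine.DeepCopy()
    	anns := machine.GetAnnotations()
    	if anns == nil {
    		anns = map[string]string{}
    	}
    	anns[bootstrapv1.CertificatesRefreshAnnotation] = ttl
    	machine.SetAnnotations(anns)
    	// Merge-patch only the annotation change, leaving the rest of the
    	// Machine object untouched.
    	return c.Patch(ctx, machine, client.MergeFrom(orig))
    }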
diff --git a/test/e2e/refresh_certs_test.go b/test/e2e/refresh_certs_test.go
new file mode 100644
index 00000000..d28160e3
--- /dev/null
+++ b/test/e2e/refresh_certs_test.go
@@ -0,0 +1,139 @@
+//go:build e2e
+// +build e2e
+
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e
+
+import (
+	"context"
+	"fmt"
+	"path/filepath"
+	"time"
+
+	bootstrapv1 "github.com/canonical/cluster-api-k8s/bootstrap/api/v1beta2"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/utils/ptr"
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+	"sigs.k8s.io/cluster-api/test/framework/clusterctl"
+	"sigs.k8s.io/cluster-api/util"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+var _ = Describe("Certificate Refresh", func() {
+	var (
+		ctx                    = context.TODO()
+		specName               = "workload-cluster-certificate-refresh"
+		namespace              *corev1.Namespace
+		cancelWatches          context.CancelFunc
+		result                 *ApplyClusterTemplateAndWaitResult
+		clusterName            string
+		clusterctlLogFolder    string
+		infrastructureProvider string
+	)
+
+	BeforeEach(func() {
+		Expect(e2eConfig.Variables).To(HaveKey(KubernetesVersion))
+
+		clusterName = fmt.Sprintf("capick8s-certificate-refresh-%s", util.RandomString(6))
+		infrastructureProvider = clusterctl.DefaultInfrastructureProvider
+
+		// Setup a Namespace where to host objects for this spec and create a watcher for the namespace events.
+		namespace, cancelWatches = setupSpecNamespace(ctx, specName, bootstrapClusterProxy, artifactFolder)
+
+		result = new(ApplyClusterTemplateAndWaitResult)
+
+		clusterctlLogFolder = filepath.Join(artifactFolder, "clusters", bootstrapClusterProxy.GetName())
+	})
+
+	AfterEach(func() {
+		cleanInput := cleanupInput{
+			SpecName:        specName,
+			Cluster:         result.Cluster,
+			ClusterProxy:    bootstrapClusterProxy,
+			Namespace:       namespace,
+			CancelWatches:   cancelWatches,
+			IntervalsGetter: e2eConfig.GetIntervals,
+			SkipCleanup:     skipCleanup,
+			ArtifactFolder:  artifactFolder,
+		}
+
+		dumpSpecResourcesAndCleanup(ctx, cleanInput)
+	})
+
+	Context("Performing certificate refresh", func() {
+		It("Should successfully refresh certificates for a cluster [PR-Blocking]", func() {
+			By("Creating a workload cluster with a single control plane and a single worker node")
+			ApplyClusterTemplateAndWait(ctx, ApplyClusterTemplateAndWaitInput{
+				ClusterProxy: bootstrapClusterProxy,
+				ConfigCluster: clusterctl.ConfigClusterInput{
+					LogFolder:                clusterctlLogFolder,
+					ClusterctlConfigPath:     clusterctlConfigPath,
+					KubeconfigPath:           bootstrapClusterProxy.GetKubeconfigPath(),
+					InfrastructureProvider:   infrastructureProvider,
+					Namespace:                namespace.Name,
+					ClusterName:              clusterName,
+					KubernetesVersion:        e2eConfig.GetVariable(KubernetesVersion),
+					ControlPlaneMachineCount: ptr.To(int64(1)),
+					WorkerMachineCount:       ptr.To(int64(1)),
+				},
+				WaitForClusterIntervals:      e2eConfig.GetIntervals(specName, "wait-cluster"),
+				WaitForControlPlaneIntervals: e2eConfig.GetIntervals(specName, "wait-control-plane"),
+				WaitForMachineDeployments:    e2eConfig.GetIntervals(specName, "wait-worker-nodes"),
+			}, result)
+
+			bootstrapProxyClient := bootstrapClusterProxy.GetClient()
+
+			By("Refreshing certificates for the control plane nodes")
+			ApplyCertificateRefreshForControlPlane(ctx, ApplyCertificateRefreshForControlPlaneInput{
+				Lister:                  bootstrapProxyClient,
+				Getter:                  bootstrapProxyClient,
+				ClusterProxy:            bootstrapClusterProxy,
+				Cluster:                 result.Cluster,
+				TTL:                     "1y",
+				WaitForRefreshIntervals: e2eConfig.GetIntervals(specName, "wait-machine-refresh"),
+			})
+
+			By("Refreshing certificates for the worker nodes")
+			ApplyCertificateRefreshForWorker(ctx, ApplyCertificateRefreshForWorkerInput{
+				Lister:                  bootstrapProxyClient,
+				Getter:                  bootstrapProxyClient,
+				ClusterProxy:            bootstrapClusterProxy,
+				Cluster:                 result.Cluster,
+				MachineDeployments:      result.MachineDeployments,
+				TTL:                     "1y",
+				WaitForRefreshIntervals: e2eConfig.GetIntervals(specName, "wait-machine-refresh"),
+			})
+
+			By("Verifying certificates expiry dates are updated")
+			machineList := &clusterv1.MachineList{}
+			Expect(bootstrapProxyClient.List(ctx, machineList,
+				client.InNamespace(result.Cluster.Namespace),
+				client.MatchingLabels{clusterv1.ClusterNameLabel: result.Cluster.Name},
+			)).To(Succeed())
+
+			for _, machine := range machineList.Items {
+				mAnnotations := machine.GetAnnotations()
+				Expect(mAnnotations).To(HaveKey(bootstrapv1.MachineCertificatesExpiryDateAnnotation))
+				Expect(mAnnotations[bootstrapv1.CertificatesRefreshStatusAnnotation]).To(Equal(bootstrapv1.CertificatesRefreshDoneStatus))
+
+				_, err := time.Parse(time.RFC3339, mAnnotations[bootstrapv1.MachineCertificatesExpiryDateAnnotation])
+				Expect(err).NotTo(HaveOccurred())
+			}
+		})
+	})
+})
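A note on the "1y" TTL used by the spec: the controller parses the annotation value with utiltime.TTLToSeconds before calling into k8sd (see the controller changes in patch 1 of this series). The authoritative unit set lives in that package; the following is only an approximate stand-in that captures the intent for the suffixes the tests rely on:

    package refresh

    import (
    	"fmt"
    	"strconv"
    	"strings"
    )

    // ttlToSeconds approximates a TTL parser for values like "1y", "90d",
    // or "24h". Illustrative only; not the repo's utiltime.TTLToSeconds.
    func ttlToSeconds(ttl string) (int, error) {
    	if ttl == "" {
    		return 0, fmt.Errorf("empty TTL")
    	}
    	unit := ttl[len(ttl)-1:]
    	n, err := strconv.Atoi(strings.TrimSuffix(ttl, unit))
    	if err != nil {
    		return 0, fmt.Errorf("invalid TTL %q: %w", ttl, err)
    	}
    	switch unit {
    	case "y":
    		return n * 365 * 24 * 3600, nil // "1y" -> 31536000
    	case "d":
    		return n * 24 * 3600, nil
    	case "h":
    		return n * 3600, nil
    	default:
    		return 0, fmt.Errorf("unsupported TTL unit %q", unit)
    	}
    }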