diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml
index 2fbbd3a3..1a8faeb8 100644
--- a/.github/workflows/e2e.yaml
+++ b/.github/workflows/e2e.yaml
@@ -65,6 +65,7 @@ jobs:
           - "Workload cluster creation"
           - "Workload cluster scaling"
           - "Workload cluster upgrade"
+          - "Certificate Refresh" # TODO(ben): Remove once all tests are running stable.
       fail-fast: false
     steps:
diff --git a/bootstrap/api/v1beta2/certificates_refresh_consts.go b/bootstrap/api/v1beta2/certificates_refresh_consts.go
index ec53cd06..f4cbe39c 100644
--- a/bootstrap/api/v1beta2/certificates_refresh_consts.go
+++ b/bootstrap/api/v1beta2/certificates_refresh_consts.go
@@ -1,7 +1,14 @@
 package v1beta2
 
 const (
-	CertificatesRefreshAnnotation = "v1beta2.k8sd.io/refresh-certificates"
+	CertificatesRefreshAnnotation       = "v1beta2.k8sd.io/refresh-certificates"
+	CertificatesRefreshStatusAnnotation = "v1beta2.k8sd.io/refresh-certificates-status"
+)
+
+const (
+	CertificatesRefreshInProgressStatus = "in-progress"
+	CertificatesRefreshDoneStatus       = "done"
+	CertificatesRefreshFailedStatus     = "failed"
 )
 
 const (
diff --git a/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigs.yaml b/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigs.yaml
index b237b1aa..18ec8325 100644
--- a/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigs.yaml
+++ b/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigs.yaml
@@ -51,9 +51,6 @@ spec:
                 items:
                   type: string
                 type: array
-              channel:
-                description: Channel is the channel to use for the snap install.
-                type: string
               bootstrapConfig:
                 description: BootstrapConfig is the data to be passed to the bootstrap
                   script.
@@ -87,6 +84,9 @@ spec:
                     - secret
                   type: object
                 type: object
+              channel:
+                description: Channel is the channel to use for the snap install.
+                type: string
               controlPlane:
                 description: CK8sControlPlaneConfig is configuration for the control
                   plane node.
diff --git a/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigtemplates.yaml b/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigtemplates.yaml
index c868cfd5..d9e2d8e7 100644
--- a/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigtemplates.yaml
+++ b/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigtemplates.yaml
@@ -58,9 +58,6 @@ spec:
                     items:
                       type: string
                     type: array
-                  channel:
-                    description: Channel is the channel to use for the snap install.
-                    type: string
                   bootstrapConfig:
                     description: BootstrapConfig is the data to be passed to the
                       bootstrap script.
@@ -94,6 +91,9 @@ spec:
                         - secret
                       type: object
                     type: object
+                  channel:
+                    description: Channel is the channel to use for the snap install.
+                    type: string
                   controlPlane:
                     description: CK8sControlPlaneConfig is configuration for the
                       control plane node.
diff --git a/bootstrap/controllers/certificates_controller.go b/bootstrap/controllers/certificates_controller.go
index a36b664c..0142d3f0 100644
--- a/bootstrap/controllers/certificates_controller.go
+++ b/bootstrap/controllers/certificates_controller.go
@@ -27,18 +27,25 @@ import (
 // CertificatesReconciler reconciles a Machine's certificates.
 type CertificatesReconciler struct {
 	client.Client
-	Log    logr.Logger
-	Scheme *runtime.Scheme
-	recorder record.EventRecorder
-
-	K8sdDialTimeout time.Duration
-
+	Log               logr.Logger
+	Scheme            *runtime.Scheme
+	recorder          record.EventRecorder
+	K8sdDialTimeout   time.Duration
 	managementCluster ck8s.ManagementCluster
 }
 
+type CertificatesScope struct {
+	Cluster  *clusterv1.Cluster
+	Config   *bootstrapv1.CK8sConfig
+	Log      logr.Logger
+	Machine  *clusterv1.Machine
+	Patcher  *patch.Helper
+	Workload *ck8s.Workload
+}
+
 // SetupWithManager sets up the controller with the Manager.
 func (r *CertificatesReconciler) SetupWithManager(mgr ctrl.Manager) error {
-	if _, err := ctrl.NewControllerManagedBy(mgr).For(&clusterv1.Machine{}).Build(r); err != nil {
+	if err := ctrl.NewControllerManagedBy(mgr).For(&clusterv1.Machine{}).Complete(r); err != nil {
 		return err
 	}
 
@@ -54,15 +61,6 @@ func (r *CertificatesReconciler) SetupWithManager(mgr ctrl.Manager) error {
 	return nil
 }
 
-type CertificatesScope struct {
-	Cluster  *clusterv1.Cluster
-	Config   *bootstrapv1.CK8sConfig
-	Log      logr.Logger
-	Machine  *clusterv1.Machine
-	Patcher  *patch.Helper
-	Workload *ck8s.Workload
-}
-
 // +kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=ck8sconfigs,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=ck8sconfigs/status,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status;machines;machines/status,verbs=get;list;watch
@@ -77,98 +75,136 @@ func (r *CertificatesReconciler) Reconcile(ctx context.Context, req ctrl.Request
 		if apierrors.IsNotFound(err) {
 			return ctrl.Result{}, nil
 		}
-		// Error reading the object - requeue the request.
 		return ctrl.Result{}, err
 	}
 
 	if !m.ObjectMeta.DeletionTimestamp.IsZero() {
 		// Machine is being deleted, return early.
 		return ctrl.Result{}, nil
 	}
 
+	if m.Status.NodeRef == nil {
+		// The machine does not yet have a node ref; requeue until it does.
+		return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
+	}
+
 	mAnnotations := m.GetAnnotations()
+	if mAnnotations == nil {
+		mAnnotations = map[string]string{}
+	}
 
 	var refreshCertificates, hasExpiryDateAnnotation bool
 	_, refreshCertificates = mAnnotations[bootstrapv1.CertificatesRefreshAnnotation]
 	_, hasExpiryDateAnnotation = mAnnotations[bootstrapv1.MachineCertificatesExpiryDateAnnotation]
+
+	if mAnnotations[bootstrapv1.CertificatesRefreshStatusAnnotation] == bootstrapv1.CertificatesRefreshInProgressStatus {
+		if !refreshCertificates {
+			// If a refresh is in progress but the refresh annotation is missing,
+			// clear the status.
+			delete(mAnnotations, bootstrapv1.CertificatesRefreshStatusAnnotation)
+			m.SetAnnotations(mAnnotations)
+			if err := r.Client.Update(ctx, m); err != nil {
+				return ctrl.Result{}, fmt.Errorf("failed to clear status annotation: %w", err)
+			}
+			return ctrl.Result{}, nil
+		}
+		log.Info("Certificates refresh already in progress",
+			"refreshStatus", bootstrapv1.CertificatesRefreshInProgressStatus,
+			"refreshAnnotation", mAnnotations[bootstrapv1.CertificatesRefreshAnnotation],
+		)
+		return ctrl.Result{}, nil
+	}
+
 	if !refreshCertificates && hasExpiryDateAnnotation {
 		// No need to refresh certificates or update expiry date, return early.
 		return ctrl.Result{}, nil
 	}
 
-	// Look up for the CK8sConfig.
+	scope, err := r.createScope(ctx, m, log)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+
+	if !hasExpiryDateAnnotation {
+		if err := r.updateExpiryDateAnnotation(ctx, scope); err != nil {
+			return ctrl.Result{}, err
+		}
+	}
+
+	if refreshCertificates {
+		if err := r.refreshCertificates(ctx, scope); err != nil {
+			// Record the failure, then return the error so the request is requeued.
+			mAnnotations[bootstrapv1.CertificatesRefreshStatusAnnotation] = bootstrapv1.CertificatesRefreshFailedStatus
+			m.SetAnnotations(mAnnotations)
+			if err := r.Client.Update(ctx, m); err != nil {
+				return ctrl.Result{}, fmt.Errorf("failed to set failed status annotation: %w", err)
+			}
+			return ctrl.Result{}, err
+		}
+	}
+
+	return ctrl.Result{}, nil
+}
+
+func (r *CertificatesReconciler) createScope(ctx context.Context, m *clusterv1.Machine, log logr.Logger) (*CertificatesScope, error) {
 	config := &bootstrapv1.CK8sConfig{}
 	if err := r.Client.Get(ctx, types.NamespacedName{Namespace: m.Namespace, Name: m.Spec.Bootstrap.ConfigRef.Name}, config); err != nil {
-		return ctrl.Result{}, err
+		return nil, fmt.Errorf("failed to get CK8sConfig: %w", err)
 	}
 
-	// Get the owner of the CK8sConfig to determine if it's a control plane or worker node.
 	configOwner, err := bsutil.GetConfigOwner(ctx, r.Client, config)
 	if err != nil {
-		log.Error(err, "Failed to get config owner")
-		return ctrl.Result{}, err
+		return nil, fmt.Errorf("failed to get config owner: %w", err)
 	}
 	if configOwner == nil {
-		return ctrl.Result{}, nil
+		return nil, fmt.Errorf("config owner not found")
 	}
 
 	cluster, err := util.GetClusterByName(ctx, r.Client, m.GetNamespace(), m.Spec.ClusterName)
 	if err != nil {
-		return ctrl.Result{}, err
+		return nil, fmt.Errorf("failed to get cluster: %w", err)
 	}
 
-	microclusterPort := config.Spec.ControlPlaneConfig.GetMicroclusterPort()
-	workload, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster), microclusterPort)
+	workload, err := r.managementCluster.GetWorkloadCluster(
+		ctx,
+		util.ObjectKey(cluster),
+		config.Spec.ControlPlaneConfig.GetMicroclusterPort(),
+	)
 	if err != nil {
-		return ctrl.Result{}, err
+		return nil, fmt.Errorf("failed to get workload cluster: %w", err)
 	}
 
 	patchHelper, err := patch.NewHelper(m, r.Client)
 	if err != nil {
-		return ctrl.Result{}, fmt.Errorf("failed to create patch helper for machine: %w", err)
+		return nil, fmt.Errorf("failed to create patch helper: %w", err)
 	}
 
-	scope := &CertificatesScope{
+	return &CertificatesScope{
 		Log:      log,
 		Machine:  m,
 		Config:   config,
 		Cluster:  cluster,
 		Patcher:  patchHelper,
 		Workload: workload,
-	}
-
-	if !hasExpiryDateAnnotation {
-		if err := r.updateExpiryDateAnnotation(ctx, scope); err != nil {
-			return ctrl.Result{}, err
-		}
-	}
-
-	if refreshCertificates {
-		if configOwner.IsControlPlaneMachine() {
-			if err := r.refreshControlPlaneCertificates(ctx, scope); err != nil {
-				return ctrl.Result{}, fmt.Errorf("failed to refresh control plane certificates: %w", err)
-			}
-		} else {
-			if err := r.refreshWorkerCertificates(ctx, scope); err != nil {
-				return ctrl.Result{}, fmt.Errorf("failed to refresh worker certificates: %w", err)
-			}
-		}
-	}
-
-	return ctrl.Result{}, nil
+	}, nil
 }
 
-func (r *CertificatesReconciler) refreshControlPlaneCertificates(ctx context.Context, scope *CertificatesScope) error {
+func (r *CertificatesReconciler) refreshCertificates(ctx context.Context, scope *CertificatesScope) error {
 	nodeToken, err := token.LookupNodeToken(ctx, r.Client, util.ObjectKey(scope.Cluster), scope.Machine.Name)
 	if err != nil {
 		return fmt.Errorf("failed to lookup node token: %w", err)
 	}
 
 	mAnnotations := scope.Machine.GetAnnotations()
-
 	refreshAnnotation, ok := mAnnotations[bootstrapv1.CertificatesRefreshAnnotation]
 	if !ok {
-		return nil
+		return fmt.Errorf("refresh annotation not found")
+	}
+
+	mAnnotations[bootstrapv1.CertificatesRefreshStatusAnnotation] = bootstrapv1.CertificatesRefreshInProgressStatus
+	scope.Machine.SetAnnotations(mAnnotations)
+	if err := scope.Patcher.Patch(ctx, scope.Machine); err != nil {
+		return fmt.Errorf("failed to set in-progress status: %w", err)
 	}
 
 	r.recorder.Eventf(
@@ -180,16 +216,37 @@ func (r *CertificatesReconciler) refreshControlPlaneCertificates(ctx context.Con
 
 	seconds, err := utiltime.TTLToSeconds(refreshAnnotation)
 	if err != nil {
-		return fmt.Errorf("failed to parse expires-in annotation value: %w", err)
+		return fmt.Errorf("failed to parse TTL: %w", err)
 	}
 
-	controlPlaneConfig := scope.Config.Spec.ControlPlaneConfig
-	controlPlaneEndpoint := scope.Cluster.Spec.ControlPlaneEndpoint.Host
-
-	extraSANs := controlPlaneConfig.ExtraSANs
-	extraSANs = append(extraSANs, controlPlaneEndpoint)
+	var expirySecondsUnix int
+	configOwner, err := bsutil.GetConfigOwner(ctx, r.Client, scope.Config)
+	if err != nil {
+		return fmt.Errorf("failed to get config owner: %w", err)
+	}
+	if configOwner == nil {
+		return fmt.Errorf("config owner not found")
+	}
+	if configOwner.IsControlPlaneMachine() {
+		var extraSANs []string
+		extraSANs = append(extraSANs, scope.Config.Spec.ControlPlaneConfig.ExtraSANs...)
+		extraSANs = append(extraSANs, scope.Cluster.Spec.ControlPlaneEndpoint.Host)
+		expirySecondsUnix, err = scope.Workload.RefreshControlPlaneCertificates(
+			ctx,
+			scope.Machine,
+			*nodeToken,
+			seconds,
+			extraSANs,
+		)
+	} else {
+		expirySecondsUnix, err = scope.Workload.RefreshWorkerCertificates(
+			ctx,
+			scope.Machine,
+			*nodeToken,
+			seconds,
+		)
+	}
 
-	expirySecondsUnix, err := scope.Workload.RefreshControlPlaneCertificates(ctx, scope.Machine, *nodeToken, seconds, extraSANs)
 	if err != nil {
 		r.recorder.Eventf(
 			scope.Machine,
@@ -201,10 +258,12 @@ func (r *CertificatesReconciler) refreshControlPlaneCertificates(ctx context.Con
 	}
 
 	expiryTime := time.Unix(int64(expirySecondsUnix), 0)
 
 	delete(mAnnotations, bootstrapv1.CertificatesRefreshAnnotation)
+	mAnnotations[bootstrapv1.CertificatesRefreshStatusAnnotation] = bootstrapv1.CertificatesRefreshDoneStatus
 	mAnnotations[bootstrapv1.MachineCertificatesExpiryDateAnnotation] = expiryTime.Format(time.RFC3339)
 	scope.Machine.SetAnnotations(mAnnotations)
+
 	if err := scope.Patcher.Patch(ctx, scope.Machine); err != nil {
 		return fmt.Errorf("failed to patch machine annotations: %w", err)
 	}
@@ -231,82 +290,18 @@ func (r *CertificatesReconciler) updateExpiryDateAnnotation(ctx context.Context,
 		return fmt.Errorf("failed to lookup node token: %w", err)
 	}
 
-	mAnnotations := scope.Machine.GetAnnotations()
-	if mAnnotations == nil {
-		mAnnotations = map[string]string{}
-	}
-
 	expiryDateString, err := scope.Workload.GetCertificatesExpiryDate(ctx, scope.Machine, *nodeToken)
 	if err != nil {
 		return fmt.Errorf("failed to get certificates expiry date: %w", err)
 	}
 
+	mAnnotations := scope.Machine.GetAnnotations()
+	if mAnnotations == nil {
+		mAnnotations = map[string]string{}
+	}
+
 	mAnnotations[bootstrapv1.MachineCertificatesExpiryDateAnnotation] = expiryDateString
 	scope.Machine.SetAnnotations(mAnnotations)
-	if err := scope.Patcher.Patch(ctx, scope.Machine); err != nil {
-		return fmt.Errorf("failed to patch machine annotations: %w", err)
-	}
-
-	return nil
-}
-
-func (r *CertificatesReconciler) refreshWorkerCertificates(ctx context.Context, scope *CertificatesScope) error {
-	nodeToken, err := token.LookupNodeToken(ctx, r.Client, util.ObjectKey(scope.Cluster), scope.Machine.Name)
-	if err != nil {
-		return fmt.Errorf("failed to lookup node token: %w", err)
-	}
-	mAnnotations := scope.Machine.GetAnnotations()
-
-	refreshAnnotation, ok := mAnnotations[bootstrapv1.CertificatesRefreshAnnotation]
-	if !ok {
-		return nil
-	}
-
-	r.recorder.Eventf(
-		scope.Machine,
-		corev1.EventTypeNormal,
-		bootstrapv1.CertificatesRefreshInProgressEvent,
-		"Certificates refresh in progress. TTL: %s", refreshAnnotation,
-	)
-
-	seconds, err := utiltime.TTLToSeconds(refreshAnnotation)
-	if err != nil {
-		return fmt.Errorf("failed to parse expires-in annotation value: %w", err)
-	}
-
-	expirySecondsUnix, err := scope.Workload.RefreshWorkerCertificates(ctx, scope.Machine, *nodeToken, seconds)
-	if err != nil {
-		r.recorder.Eventf(
-			scope.Machine,
-			corev1.EventTypeWarning,
-			bootstrapv1.CertificatesRefreshFailedEvent,
-			"Failed to refresh certificates: %v", err,
-		)
-		return fmt.Errorf("failed to refresh certificates: %w", err)
-	}
-
-	expiryTime := time.Unix(int64(expirySecondsUnix), 0)
-
-	delete(mAnnotations, bootstrapv1.CertificatesRefreshAnnotation)
-	mAnnotations[bootstrapv1.MachineCertificatesExpiryDateAnnotation] = expiryTime.Format(time.RFC3339)
-	scope.Machine.SetAnnotations(mAnnotations)
-	if err := scope.Patcher.Patch(ctx, scope.Machine); err != nil {
-		return fmt.Errorf("failed to patch machine annotations: %w", err)
-	}
-
-	r.recorder.Eventf(
-		scope.Machine,
-		corev1.EventTypeNormal,
-		bootstrapv1.CertificatesRefreshDoneEvent,
-		"Certificates refreshed, will expire at %s", expiryTime,
-	)
-
-	scope.Log.Info("Certificates refreshed",
-		"cluster", scope.Cluster.Name,
-		"machine", scope.Machine.Name,
-		"expiry", expiryTime.Format(time.RFC3339),
-	)
-
-	return nil
+	return scope.Patcher.Patch(ctx, scope.Machine)
 }
diff --git a/c1.yaml b/c1.yaml
new file mode 100644
index 00000000..6f13045d
--- /dev/null
+++ b/c1.yaml
@@ -0,0 +1,103 @@
+apiVersion: cluster.x-k8s.io/v1beta1
+kind: Cluster
+metadata:
+  name: c1
+  namespace: default
+spec:
+  clusterNetwork:
+    pods:
+      cidrBlocks:
+      - 10.1.0.0/16
+    serviceDomain: cluster.local
+    services:
+      cidrBlocks:
+      - 10.152.0.0/16
+  controlPlaneRef:
+    apiVersion: controlplane.cluster.x-k8s.io/v1beta2
+    kind: CK8sControlPlane
+    name: c1-control-plane
+  infrastructureRef:
+    apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+    kind: DockerCluster
+    name: c1
+---
+apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+kind: DockerCluster
+metadata:
+  name: c1
+  namespace: default
+spec: {}
+---
+apiVersion: controlplane.cluster.x-k8s.io/v1beta2
+kind: CK8sControlPlane
+metadata:
+  name: c1-control-plane
+  namespace: default
+spec:
+  machineTemplate:
+    infrastructureTemplate:
+      apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+      kind: DockerMachineTemplate
+      name: c1-control-plane
+  replicas: 1
+  spec:
+    airGapped: true
+    controlPlane:
+      extraKubeAPIServerArgs:
+        --anonymous-auth: "true"
+  version: v1.31.1
+---
+apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+kind: DockerMachineTemplate
+metadata:
+  name: c1-control-plane
+  namespace: default
+spec:
+  template:
+    spec:
+      customImage: k8s-snap:dev
+---
+apiVersion: cluster.x-k8s.io/v1beta1
+kind: MachineDeployment
+metadata:
+  name: c1-worker-md-0
+  namespace: default
+spec:
+  clusterName: c1
+  replicas: 1
+  selector:
+    matchLabels:
+      cluster.x-k8s.io/cluster-name: c1
+  template:
+    spec:
+      bootstrap:
+        configRef:
+          apiVersion: bootstrap.cluster.x-k8s.io/v1beta2
+          kind: CK8sConfigTemplate
+          name: c1-md-0
+      clusterName: c1
+      infrastructureRef:
+        apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+        kind: DockerMachineTemplate
+        name: c1-md-0
+      version: v1.31.1
+---
+apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+kind: DockerMachineTemplate
+metadata:
+  name: c1-md-0
+  namespace: default
+spec:
+  template:
+    spec:
+      customImage: k8s-snap:dev
+---
+apiVersion: bootstrap.cluster.x-k8s.io/v1beta2
+kind: CK8sConfigTemplate
+metadata:
+  name: c1-md-0
+  namespace: default
+spec:
+  template:
+    spec:
+      airGapped: true
diff --git a/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanes.yaml b/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanes.yaml
index 7dc56812..3fcbaace 100644
--- a/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanes.yaml
+++ b/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanes.yaml
@@ -246,9 +246,6 @@ spec:
                 items:
                   type: string
                 type: array
-              channel:
-                description: Channel is the channel to use for the snap install.
-                type: string
               bootstrapConfig:
                 description: BootstrapConfig is the data to be passed to the bootstrap
                   script.
@@ -282,6 +279,9 @@ spec:
                     - secret
                   type: object
                 type: object
+              channel:
+                description: Channel is the channel to use for the snap install.
+                type: string
               controlPlane:
                 description: CK8sControlPlaneConfig is configuration for the control
                   plane node.
diff --git a/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanetemplates.yaml b/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanetemplates.yaml
index ff6e0fcd..6491f93d 100644
--- a/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanetemplates.yaml
+++ b/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanetemplates.yaml
@@ -221,10 +221,6 @@ spec:
                     items:
                       type: string
                     type: array
-                  channel:
-                    description: Channel is the channel to use for the snap
-                      install.
-                    type: string
                   bootstrapConfig:
                     description: BootstrapConfig is the data to be passed to the
                       bootstrap script.
@@ -258,6 +254,10 @@ spec:
                         - secret
                       type: object
                     type: object
+                  channel:
+                    description: Channel is the channel to use for the snap
+                      install.
+                    type: string
                   controlPlane:
                     description: CK8sControlPlaneConfig is configuration for the
                       control plane node.
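Aside for reviewers (not part of the patch): the refresh flow above is driven entirely by Machine annotations — a client sets CertificatesRefreshAnnotation to a TTL, and the controller reports progress through CertificatesRefreshStatusAnnotation ("in-progress", then "done" or "failed"). A minimal sketch of driving that flow with a controller-runtime client follows; the package and function names and the 5s polling interval are illustrative assumptions, only the bootstrapv1 constants come from this PR.

// Illustrative sketch only — not part of this patch.
package refreshexample

import (
	"context"
	"fmt"
	"time"

	bootstrapv1 "github.com/canonical/cluster-api-k8s/bootstrap/api/v1beta2"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// RequestRefresh (hypothetical name) sets the refresh annotation with a TTL
// such as "1y"; the CertificatesReconciler picks it up on the next reconcile.
func RequestRefresh(ctx context.Context, c client.Client, key client.ObjectKey, ttl string) error {
	machine := &clusterv1.Machine{}
	if err := c.Get(ctx, key, machine); err != nil {
		return err
	}
	base := machine.DeepCopy()
	annotations := machine.GetAnnotations()
	if annotations == nil {
		annotations = map[string]string{}
	}
	annotations[bootstrapv1.CertificatesRefreshAnnotation] = ttl
	machine.SetAnnotations(annotations)
	return c.Patch(ctx, machine, client.MergeFrom(base))
}

// WaitForRefresh (hypothetical name) polls the status annotation until the
// controller reports "done" or "failed". The interval is an arbitrary choice.
func WaitForRefresh(ctx context.Context, c client.Client, key client.ObjectKey) error {
	for {
		machine := &clusterv1.Machine{}
		if err := c.Get(ctx, key, machine); err != nil {
			return err
		}
		switch machine.GetAnnotations()[bootstrapv1.CertificatesRefreshStatusAnnotation] {
		case bootstrapv1.CertificatesRefreshDoneStatus:
			return nil
		case bootstrapv1.CertificatesRefreshFailedStatus:
			return fmt.Errorf("certificate refresh failed for machine %s", key.Name)
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(5 * time.Second):
		}
	}
}

This mirrors what the e2e helper ApplyCertificateRefreshAndWait below does with gomega's Eventually.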
diff --git a/kubeconfig b/kubeconfig
new file mode 100644
index 00000000..1429eaed
--- /dev/null
+++ b/kubeconfig
@@ -0,0 +1,20 @@
+apiVersion: v1
+clusters:
+- cluster:
+    certificate-authority-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUM2akNDQWRLZ0F3SUJBZ0lCQURBTkJna3Foa2lHOXcwQkFRc0ZBREFWTVJNd0VRWURWUVFERXdwcmRXSmwKY201bGRHVnpNQjRYRFRJME1UQXlNekUxTXpZek1Gb1hEVE0wTVRBeU1URTFOREV6TUZvd0ZURVRNQkVHQTFVRQpBeE1LYTNWaVpYSnVaWFJsY3pDQ0FTSXdEUVlKS29aSWh2Y05BUUVCQlFBRGdnRVBBRENDQVFvQ2dnRUJBSnIvCjl6bVNaNlVtbE1wNXVyN2NqeHpMKy8vRm5SNkhMMkt6dnR3NUx0YlJ3K0kyR01aWmpkYUZjakpGNEpnTEhlejYKZ1Fnbm55LzhUTnd2ckwyTXcybHRMK1VmYWZzRFllQnRiZzhUSHIwVE9CVUFxWkd2cmxEbndzRnFzMU5HSlBEQQpUcUlvNExYTWQyS2tMS3g5UCt1UWdaNU5MdDBpTFFsL1dlRU9SNThPeVNOaDFua3pNVS9oYmJPYjBlWkc3S2lyCkhTY3gwaWNjNFhHd1VMSzVJUXZuR1NTVGlVZmh3cG92N3FUa0RZeWV4ZUxhWVNQVDZyVmhRZkxJNVBqcEtqNHIKM3dJRHV6eks4NEdNRkx6UXBmMUNQaW5mQjFzR3B1QXV1bHZzMlM4VDBieWxYc2llUTliZ3FHakdNczVreW0wUApPZ2dZUVVSYklBUVhNUHUwQ3cwQ0F3RUFBYU5GTUVNd0RnWURWUjBQQVFIL0JBUURBZ0trTUJJR0ExVWRFd0VCCi93UUlNQVlCQWY4Q0FRQXdIUVlEVlIwT0JCWUVGRTFJSlJHVkdrcldOODBOMTZpZFI1QTl3VGpaTUEwR0NTcUcKU0liM0RRRUJDd1VBQTRJQkFRQjRDSm1kYlB3WXpWMXlOUUpFbkFGNW9aa0NTa0IzR25uVVBKT1VsdnJtNm9yYQpENDRldHJDZitHL3JSLzMvaXN0bUtLY1BENzM4UUhlOHlGWkcwVDZLcVZYM09GZldqTFJZVUpPa2h2ZVc5d1g1Cm9FeWtqbEZ6ZGQ5cWp2Vk5BRjZEcDdCTXY2d0xjbUVMQXMweGRJQ0pFSWV4RWRUcHNQdDFnYnRnNVE5VzJOOVQKS0NZTEFEOEFKRlFRUFpyN2JJZlZycDVPNG5iOHhVSlBJYjNLOTd6cVlqUHNnSGpZVm9kSDAzTUpLV01UTDEydAp2WTB6dm9KWHREOHV1WVEyakJjYnNGWjJibTl6eDkrM1EvL2tlMVdNVE5paFJzQmdqdUtBSmQ5dU9vMGNLUUhDClV4Z3hrbENQQ1lIbUZTYmNJeU9vTm9IRzgyNjV0cXZKWjFFWmRUanAKLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=
+    server: https://172.18.0.3:6443
+  name: c1
+contexts:
+- context:
+    cluster: c1
+    user: c1-admin
+  name: c1-admin@c1
+current-context: c1-admin@c1
+kind: Config
+preferences: {}
+users:
+- name: c1-admin
+  user:
+    client-certificate-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURFekNDQWZ1Z0F3SUJBZ0lJZHRMeUdaTUtrcGN3RFFZSktvWklodmNOQVFFTEJRQXdGVEVUTUJFR0ExVUUKQXhNS2EzVmlaWEp1WlhSbGN6QWVGdzB5TkRFd01qTXhOVE0yTXpCYUZ3MHlOVEV3TWpNeE5UUXhNekJhTURReApGekFWQmdOVkJBb1REbk41YzNSbGJUcHRZWE4wWlhKek1Sa3dGd1lEVlFRREV4QnJkV0psY201bGRHVnpMV0ZrCmJXbHVNSUlCSWpBTkJna3Foa2lHOXcwQkFRRUZBQU9DQVE4QU1JSUJDZ0tDQVFFQTBVL0V1TmluWDdZMURsRnYKSWxJaXg0QWFVMDhBdkh5WXIwQUtQcnNvRmtnTVNVQnlkcDZBQmdqZGRpWTZ3RUw1eitFN0NDZXpQN29nRlhRbApxVlVUWlhlRnljdVFhUzdBN001WUh3dVNNaHJuaHRwTHplY3hDT1hhODBXMEgxcFNQdnhwdXlxVTZsMEx4a3crCjBUZU15cjdRTlpUWmx6Q2lmdmlyOEduQU45ZGZQZVlmeWtKaDlkK3gyekxFdE1ZT1ozKzlnQjlsMUFzQmRMVjQKbnFHTVVWV1VWaEc3am9hMDlaYjh4UTc5dTJMamVNNkV0b2oyTHFOR1FONzJva1hZT1owaXI3RjhpYTcyeEFnRwpHVTQzZDBsa3owdTd1ZjNaaVJKUGhvQ0YwUHJBSFRDVmxpTWVXVHFLN0ZBV0p6UTcrcnFmS08rSng0SUFCNTN6CnRLZE1CUUlEQVFBQm8wZ3dSakFPQmdOVkhROEJBZjhFQkFNQ0JhQXdFd1lEVlIwbEJBd3dDZ1lJS3dZQkJRVUgKQXdJd0h3WURWUjBqQkJnd0ZvQVV6K0RXS29UTW1LYmozT2IxQ3RuSE1qZzk2cVF3RFFZSktvWklodmNOQVFFTApCUUFEZ2dFQkFFWEY5dXNDVGVxY29GTm9NNkM2MEdvaWVVNVhVR1VseHBYMndQMTFYSU5KZ0lkcXZWa0VFT2M3CmpmdG8wQXM2clJwazcyRmNCWnF3UzVaL2xxUEFlQUFudWpSUnhVWHdhck02NVhoY3h3UGJ3NHdTQjlsR0k5aGIKZGVlMS8zbGdIUEhGZEdRYk90aFJlN3phUitXWUMzRVFFWk02TWtNTVE5WEFEK25aOUhDSGIzSURqdWtwTUwxQwpQQ1FBUW1TUnpsa21zU1N2aCtsYjQ0cnJvK3NpU29kTDFiWTZwWnljODc0OTIrVytVOWxYOFVSZGhRVVZETGZrCmdqenc4ancvbmhmTGlhZmphZTV2K1M0eW5uc1Z4dTlMcEVTdTNGRDRIUS80Zk1QYWpDWTYrZjhmZFY1WWZoRmsKSmRyTnowMU8wZmcrdisrTWpBUm5hbk1QY2RqbEJROD0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=
+    client-key-data: LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFb3dJQkFBS0NBUUVBMFUvRXVOaW5YN1kxRGxGdklsSWl4NEFhVTA4QXZIeVlyMEFLUHJzb0ZrZ01TVUJ5CmRwNkFCZ2pkZGlZNndFTDV6K0U3Q0NlelA3b2dGWFFscVZVVFpYZUZ5Y3VRYVM3QTdNNVlId3VTTWhybmh0cEwKemVjeENPWGE4MFcwSDFwU1B2eHB1eXFVNmwwTHhrdyswVGVNeXI3UU5aVFpsekNpZnZpcjhHbkFOOWRmUGVZZgp5a0poOWQreDJ6TEV0TVlPWjMrOWdCOWwxQXNCZExWNG5xR01VVldVVmhHN2pvYTA5WmI4eFE3OXUyTGplTTZFCnRvajJMcU5HUU43Mm9rWFlPWjBpcjdGOGlhNzJ4QWdHR1U0M2QwbGt6MHU3dWYzWmlSSlBob0NGMFByQUhUQ1YKbGlNZVdUcUs3RkFXSnpRNytycWZLTytKeDRJQUI1M3p0S2RNQlFJREFRQUJBb0lCQUZOYTAvblU5ZHFzNGpZZwpnZ1NGOXluQnZ6b2NCU3dLNW5jc0pxaFN1aVVkYmg4aDFqT2M3Vmo1NE0vemE0UXpaKzY4UUZrQURtYzdoUmxtCkZyVjNsdkRhaDJJVWVoOFpLbVZqeXo0WnlvKy9uU0piL3NJeDVwb25oMmxJQWZKV2N0aDRodGF0bjM0eHJjVzcKaVd5aUZhU01Gcml2M1hOVFAvTVdyc3BnSXRkWmFlLzlaNkk3SnFvdE00aWVYOXc5U1hGcWNCUUFTTHhQZGFDMgp3QlAzNEJSS1RmQ1BLMTU4Z09ma0pNSStNblhYOVgvNE52d056a09aVENQTVE0WFRSOGx2YXJvRENkT1VBUnpPCmovcWFFYkswanJIYUJGNzd6cWp3QXd0MW9SUllwWGFSOFl6ekorUmZUZWN2SWwrK3p2TXAxZDhhOWI5MGM3bjAKYkdjSTlZVUNnWUVBOWtnTktYeDdLajhXc1hDK29uWVJsZ0RSOHVvVUQ2MDJRRldWS3oxU0diSTEwdm93UFZwYgpNODBqK2dUUTVVdThLamhlSTBuTEpXK1FSY0RGU0pGbUUvekFKN0NGRGppRkQ4ZVpuMGVOeEFqYWZHRGt4WklaCi9OT09iNUp4UDR5RllaaXhBcDV6Zzd3R2E5UTEyalVRaXgvM3dGbjVkRFk3ZFZhS2hBR2NKUk1DZ1lFQTJaSSsKM0JRaVN4elQxVmplQzJkdC8wWnJPOWdWV3BsYlZQQkdqd042MG1JaERzQndpNWlmaDI2SVhqSFVObHBEa3FlZQozKytrVzhNNlIycklKV0VjcnR0NnVWeWlXQnFXMG9IVENzV2JwVFFXaEZHT3JOcklCTGkrMWZHWjBZMTgydTRxCkkySXo0Sk01NHVjTVcrbjNobGZzZFpjSWVpbWF0WmRub2JDOEpZY0NnWUIxTlhrK0kwWWlwdi9QMXM2RFZ4bXIKZ0J4ck5VZnk1Z2FKdTdGUWNOQ2Y5aFp6b0NwUktLMTdDOGh4ZHRWTUt5MFVFdHVLZzlZd3JOWEd2S1dua2JzNQpDL01QY0kwQ1paZStHTXBkNlpub2tDWWJSNm5ZOVYxMDIrSlA2eXdHaUlQNkhNY3hiZU9mOEY1Rjl5cmgvSnN3Cit6Zlh6WHBRK25aMXM0Z2NwdjkvRFFLQmdDTWlPYUppWWZOTk1XdjNVTXY0cE11ZlBIdktkaEJPU1hCYTZKbjUKanVEZ1ZjUkRFU21KdU9FdTJUaVV4VmNObG5IZFBZdWQ4Q1dkVGhEd1RtZXkyZVhtclZlM2ZNUExiMldJNzJLQwo3Skp0NmVEdUpxTUZKVjJWMDhOS28zTXliT0lScVo1VElsdDJpdGQ0UmdlbHJZSDFPc3IyVzdrV2ROUTBJZGo3CmFEYUxBb0dCQUl2cjFQWDB1L0pqc0xxNWZmaEc4YWJVaDU2MHBWa0ZvMWs4V2xQSlU5UGEwQ216RnBVMVQ0eCsKQlRKQ3pON0xrU0tKRFNoL01GbW9RUVFCL0ZEVEhPakZMcTM4WVZFd2x4YzJOQ25sa2VZY09vQk1wQ0xHeThFYQpSdEpiTDMyVFlHZmo1OUVJR2Q3bXhJLzd5SGVERmx3N2R0bzUzOUVIblFielRzcUwxeXN1Ci0tLS0tRU5EIFJTQSBQUklWQVRFIEtFWS0tLS0tCg==
+
diff --git a/pkg/ck8s/workload_cluster.go b/pkg/ck8s/workload_cluster.go
index ebec9057..63c5f353 100644
--- a/pkg/ck8s/workload_cluster.go
+++ b/pkg/ck8s/workload_cluster.go
@@ -209,7 +209,9 @@ func (w *Workload) GetCertificatesExpiryDate(ctx context.Context, machine *clust
 }
 
 func (w *Workload) ApproveCertificates(ctx context.Context, machine *clusterv1.Machine, seed int) error {
-	request := apiv1.ClusterAPIApproveWorkerCSRRequest{}
+	request := apiv1.ClusterAPIApproveWorkerCSRRequest{
+		Seed: seed,
+	}
 	response := &apiv1.ClusterAPIApproveWorkerCSRResponse{}
 	k8sdProxy, err := w.GetK8sdProxyForControlPlane(ctx, k8sdProxyOptions{})
 	if err != nil {
diff --git a/test/e2e/config/ck8s-docker.yaml b/test/e2e/config/ck8s-docker.yaml
index 8dba00e0..e2eb6722 100644
--- a/test/e2e/config/ck8s-docker.yaml
+++ b/test/e2e/config/ck8s-docker.yaml
@@ -105,6 +105,7 @@ intervals:
   default/wait-nodes-ready: ["10m", "10s"]
   default/wait-machine-remediation: ["5m", "10s"]
   default/wait-autoscaler: ["5m", "10s"]
+  default/wait-machine-refresh: ["5m", "10s"]
   node-drain/wait-deployment-available: ["3m", "10s"]
   node-drain/wait-control-plane: ["15m", "10s"]
   node-drain/wait-machine-deleted: ["2m", "10s"]
diff --git a/test/e2e/helpers.go b/test/e2e/helpers.go
index 42ad619a..7c771503 100644
--- a/test/e2e/helpers.go
+++ b/test/e2e/helpers.go
@@ -554,6 +554,148 @@ func WaitForControlPlaneAndMachinesReady(ctx context.Context, input WaitForContr
 	})
 }
 
+type ApplyCertificateRefreshAndWaitInput struct {
+	Getter                  framework.Getter
+	Machine                 *clusterv1.Machine
+	ClusterProxy            framework.ClusterProxy
+	TTL                     string
+	WaitForRefreshIntervals []interface{}
+}
+
+func ApplyCertificateRefreshAndWait(ctx context.Context, input ApplyCertificateRefreshAndWaitInput) {
+	Expect(ctx).NotTo(BeNil())
+	Expect(input.Machine).ToNot(BeNil())
+	Expect(input.ClusterProxy).ToNot(BeNil())
+	Expect(input.TTL).ToNot(BeEmpty())
+
+	mgmtClient := input.ClusterProxy.GetClient()
+
+	patchHelper, err := patch.NewHelper(input.Machine, mgmtClient)
+	Expect(err).ToNot(HaveOccurred())
+
+	mAnnotations := input.Machine.GetAnnotations()
+	if mAnnotations == nil {
+		mAnnotations = map[string]string{}
+	}
+
+	mAnnotations[bootstrapv1.CertificatesRefreshAnnotation] = input.TTL
+	input.Machine.SetAnnotations(mAnnotations)
+	err = patchHelper.Patch(ctx, input.Machine)
+	Expect(err).ToNot(HaveOccurred())
+
+	By("Waiting for certificates to be refreshed")
+	Eventually(func() (bool, error) {
+		machine := &clusterv1.Machine{}
+		if err := input.Getter.Get(ctx, client.ObjectKey{
+			Namespace: input.Machine.Namespace,
+			Name:      input.Machine.Name,
+		}, machine); err != nil {
+			return false, err
+		}
+
+		mAnnotations := machine.GetAnnotations()
+		if mAnnotations == nil {
+			return false, nil
+		}
+
+		status, ok := mAnnotations[bootstrapv1.CertificatesRefreshStatusAnnotation]
+		if !ok {
+			return false, nil
+		}
+
+		if status == bootstrapv1.CertificatesRefreshFailedStatus {
+			return false, fmt.Errorf("certificates refresh failed for machine %s", machine.Name)
+		}
+
+		return status == bootstrapv1.CertificatesRefreshDoneStatus, nil
+	}, input.WaitForRefreshIntervals...).Should(BeTrue(), "Certificates refresh failed for %s", input.Machine.Name)
+}
+
+type ApplyCertificateRefreshForControlPlaneInput struct {
+	Lister                  framework.Lister
+	Getter                  framework.Getter
+	ClusterProxy            framework.ClusterProxy
+	Cluster                 *clusterv1.Cluster
+	TTL                     string
+	WaitForRefreshIntervals []interface{}
+}
+
+func ApplyCertificateRefreshForControlPlane(ctx context.Context, input ApplyCertificateRefreshForControlPlaneInput) {
+	Expect(ctx).NotTo(BeNil())
+	Expect(input.ClusterProxy).ToNot(BeNil())
+	Expect(input.Cluster).ToNot(BeNil())
+	Expect(input.TTL).ToNot(BeEmpty())
+
+	By("Looking up control plane machines")
+	machineList := &clusterv1.MachineList{}
+	Eventually(func() error {
+		return input.Lister.List(ctx, machineList,
+			client.InNamespace(input.Cluster.Namespace),
+			client.MatchingLabels{
+				clusterv1.ClusterNameLabel:         input.Cluster.Name,
+				clusterv1.MachineControlPlaneLabel: "",
+			})
+	}, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(),
+		"Failed to list control plane machines for cluster %q", input.Cluster.Name)
+
+	for i := range machineList.Items {
+		machine := &machineList.Items[i]
+		By(fmt.Sprintf("Refreshing certificates for control plane machine: %s", machine.Name))
+		ApplyCertificateRefreshAndWait(ctx, ApplyCertificateRefreshAndWaitInput{
+			Getter:                  input.Getter,
+			Machine:                 machine,
+			ClusterProxy:            input.ClusterProxy,
+			TTL:                     input.TTL,
+			WaitForRefreshIntervals: input.WaitForRefreshIntervals,
+		})
+	}
+}
+
+type ApplyCertificateRefreshForWorkerInput struct {
+	Lister                  framework.Lister
+	Getter                  framework.Getter
+	ClusterProxy            framework.ClusterProxy
+	Cluster                 *clusterv1.Cluster
+	MachineDeployments     []*clusterv1.MachineDeployment
+	TTL                     string
+	WaitForRefreshIntervals []interface{}
+}
+
+func ApplyCertificateRefreshForWorker(ctx context.Context, input ApplyCertificateRefreshForWorkerInput) {
+	Expect(ctx).NotTo(BeNil())
+	Expect(input.ClusterProxy).ToNot(BeNil())
+	Expect(input.Cluster).ToNot(BeNil())
+	Expect(input.MachineDeployments).ToNot(BeNil())
+	Expect(input.TTL).ToNot(BeEmpty())
+
+	for _, md := range input.MachineDeployments {
+		By(fmt.Sprintf("Refreshing certificates for machines in deployment %s", md.Name))
+
+		inClustersNamespaceListOption := client.InNamespace(input.Cluster.Namespace)
+		matchClusterListOption := client.MatchingLabels{
+			clusterv1.ClusterNameLabel:           input.Cluster.Name,
+			clusterv1.MachineDeploymentNameLabel: md.Name,
+		}
+
+		machineList := &clusterv1.MachineList{}
+		Eventually(func() error {
+			return input.Lister.List(ctx, machineList, inClustersNamespaceListOption, matchClusterListOption)
+		}, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Couldn't list machines for deployment %q in the cluster %q", md.Name, input.Cluster.Name)
+
+		for i := range machineList.Items {
+			machine := &machineList.Items[i]
+			By(fmt.Sprintf("Refreshing certificates for worker machine: %s", machine.Name))
+			ApplyCertificateRefreshAndWait(ctx, ApplyCertificateRefreshAndWaitInput{
+				Getter:                  input.Getter,
+				Machine:                 machine,
+				ClusterProxy:            input.ClusterProxy,
+				TTL:                     input.TTL,
+				WaitForRefreshIntervals: input.WaitForRefreshIntervals,
+			})
+		}
+	}
+}
+
 type ApplyInPlaceUpgradeAndWaitInput struct {
 	Getter  framework.Getter
 	Machine *clusterv1.Machine
diff --git a/test/e2e/refresh_certs_test.go b/test/e2e/refresh_certs_test.go
new file mode 100644
index 00000000..d28160e3
--- /dev/null
+++ b/test/e2e/refresh_certs_test.go
@@ -0,0 +1,139 @@
+//go:build e2e
+// +build e2e
+
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e
+
+import (
+	"context"
+	"fmt"
+	"path/filepath"
+	"time"
+
+	bootstrapv1 "github.com/canonical/cluster-api-k8s/bootstrap/api/v1beta2"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/utils/ptr"
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+	"sigs.k8s.io/cluster-api/test/framework/clusterctl"
+	"sigs.k8s.io/cluster-api/util"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+var _ = Describe("Certificate Refresh", func() {
+	var (
+		ctx                    = context.TODO()
+		specName               = "workload-cluster-certificate-refresh"
+		namespace              *corev1.Namespace
+		cancelWatches          context.CancelFunc
+		result                 *ApplyClusterTemplateAndWaitResult
+		clusterName            string
+		clusterctlLogFolder    string
+		infrastructureProvider string
+	)
+
+	BeforeEach(func() {
+		Expect(e2eConfig.Variables).To(HaveKey(KubernetesVersion))
+
+		clusterName = fmt.Sprintf("capick8s-certificate-refresh-%s", util.RandomString(6))
+		infrastructureProvider = clusterctl.DefaultInfrastructureProvider
+
+		// Setup a Namespace where to host objects for this spec and create a watcher for the namespace events.
+		namespace, cancelWatches = setupSpecNamespace(ctx, specName, bootstrapClusterProxy, artifactFolder)
+		result = new(ApplyClusterTemplateAndWaitResult)
+		clusterctlLogFolder = filepath.Join(artifactFolder, "clusters", bootstrapClusterProxy.GetName())
+	})
+
+	AfterEach(func() {
+		cleanInput := cleanupInput{
+			SpecName:        specName,
+			Cluster:         result.Cluster,
+			ClusterProxy:    bootstrapClusterProxy,
+			Namespace:       namespace,
+			CancelWatches:   cancelWatches,
+			IntervalsGetter: e2eConfig.GetIntervals,
+			SkipCleanup:     skipCleanup,
+			ArtifactFolder:  artifactFolder,
+		}
+
+		dumpSpecResourcesAndCleanup(ctx, cleanInput)
+	})
+
+	Context("Performing certificate refresh", func() {
+		It("Should successfully refresh certificates for a cluster [PR-Blocking]", func() {
+			By("Creating a workload cluster with a single control plane and a single worker node")
+			ApplyClusterTemplateAndWait(ctx, ApplyClusterTemplateAndWaitInput{
+				ClusterProxy: bootstrapClusterProxy,
+				ConfigCluster: clusterctl.ConfigClusterInput{
+					LogFolder:                clusterctlLogFolder,
+					ClusterctlConfigPath:     clusterctlConfigPath,
+					KubeconfigPath:           bootstrapClusterProxy.GetKubeconfigPath(),
+					InfrastructureProvider:   infrastructureProvider,
+					Namespace:                namespace.Name,
+					ClusterName:              clusterName,
+					KubernetesVersion:        e2eConfig.GetVariable(KubernetesVersion),
+					ControlPlaneMachineCount: ptr.To(int64(1)),
+					WorkerMachineCount:       ptr.To(int64(1)),
+				},
+				WaitForClusterIntervals:      e2eConfig.GetIntervals(specName, "wait-cluster"),
+				WaitForControlPlaneIntervals: e2eConfig.GetIntervals(specName, "wait-control-plane"),
+				WaitForMachineDeployments:    e2eConfig.GetIntervals(specName, "wait-worker-nodes"),
+			}, result)
+
+			bootstrapProxyClient := bootstrapClusterProxy.GetClient()
+
+			By("Refreshing certificates for the control plane nodes")
+			ApplyCertificateRefreshForControlPlane(ctx, ApplyCertificateRefreshForControlPlaneInput{
+				Lister:                  bootstrapProxyClient,
+				Getter:                  bootstrapProxyClient,
+				ClusterProxy:            bootstrapClusterProxy,
+				Cluster:                 result.Cluster,
+				TTL:                     "1y",
+				WaitForRefreshIntervals: e2eConfig.GetIntervals(specName, "wait-machine-refresh"),
+			})
+
+			By("Refreshing certificates for the worker nodes")
+			ApplyCertificateRefreshForWorker(ctx, ApplyCertificateRefreshForWorkerInput{
+				Lister:                  bootstrapProxyClient,
+				Getter:                  bootstrapProxyClient,
+				ClusterProxy:            bootstrapClusterProxy,
+				Cluster:                 result.Cluster,
+				MachineDeployments:      result.MachineDeployments,
+				TTL:                     "1y",
+				WaitForRefreshIntervals: e2eConfig.GetIntervals(specName, "wait-machine-refresh"),
+			})
+
+			By("Verifying certificates expiry dates are updated")
+			machineList := &clusterv1.MachineList{}
+			Expect(bootstrapProxyClient.List(ctx, machineList,
+				client.InNamespace(result.Cluster.Namespace),
+				client.MatchingLabels{clusterv1.ClusterNameLabel: result.Cluster.Name},
+			)).To(Succeed())
+
+			for _, machine := range machineList.Items {
+				mAnnotations := machine.GetAnnotations()
+				Expect(mAnnotations).To(HaveKey(bootstrapv1.MachineCertificatesExpiryDateAnnotation))
+				Expect(mAnnotations[bootstrapv1.CertificatesRefreshStatusAnnotation]).To(Equal(bootstrapv1.CertificatesRefreshDoneStatus))
+
+				_, err := time.Parse(time.RFC3339, mAnnotations[bootstrapv1.MachineCertificatesExpiryDateAnnotation])
+				Expect(err).NotTo(HaveOccurred())
+			}
+		})
+	})
+})
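Closing aside (not part of the patch): because the controller records the certificate expiry as an RFC3339 timestamp under MachineCertificatesExpiryDateAnnotation, automation can use it to decide when to set the refresh annotation. A hedged sketch under those assumptions; the function name and the renewal-window idea are illustrative, not defined by this PR.

// Illustrative sketch only — not part of this patch.
package refreshexample

import (
	"time"

	bootstrapv1 "github.com/canonical/cluster-api-k8s/bootstrap/api/v1beta2"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// RefreshDue (hypothetical name) reports whether the expiry recorded by the
// controller via updateExpiryDateAnnotation falls within the given window.
func RefreshDue(machine *clusterv1.Machine, window time.Duration) (bool, error) {
	raw, ok := machine.GetAnnotations()[bootstrapv1.MachineCertificatesExpiryDateAnnotation]
	if !ok {
		// No expiry recorded yet; the controller adds it on first reconcile.
		return false, nil
	}
	expiry, err := time.Parse(time.RFC3339, raw)
	if err != nil {
		return false, err
	}
	return time.Until(expiry) < window, nil
}

Pairing this check with RequestRefresh from the earlier sketch would give a simple renewal loop driven entirely by the annotations this PR introduces.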