Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Refresh Worker Certificates Logic #65

Merged
merged 8 commits into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ jobs:
- "Workload cluster creation"
- "Workload cluster scaling"
- "Workload cluster upgrade"
- "Certificate Refresh"
- "Orchestrated In place upgrades"
# TODO(ben): Remove once all tests are running stable.
fail-fast: false
Expand Down
9 changes: 8 additions & 1 deletion bootstrap/api/v1beta2/certificates_refresh_consts.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
package v1beta2

const (
CertificatesRefreshAnnotation = "v1beta2.k8sd.io/refresh-certificates"
CertificatesRefreshAnnotation = "v1beta2.k8sd.io/refresh-certificates"
CertificatesRefreshStatusAnnotation = "v1beta2.k8sd.io/refresh-certificates-status"
)

// Values written to the CertificatesRefreshStatusAnnotation on a Machine to
// report the state of a certificates refresh.
const (
// CertificatesRefreshInProgressStatus is set on the Machine before the
// refresh request is issued; while it is present together with the refresh
// annotation, the reconciler returns early instead of starting another refresh.
CertificatesRefreshInProgressStatus = "in-progress"
// CertificatesRefreshDoneStatus is set once the refresh succeeded, in the
// same patch that removes the refresh annotation and records the new expiry date.
CertificatesRefreshDoneStatus = "done"
// CertificatesRefreshFailedStatus is set by the reconciler when the refresh
// returned an error.
CertificatesRefreshFailedStatus = "failed"
)

const (
Expand Down
192 changes: 119 additions & 73 deletions bootstrap/controllers/certificates_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,25 @@ import (
// CertificatesReconciler reconciles a Machine's certificates.
type CertificatesReconciler struct {
client.Client
Log logr.Logger
Scheme *runtime.Scheme
recorder record.EventRecorder

K8sdDialTimeout time.Duration

Log logr.Logger
Scheme *runtime.Scheme
recorder record.EventRecorder
K8sdDialTimeout time.Duration
managementCluster ck8s.ManagementCluster
}

// CertificatesScope bundles the objects needed to reconcile the certificates
// of a single Machine. It is assembled by createScope and handed to the
// refresh and expiry-date helpers.
type CertificatesScope struct {
// Cluster is the Cluster named by the Machine's Spec.ClusterName.
Cluster *clusterv1.Cluster
// Config is the CK8sConfig referenced by the Machine's bootstrap ConfigRef.
Config *bootstrapv1.CK8sConfig
// Log is the logger passed in by the reconciler.
Log logr.Logger
// Machine is the Machine whose certificates are being reconciled.
Machine *clusterv1.Machine
// Patcher is a patch helper created for Machine; it is used to persist
// annotation changes made on Machine.
Patcher *patch.Helper
// Workload is the workload cluster handle obtained from the management
// cluster; certificate refresh and expiry lookups are issued through it.
Workload *ck8s.Workload
}

// SetupWithManager sets up the controller with the Manager.
func (r *CertificatesReconciler) SetupWithManager(mgr ctrl.Manager) error {
if _, err := ctrl.NewControllerManagedBy(mgr).For(&clusterv1.Machine{}).Build(r); err != nil {
if err := ctrl.NewControllerManagedBy(mgr).For(&clusterv1.Machine{}).Complete(r); err != nil {
return err
}

Expand All @@ -54,15 +61,6 @@ func (r *CertificatesReconciler) SetupWithManager(mgr ctrl.Manager) error {
return nil
}

type CertificatesScope struct {
Cluster *clusterv1.Cluster
Config *bootstrapv1.CK8sConfig
Log logr.Logger
Machine *clusterv1.Machine
Patcher *patch.Helper
Workload *ck8s.Workload
}

// +kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=ck8sconfigs,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=ck8sconfigs/status,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status;machines;machines/status,verbs=get;list;watch
Expand All @@ -77,97 +75,133 @@ func (r *CertificatesReconciler) Reconcile(ctx context.Context, req ctrl.Request
if apierrors.IsNotFound(err) {
return ctrl.Result{}, nil
}
// Error reading the object - requeue the request.
return ctrl.Result{}, err
}

if m.Status.NodeRef == nil {
// If the machine does not have a node ref, we requeue the request to retry.
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
}

if !m.ObjectMeta.DeletionTimestamp.IsZero() {
// Machine is being deleted, return early.
return ctrl.Result{}, nil
}

mAnnotations := m.GetAnnotations()
if mAnnotations == nil {
mAnnotations = map[string]string{}
}

var refreshCertificates, hasExpiryDateAnnotation bool
_, refreshCertificates = mAnnotations[bootstrapv1.CertificatesRefreshAnnotation]
_, hasExpiryDateAnnotation = mAnnotations[bootstrapv1.MachineCertificatesExpiryDateAnnotation]

if mAnnotations[bootstrapv1.CertificatesRefreshStatusAnnotation] == bootstrapv1.CertificatesRefreshInProgressStatus {
if !refreshCertificates {
// If a refresh is marked as in progress but the refresh annotation is
// missing, clear the stale status annotation.
delete(mAnnotations, bootstrapv1.CertificatesRefreshStatusAnnotation)
m.SetAnnotations(mAnnotations)
if err := r.Client.Update(ctx, m); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to clear status annotation: %w", err)
}
return ctrl.Result{}, nil
}
log.Info("Certificates refresh already in progress",
"refreshStatus", bootstrapv1.CertificatesRefreshInProgressStatus,
"refreshAnnotation", mAnnotations[bootstrapv1.CertificatesRefreshAnnotation],
)
return ctrl.Result{}, nil
}

if !refreshCertificates && hasExpiryDateAnnotation {
// No need to refresh certificates or update expiry date, return early.
return ctrl.Result{}, nil
}

// Look up the CK8sConfig and build the reconciliation scope.
scope, err := r.createScope(ctx, m, log)
if err != nil {
return ctrl.Result{}, err
}

if !hasExpiryDateAnnotation {
if err := r.updateExpiryDateAnnotation(ctx, scope); err != nil {
return ctrl.Result{}, err
}
}

if refreshCertificates {
if err := r.refreshCertificates(ctx, scope); err != nil {
// On error, mark the refresh as failed and return the error so the request is requeued.
mAnnotations[bootstrapv1.CertificatesRefreshStatusAnnotation] = bootstrapv1.CertificatesRefreshFailedStatus
m.SetAnnotations(mAnnotations)
if err := r.Client.Update(ctx, m); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to clear status annotation after error: %w", err)
}
return ctrl.Result{}, err
}
}

return ctrl.Result{}, nil
}

func (r *CertificatesReconciler) createScope(ctx context.Context, m *clusterv1.Machine, log logr.Logger) (*CertificatesScope, error) {
config := &bootstrapv1.CK8sConfig{}
if err := r.Client.Get(ctx, types.NamespacedName{Namespace: m.Namespace, Name: m.Spec.Bootstrap.ConfigRef.Name}, config); err != nil {
return ctrl.Result{}, err
return nil, fmt.Errorf("failed to get CK8sConfig: %w", err)
}

// Get the owner of the CK8sConfig to determine if it's a control plane or worker node.
configOwner, err := bsutil.GetConfigOwner(ctx, r.Client, config)
if err != nil {
log.Error(err, "Failed to get config owner")
return ctrl.Result{}, err
}
if configOwner == nil {
return ctrl.Result{}, nil
if err != nil || configOwner == nil {
return nil, fmt.Errorf("failed to get config owner: %w", err)
}

cluster, err := util.GetClusterByName(ctx, r.Client, m.GetNamespace(), m.Spec.ClusterName)
if err != nil {
return ctrl.Result{}, err
return nil, fmt.Errorf("failed to get cluster: %w", err)
}

microclusterPort := config.Spec.ControlPlaneConfig.GetMicroclusterPort()
workload, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster), microclusterPort)
workload, err := r.managementCluster.GetWorkloadCluster(
ctx,
util.ObjectKey(cluster),
config.Spec.ControlPlaneConfig.GetMicroclusterPort(),
)
if err != nil {
return ctrl.Result{}, err
return nil, fmt.Errorf("failed to get workload cluster: %w", err)
}

patchHelper, err := patch.NewHelper(m, r.Client)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to create patch helper for machine: %w", err)
return nil, fmt.Errorf("failed to create patch helper: %w", err)
}

scope := &CertificatesScope{
return &CertificatesScope{
Log: log,
Machine: m,
Config: config,
Cluster: cluster,
Patcher: patchHelper,
Workload: workload,
}

if !hasExpiryDateAnnotation {
if err := r.updateExpiryDateAnnotation(ctx, scope); err != nil {
return ctrl.Result{}, err
}
}

if refreshCertificates {
if configOwner.IsControlPlaneMachine() {
if err := r.refreshControlPlaneCertificates(ctx, scope); err != nil {
return ctrl.Result{}, err
}
} else {
log.Info("worker nodes are not supported yet")
return ctrl.Result{}, nil
}
}

return ctrl.Result{}, nil
}, nil
}

func (r *CertificatesReconciler) refreshControlPlaneCertificates(ctx context.Context, scope *CertificatesScope) error {
func (r *CertificatesReconciler) refreshCertificates(ctx context.Context, scope *CertificatesScope) error {
nodeToken, err := token.LookupNodeToken(ctx, r.Client, util.ObjectKey(scope.Cluster), scope.Machine.Name)
if err != nil {
return fmt.Errorf("failed to lookup node token: %w", err)
}

mAnnotations := scope.Machine.GetAnnotations()

refreshAnnotation, ok := mAnnotations[bootstrapv1.CertificatesRefreshAnnotation]
if !ok {
return nil
return fmt.Errorf("refresh annotation not found")
}

mAnnotations[bootstrapv1.CertificatesRefreshStatusAnnotation] = bootstrapv1.CertificatesRefreshInProgressStatus
scope.Machine.SetAnnotations(mAnnotations)
if err := scope.Patcher.Patch(ctx, scope.Machine); err != nil {
return fmt.Errorf("failed to set in-progress status: %w", err)
}

r.recorder.Eventf(
Expand All @@ -179,16 +213,31 @@ func (r *CertificatesReconciler) refreshControlPlaneCertificates(ctx context.Con

seconds, err := utiltime.TTLToSeconds(refreshAnnotation)
if err != nil {
return fmt.Errorf("failed to parse expires-in annotation value: %w", err)
return fmt.Errorf("failed to parse TTL: %w", err)
}

controlPlaneConfig := scope.Config.Spec.ControlPlaneConfig
controlPlaneEndpoint := scope.Cluster.Spec.ControlPlaneEndpoint.Host

extraSANs := controlPlaneConfig.ExtraSANs
extraSANs = append(extraSANs, controlPlaneEndpoint)
var expirySecondsUnix int
configOwner, _ := bsutil.GetConfigOwner(ctx, r.Client, scope.Config)
if configOwner.IsControlPlaneMachine() {
var extraSANs []string
extraSANs = append(extraSANs, scope.Config.Spec.ControlPlaneConfig.ExtraSANs...)
extraSANs = append(extraSANs, scope.Cluster.Spec.ControlPlaneEndpoint.Host)
expirySecondsUnix, err = scope.Workload.RefreshControlPlaneCertificates(
ctx,
scope.Machine,
*nodeToken,
seconds,
extraSANs,
)
} else {
expirySecondsUnix, err = scope.Workload.RefreshWorkerCertificates(
ctx,
scope.Machine,
*nodeToken,
seconds,
)
}

expirySecondsUnix, err := scope.Workload.RefreshCertificates(ctx, scope.Machine, *nodeToken, seconds, extraSANs)
if err != nil {
r.recorder.Eventf(
scope.Machine,
Expand All @@ -200,10 +249,11 @@ func (r *CertificatesReconciler) refreshControlPlaneCertificates(ctx context.Con
}

expiryTime := time.Unix(int64(expirySecondsUnix), 0)

delete(mAnnotations, bootstrapv1.CertificatesRefreshAnnotation)
mAnnotations[bootstrapv1.CertificatesRefreshStatusAnnotation] = bootstrapv1.CertificatesRefreshDoneStatus
mAnnotations[bootstrapv1.MachineCertificatesExpiryDateAnnotation] = expiryTime.Format(time.RFC3339)
scope.Machine.SetAnnotations(mAnnotations)

if err := scope.Patcher.Patch(ctx, scope.Machine); err != nil {
return fmt.Errorf("failed to patch machine annotations: %w", err)
}
Expand All @@ -230,21 +280,17 @@ func (r *CertificatesReconciler) updateExpiryDateAnnotation(ctx context.Context,
return fmt.Errorf("failed to lookup node token: %w", err)
}

mAnnotations := scope.Machine.GetAnnotations()
if mAnnotations == nil {
mAnnotations = map[string]string{}
}

expiryDateString, err := scope.Workload.GetCertificatesExpiryDate(ctx, scope.Machine, *nodeToken)
if err != nil {
return fmt.Errorf("failed to get certificates expiry date: %w", err)
}

mAnnotations[bootstrapv1.MachineCertificatesExpiryDateAnnotation] = expiryDateString
scope.Machine.SetAnnotations(mAnnotations)
if err := scope.Patcher.Patch(ctx, scope.Machine); err != nil {
return fmt.Errorf("failed to patch machine annotations: %w", err)
mAnnotations := scope.Machine.GetAnnotations()
if mAnnotations == nil {
mAnnotations = map[string]string{}
}

return nil
mAnnotations[bootstrapv1.MachineCertificatesExpiryDateAnnotation] = expiryDateString
scope.Machine.SetAnnotations(mAnnotations)
return scope.Patcher.Patch(ctx, scope.Machine)
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ require (
golang.org/x/mod v0.19.0
golang.org/x/net v0.23.0 // indirect
golang.org/x/oauth2 v0.18.0 // indirect
golang.org/x/sync v0.6.0 // indirect
golang.org/x/sync v0.8.0
golang.org/x/sys v0.18.0 // indirect
golang.org/x/term v0.18.0 // indirect
golang.org/x/text v0.14.0 // indirect
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -359,8 +359,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ=
golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
Expand Down
Loading