diff --git a/internal/controllers/topology/cluster/desired_state.go b/internal/controllers/topology/cluster/desired_state.go
index cc48434765cf..140ff6439048 100644
--- a/internal/controllers/topology/cluster/desired_state.go
+++ b/internal/controllers/topology/cluster/desired_state.go
@@ -86,7 +86,7 @@ func (r *Reconciler) computeDesiredState(ctx context.Context, s *scope.Scope) (*
 		desiredState.ControlPlane.MachineHealthCheck = computeMachineHealthCheck(
 			desiredState.ControlPlane.Object,
 			selectorForControlPlaneMHC(),
-			s.Current.Cluster.Name,
+			s.Current.Cluster,
 			s.Blueprint.ControlPlaneMachineHealthCheckClass())
 	}
 
@@ -764,7 +764,7 @@ func computeMachineDeployment(_ context.Context, s *scope.Scope, machineDeployme
 		desiredMachineDeployment.MachineHealthCheck = computeMachineHealthCheck(
 			desiredMachineDeploymentObj,
 			selectorForMachineDeploymentMHC(desiredMachineDeploymentObj),
-			s.Current.Cluster.Name,
+			s.Current.Cluster,
 			s.Blueprint.MachineDeploymentMachineHealthCheckClass(&machineDeploymentTopology))
 	}
 	return desiredMachineDeployment, nil
@@ -1020,7 +1020,7 @@ func ownerReferenceTo(obj client.Object) *metav1.OwnerReference {
 	}
 }
 
-func computeMachineHealthCheck(healthCheckTarget client.Object, selector *metav1.LabelSelector, clusterName string, check *clusterv1.MachineHealthCheckClass) *clusterv1.MachineHealthCheck {
+func computeMachineHealthCheck(healthCheckTarget client.Object, selector *metav1.LabelSelector, cluster *clusterv1.Cluster, check *clusterv1.MachineHealthCheckClass) *clusterv1.MachineHealthCheck {
 	// Create a MachineHealthCheck with the spec given in the ClusterClass.
 	mhc := &clusterv1.MachineHealthCheck{
 		TypeMeta: metav1.TypeMeta{
@@ -1033,9 +1033,14 @@ func computeMachineHealthCheck(healthCheckTarget client.Object, selector *metav1
 			Labels: map[string]string{
 				clusterv1.ClusterTopologyOwnedLabel: "",
 			},
+			// Note: we are adding an ownerRef to Cluster so the MHC will be automatically garbage collected
+			// in case deletion is triggered before an object reconcile happens.
+			OwnerReferences: []metav1.OwnerReference{
+				*ownerReferenceTo(cluster),
+			},
 		},
 		Spec: clusterv1.MachineHealthCheckSpec{
-			ClusterName:         clusterName,
+			ClusterName:         cluster.Name,
 			Selector:            *selector,
 			UnhealthyConditions: check.UnhealthyConditions,
 			MaxUnhealthy:        check.MaxUnhealthy,
diff --git a/internal/controllers/topology/cluster/desired_state_test.go b/internal/controllers/topology/cluster/desired_state_test.go
index 91079163a4a3..31e6cc63c166 100644
--- a/internal/controllers/topology/cluster/desired_state_test.go
+++ b/internal/controllers/topology/cluster/desired_state_test.go
@@ -2239,7 +2239,7 @@ func Test_computeMachineHealthCheck(t *testing.T) {
 		"foo": "bar",
 	}}
 	healthCheckTarget := builder.MachineDeployment("ns1", "md1").Build()
-	clusterName := "cluster1"
+	cluster := builder.Cluster("ns1", "cluster1").Build()
 	want := &clusterv1.MachineHealthCheck{
 		TypeMeta: metav1.TypeMeta{
 			Kind:       clusterv1.GroupVersion.WithKind("MachineHealthCheck").Kind,
@@ -2253,9 +2253,12 @@
 				"cluster.x-k8s.io/cluster-name":     "cluster1",
 				clusterv1.ClusterTopologyOwnedLabel: "",
 			},
+			OwnerReferences: []metav1.OwnerReference{
+				*ownerReferenceTo(cluster),
+			},
 		},
 		Spec: clusterv1.MachineHealthCheckSpec{
-			ClusterName: "cluster1",
+			ClusterName: cluster.Name,
 			Selector: metav1.LabelSelector{MatchLabels: map[string]string{
 				"foo": "bar",
 			}},
@@ -2281,7 +2284,7 @@ func Test_computeMachineHealthCheck(t *testing.T) {
 
 	t.Run("set all fields correctly", func(t *testing.T) {
 		g := NewWithT(t)
 
-		got := computeMachineHealthCheck(healthCheckTarget, selector, clusterName, mhcSpec)
+		got := computeMachineHealthCheck(healthCheckTarget, selector, cluster, mhcSpec)
 		g.Expect(got).To(Equal(want), cmp.Diff(got, want))
 	})
diff --git a/internal/controllers/topology/cluster/reconcile_state.go b/internal/controllers/topology/cluster/reconcile_state.go
index 3e597b36da38..28df1383ce19 100644
--- a/internal/controllers/topology/cluster/reconcile_state.go
+++ b/internal/controllers/topology/cluster/reconcile_state.go
@@ -45,6 +45,7 @@ import (
 	"sigs.k8s.io/cluster-api/internal/hooks"
 	tlog "sigs.k8s.io/cluster-api/internal/log"
 	"sigs.k8s.io/cluster-api/internal/topology/check"
+	"sigs.k8s.io/cluster-api/util"
 )
 
 const (
@@ -88,7 +89,7 @@ func (r *Reconciler) reconcileState(ctx context.Context, s *scope.Scope) error {
 		return errControlPlane
 	}
 
-	// In this case (reconcileInfrastructureCluster passed reporting creation of the infrastructure cluster object, reconcileControlPlane - which is expected to create the control plane object - failed),
+	// In this case (reconcileInfrastructureCluster reported creation of the infrastructure cluster object, reconcileControlPlane - which is expected to create the control plane object - failed),
 	// if the creation of the control plane actually did not happen, blank out ControlPlaneRef from desired cluster.
 	if s.Current.Cluster.Spec.ControlPlaneRef == nil && !createdControlPlane {
 		s.Desired.Cluster.Spec.ControlPlaneRef = nil
@@ -133,14 +134,18 @@ func (r *Reconciler) reconcileClusterShim(ctx context.Context, s *scope.Scope) e
 		shim.APIVersion = corev1.SchemeGroupVersion.String()
 
 		// Add the shim as a temporary owner for the InfrastructureCluster.
-		ownerRefs := s.Desired.InfrastructureCluster.GetOwnerReferences()
-		ownerRefs = append(ownerRefs, *ownerReferenceTo(shim))
-		s.Desired.InfrastructureCluster.SetOwnerReferences(ownerRefs)
+		s.Desired.InfrastructureCluster.SetOwnerReferences(
+			util.EnsureOwnerRef(s.Desired.InfrastructureCluster.GetOwnerReferences(),
+				*ownerReferenceTo(shim),
+			),
+		)
 
 		// Add the shim as a temporary owner for the ControlPlane.
-		ownerRefs = s.Desired.ControlPlane.Object.GetOwnerReferences()
-		ownerRefs = append(ownerRefs, *ownerReferenceTo(shim))
-		s.Desired.ControlPlane.Object.SetOwnerReferences(ownerRefs)
+		s.Desired.ControlPlane.Object.SetOwnerReferences(
+			util.EnsureOwnerRef(s.Desired.ControlPlane.Object.GetOwnerReferences(),
+				*ownerReferenceTo(shim),
+			),
+		)
 	}
 
 	// If the InfrastructureCluster and the ControlPlane objects have been already created
@@ -323,6 +328,7 @@ func (r *Reconciler) reconcileControlPlane(ctx context.Context, s *scope.Scope)
 		return false, nil
 	}
 
 	// If the clusterClass mandates the controlPlane has infrastructureMachines, reconcile it.
+	infrastructureMachineCleanupFunc := func() {}
 	if s.Blueprint.HasControlPlaneInfrastructureMachine() {
 		ctx, _ := tlog.LoggerFrom(ctx).WithObject(s.Desired.ControlPlane.InfrastructureMachineTemplate).Into(ctx)
@@ -332,21 +338,36 @@ func (r *Reconciler) reconcileControlPlane(ctx context.Context, s *scope.Scope)
 		}
 
 		// Create or update the MachineInfrastructureTemplate of the control plane.
-		if err = r.reconcileReferencedTemplate(ctx, reconcileReferencedTemplateInput{
+		createdInfrastructureTemplate, err := r.reconcileReferencedTemplate(ctx, reconcileReferencedTemplateInput{
 			cluster:              s.Current.Cluster,
 			ref:                  cpInfraRef,
 			current:              s.Current.ControlPlane.InfrastructureMachineTemplate,
 			desired:              s.Desired.ControlPlane.InfrastructureMachineTemplate,
 			compatibilityChecker: check.ObjectsAreCompatible,
 			templateNamePrefix:   controlPlaneInfrastructureMachineTemplateNamePrefix(s.Current.Cluster.Name),
-		},
-		); err != nil {
+		})
+		if err != nil {
 			return false, err
 		}
 
+		if createdInfrastructureTemplate {
+			infrastructureMachineCleanupFunc = func() {
+				// Best effort cleanup of the InfrastructureMachineTemplate;
+				// If this fails, the object will be garbage collected when the cluster is deleted.
+				if err := r.Client.Delete(ctx, s.Desired.ControlPlane.InfrastructureMachineTemplate); err != nil {
+					log := tlog.LoggerFrom(ctx).
+						WithValues(s.Desired.ControlPlane.InfrastructureMachineTemplate.GetObjectKind().GroupVersionKind().Kind, s.Desired.ControlPlane.InfrastructureMachineTemplate.GetName()).
+						WithValues("err", err.Error())
+					log.Infof("WARNING! Failed to cleanup InfrastructureMachineTemplate for control plane while handling creation or update error. The object will be garbage collected when the cluster is deleted.")
+				}
+			}
+		}
+
 		// The controlPlaneObject.Spec.machineTemplate.infrastructureRef has to be updated in the desired object
 		err = contract.ControlPlane().MachineTemplate().InfrastructureRef().Set(s.Desired.ControlPlane.Object, refToUnstructured(cpInfraRef))
 		if err != nil {
+			// Best effort cleanup of the InfrastructureMachineTemplate (only on creation).
+			infrastructureMachineCleanupFunc()
 			return false, errors.Wrapf(err, "failed to reconcile %s", tlog.KObj{Obj: s.Desired.ControlPlane.Object})
 		}
 	}
@@ -360,6 +381,8 @@ func (r *Reconciler) reconcileControlPlane(ctx context.Context, s *scope.Scope)
 		versionGetter: contract.ControlPlane().Version().Get,
 	})
 	if err != nil {
+		// Best effort cleanup of the InfrastructureMachineTemplate (only on creation).
+		infrastructureMachineCleanupFunc()
 		return created, err
 	}
 
@@ -571,28 +594,66 @@ func (r *Reconciler) createMachineDeployment(ctx context.Context, s *scope.Scope
 	log := tlog.LoggerFrom(ctx).WithMachineDeployment(md.Object)
 	cluster := s.Current.Cluster
 	infraCtx, _ := log.WithObject(md.InfrastructureMachineTemplate).Into(ctx)
-	if err := r.reconcileReferencedTemplate(infraCtx, reconcileReferencedTemplateInput{
+	infrastructureMachineCleanupFunc := func() {}
+	createdInfra, err := r.reconcileReferencedTemplate(infraCtx, reconcileReferencedTemplateInput{
 		cluster: cluster,
 		desired: md.InfrastructureMachineTemplate,
-	}); err != nil {
+	})
+	if err != nil {
 		return errors.Wrapf(err, "failed to create %s", md.Object.Kind)
 	}
 
+	if createdInfra {
+		infrastructureMachineCleanupFunc = func() {
+			// Best effort cleanup of the InfrastructureMachineTemplate;
+			// If this fails, the object will be garbage collected when the cluster is deleted.
+			if err := r.Client.Delete(ctx, md.InfrastructureMachineTemplate); err != nil {
+				log := tlog.LoggerFrom(ctx).
+					WithValues(md.InfrastructureMachineTemplate.GetObjectKind().GroupVersionKind().Kind, md.InfrastructureMachineTemplate.GetName()).
+					WithValues("err", err.Error())
+				log.Infof("WARNING! Failed to cleanup InfrastructureMachineTemplate for MachineDeployment while handling creation error. The object will be garbage collected when the cluster is deleted.")
+			}
+		}
+	}
+
 	bootstrapCtx, _ := log.WithObject(md.BootstrapTemplate).Into(ctx)
-	if err := r.reconcileReferencedTemplate(bootstrapCtx, reconcileReferencedTemplateInput{
+	bootstrapCleanupFunc := func() {}
+	createdBootstrap, err := r.reconcileReferencedTemplate(bootstrapCtx, reconcileReferencedTemplateInput{
 		cluster: cluster,
 		desired: md.BootstrapTemplate,
-	}); err != nil {
+	})
+	if err != nil {
+		// Best effort cleanup of the InfrastructureMachineTemplate (only on creation).
+		infrastructureMachineCleanupFunc()
 		return errors.Wrapf(err, "failed to create %s", md.Object.Kind)
 	}
 
+	if createdBootstrap {
+		bootstrapCleanupFunc = func() {
+			// Best effort cleanup of the BootstrapTemplate;
+			// If this fails, the object will be garbage collected when the cluster is deleted.
+			if err := r.Client.Delete(ctx, md.BootstrapTemplate); err != nil {
+				log := tlog.LoggerFrom(ctx).
+					WithValues(md.BootstrapTemplate.GetObjectKind().GroupVersionKind().Kind, md.BootstrapTemplate.GetName()).
+					WithValues("err", err.Error())
+				log.Infof("WARNING! Failed to cleanup BootstrapTemplate for MachineDeployment while handling creation error. The object will be garbage collected when the cluster is deleted.")
+			}
+		}
+	}
+
 	log = log.WithObject(md.Object)
 	log.Infof(fmt.Sprintf("Creating %s", tlog.KObj{Obj: md.Object}))
 	helper, err := r.patchHelperFactory(ctx, nil, md.Object)
 	if err != nil {
+		// Best effort cleanup of the InfrastructureMachineTemplate & BootstrapTemplate (only on creation).
+		infrastructureMachineCleanupFunc()
+		bootstrapCleanupFunc()
 		return createErrorWithoutObjectName(ctx, err, md.Object)
 	}
 	if err := helper.Patch(ctx); err != nil {
+		// Best effort cleanup of the InfrastructureMachineTemplate & BootstrapTemplate (only on creation).
+		infrastructureMachineCleanupFunc()
+		bootstrapCleanupFunc()
 		return createErrorWithoutObjectName(ctx, err, md.Object)
 	}
 	r.recorder.Eventf(cluster, corev1.EventTypeNormal, createEventReason, "Created %q", tlog.KObj{Obj: md.Object})
@@ -645,33 +706,68 @@ func (r *Reconciler) updateMachineDeployment(ctx context.Context, s *scope.Scope
 	cluster := s.Current.Cluster
 
 	infraCtx, _ := log.WithObject(desiredMD.InfrastructureMachineTemplate).Into(ctx)
-	if err := r.reconcileReferencedTemplate(infraCtx, reconcileReferencedTemplateInput{
+	infrastructureMachineCleanupFunc := func() {}
+	createdInfra, err := r.reconcileReferencedTemplate(infraCtx, reconcileReferencedTemplateInput{
 		cluster:              cluster,
 		ref:                  &desiredMD.Object.Spec.Template.Spec.InfrastructureRef,
 		current:              currentMD.InfrastructureMachineTemplate,
 		desired:              desiredMD.InfrastructureMachineTemplate,
 		templateNamePrefix:   infrastructureMachineTemplateNamePrefix(cluster.Name, mdTopologyName),
 		compatibilityChecker: check.ObjectsAreCompatible,
-	}); err != nil {
+	})
+	if err != nil {
 		return errors.Wrapf(err, "failed to reconcile %s", tlog.KObj{Obj: currentMD.Object})
 	}
 
+	if createdInfra {
+		infrastructureMachineCleanupFunc = func() {
+			// Best effort cleanup of the InfrastructureMachineTemplate;
+			// If this fails, the object will be garbage collected when the cluster is deleted.
+			if err := r.Client.Delete(ctx, desiredMD.InfrastructureMachineTemplate); err != nil {
+				log := tlog.LoggerFrom(ctx).
+					WithValues(desiredMD.InfrastructureMachineTemplate.GetObjectKind().GroupVersionKind().Kind, desiredMD.InfrastructureMachineTemplate.GetName()).
+					WithValues("err", err.Error())
+				log.Infof("WARNING! Failed to cleanup InfrastructureMachineTemplate for MachineDeployment while handling update error. The object will be garbage collected when the cluster is deleted.")
+			}
+		}
+	}
+
 	bootstrapCtx, _ := log.WithObject(desiredMD.BootstrapTemplate).Into(ctx)
-	if err := r.reconcileReferencedTemplate(bootstrapCtx, reconcileReferencedTemplateInput{
+	bootstrapCleanupFunc := func() {}
+	createdBootstrap, err := r.reconcileReferencedTemplate(bootstrapCtx, reconcileReferencedTemplateInput{
 		cluster:              cluster,
 		ref:                  desiredMD.Object.Spec.Template.Spec.Bootstrap.ConfigRef,
 		current:              currentMD.BootstrapTemplate,
 		desired:              desiredMD.BootstrapTemplate,
 		templateNamePrefix:   bootstrapTemplateNamePrefix(cluster.Name, mdTopologyName),
 		compatibilityChecker: check.ObjectsAreInTheSameNamespace,
-	}); err != nil {
+	})
+	if err != nil {
+		// Best effort cleanup of the InfrastructureMachineTemplate (only on template rotation).
+		infrastructureMachineCleanupFunc()
 		return errors.Wrapf(err, "failed to reconcile %s", tlog.KObj{Obj: currentMD.Object})
 	}
 
+	if createdBootstrap {
+		bootstrapCleanupFunc = func() {
+			// Best effort cleanup of the BootstrapTemplate;
+			// If this fails, the object will be garbage collected when the cluster is deleted.
+			if err := r.Client.Delete(ctx, desiredMD.BootstrapTemplate); err != nil {
+				log := tlog.LoggerFrom(ctx).
+					WithValues(desiredMD.BootstrapTemplate.GetObjectKind().GroupVersionKind().Kind, desiredMD.BootstrapTemplate.GetName()).
+					WithValues("err", err.Error())
+				log.Infof("WARNING! Failed to cleanup BootstrapTemplate for MachineDeployment while handling update error. The object will be garbage collected when the cluster is deleted.")
+			}
+		}
+	}
+
 	// Check differences between current and desired MachineDeployment, and eventually patch the current object.
 	log = log.WithObject(desiredMD.Object)
 	patchHelper, err := r.patchHelperFactory(ctx, currentMD.Object, desiredMD.Object)
 	if err != nil {
+		// Best effort cleanup of the InfrastructureMachineTemplate & BootstrapTemplate (only on template rotation).
+		infrastructureMachineCleanupFunc()
+		bootstrapCleanupFunc()
 		return errors.Wrapf(err, "failed to create patch helper for %s", tlog.KObj{Obj: currentMD.Object})
 	}
 	if !patchHelper.HasChanges() {
@@ -681,6 +777,9 @@ func (r *Reconciler) updateMachineDeployment(ctx context.Context, s *scope.Scope
 
 	log.Infof("Patching %s", tlog.KObj{Obj: currentMD.Object})
 	if err := patchHelper.Patch(ctx); err != nil {
+		// Best effort cleanup of the InfrastructureMachineTemplate & BootstrapTemplate (only on template rotation).
+		infrastructureMachineCleanupFunc()
+		bootstrapCleanupFunc()
 		return errors.Wrapf(err, "failed to patch %s", tlog.KObj{Obj: currentMD.Object})
 	}
 	r.recorder.Eventf(cluster, corev1.EventTypeNormal, updateEventReason, "Updated %q%s", tlog.KObj{Obj: currentMD.Object}, logMachineDeploymentVersionChange(currentMD.Object, desiredMD.Object))
@@ -773,6 +872,7 @@ type reconcileReferencedObjectInput struct {
 }
 
 // reconcileReferencedObject reconciles the desired state of the referenced object.
+// Returns true if the referencedObject is created.
 // NOTE: After a referenced object is created it is assumed that the reference should
 // never change (only the content of the object can eventually change). Thus, we are checking for strict compatibility.
 func (r *Reconciler) reconcileReferencedObject(ctx context.Context, in reconcileReferencedObjectInput) (bool, error) {
@@ -845,6 +945,7 @@ type reconcileReferencedTemplateInput struct {
 }
 
 // reconcileReferencedTemplate reconciles the desired state of a referenced Template.
+// Returns true if the referencedTemplate is created.
 // NOTE: According to Cluster API operational practices, when a referenced Template changes a template rotation is required:
 // 1. create a new Template
 // 2. update the reference
@@ -852,7 +953,7 @@ type reconcileReferencedTemplateInput struct {
 // This function specifically takes care of the first step and updates the reference locally. So the remaining steps
 // can be executed afterwards.
 // NOTE: This func has a side effect in case of template rotation, changing both the desired object and the object reference.
-func (r *Reconciler) reconcileReferencedTemplate(ctx context.Context, in reconcileReferencedTemplateInput) error {
+func (r *Reconciler) reconcileReferencedTemplate(ctx context.Context, in reconcileReferencedTemplateInput) (bool, error) {
 	log := tlog.LoggerFrom(ctx)
 
 	// If there is no current object, create the desired object.
@@ -860,34 +961,34 @@ func (r *Reconciler) reconcileReferencedTemplate(ctx context.Context, in reconci
 		log.Infof("Creating %s", tlog.KObj{Obj: in.desired})
 		helper, err := r.patchHelperFactory(ctx, nil, in.desired)
 		if err != nil {
-			return errors.Wrap(createErrorWithoutObjectName(ctx, err, in.desired), "failed to create patch helper")
+			return false, errors.Wrap(createErrorWithoutObjectName(ctx, err, in.desired), "failed to create patch helper")
 		}
 		if err := helper.Patch(ctx); err != nil {
-			return createErrorWithoutObjectName(ctx, err, in.desired)
+			return false, createErrorWithoutObjectName(ctx, err, in.desired)
 		}
 		r.recorder.Eventf(in.cluster, corev1.EventTypeNormal, createEventReason, "Created %q", tlog.KObj{Obj: in.desired})
-		return nil
+		return true, nil
 	}
 
 	if in.ref == nil {
-		return errors.Errorf("failed to rotate %s: ref should not be nil", in.desired.GroupVersionKind())
+		return false, errors.Errorf("failed to rotate %s: ref should not be nil", in.desired.GroupVersionKind())
 	}
 
 	// Check if the current and desired referenced object are compatible.
 	if allErrs := in.compatibilityChecker(in.current, in.desired); len(allErrs) > 0 {
-		return allErrs.ToAggregate()
+		return false, allErrs.ToAggregate()
 	}
 
 	// Check differences between current and desired objects, and if there are changes eventually start the template rotation.
 	patchHelper, err := r.patchHelperFactory(ctx, in.current, in.desired)
 	if err != nil {
-		return errors.Wrapf(err, "failed to create patch helper for %s", tlog.KObj{Obj: in.current})
+		return false, errors.Wrapf(err, "failed to create patch helper for %s", tlog.KObj{Obj: in.current})
 	}
 
 	// Return if no changes are detected.
 	if !patchHelper.HasChanges() {
 		log.V(3).Infof("No changes for %s", tlog.KObj{Obj: in.desired})
-		return nil
+		return false, nil
 	}
 
 	// If there are no changes in the spec, and thus only changes in metadata, instead of doing a full template
@@ -895,10 +996,10 @@ func (r *Reconciler) reconcileReferencedTemplate(ctx context.Context, in reconci
 	if !patchHelper.HasSpecChanges() {
 		log.Infof("Patching %s", tlog.KObj{Obj: in.desired})
 		if err := patchHelper.Patch(ctx); err != nil {
-			return errors.Wrapf(err, "failed to patch %s", tlog.KObj{Obj: in.desired})
+			return false, errors.Wrapf(err, "failed to patch %s", tlog.KObj{Obj: in.desired})
 		}
 		r.recorder.Eventf(in.cluster, corev1.EventTypeNormal, updateEventReason, "Updated %q (metadata changes)", tlog.KObj{Obj: in.desired})
-		return nil
+		return false, nil
 	}
 
 	// Create the new template.
@@ -912,10 +1013,10 @@ func (r *Reconciler) reconcileReferencedTemplate(ctx context.Context, in reconci
 	log.Infof("Creating %s", tlog.KObj{Obj: in.desired})
 	helper, err := r.patchHelperFactory(ctx, nil, in.desired)
 	if err != nil {
-		return errors.Wrap(createErrorWithoutObjectName(ctx, err, in.desired), "failed to create patch helper")
+		return false, errors.Wrap(createErrorWithoutObjectName(ctx, err, in.desired), "failed to create patch helper")
 	}
 	if err := helper.Patch(ctx); err != nil {
-		return createErrorWithoutObjectName(ctx, err, in.desired)
+		return false, createErrorWithoutObjectName(ctx, err, in.desired)
 	}
 
 	r.recorder.Eventf(in.cluster, corev1.EventTypeNormal, createEventReason, "Created %q as a replacement for %q (template rotation)", tlog.KObj{Obj: in.desired}, in.ref.Name)
@@ -924,7 +1025,7 @@ func (r *Reconciler) reconcileReferencedTemplate(ctx context.Context, in reconci
 	// TODO: find a way to make side effect more explicit
 	in.ref.Name = newName
 
-	return nil
+	return true, nil
 }
 
 // createErrorWithoutObjectName removes the name of the object from the error message. As each new Create call involves an
diff --git a/internal/controllers/topology/cluster/reconcile_state_test.go b/internal/controllers/topology/cluster/reconcile_state_test.go
index 05daeb329af3..1daa69159c46 100644
--- a/internal/controllers/topology/cluster/reconcile_state_test.go
+++ b/internal/controllers/topology/cluster/reconcile_state_test.go
@@ -1509,6 +1509,64 @@ func TestReconcileControlPlane(t *testing.T) {
 	}
 }
 
+func TestReconcileControlPlaneCleanup(t *testing.T) {
+	infrastructureMachineTemplate := builder.TestInfrastructureMachineTemplate(metav1.NamespaceDefault, "infra1-cluster-class").
+		WithSpecFields(map[string]interface{}{"spec.template.spec.foo": "foo"}).
+		Build()
+	ccWithControlPlaneInfrastructure := &scope.ControlPlaneBlueprint{InfrastructureMachineTemplate: infrastructureMachineTemplate}
+
+	infrastructureMachineTemplateCopy := infrastructureMachineTemplate.DeepCopy()
+	infrastructureMachineTemplateCopy.SetName("infrav1-cluster")
+	controlPlane := builder.TestControlPlane(metav1.NamespaceDefault, "cp1").
+		WithInfrastructureMachineTemplate(infrastructureMachineTemplateCopy).
+		WithSpecFields(map[string]interface{}{"spec.foo": "foo"}).
+		Build()
+
+	t.Run("cleanup InfrastructureMachineTemplate in case of errors", func(t *testing.T) {
+		g := NewWithT(t)
+
+		// Create namespace and modify input to have correct namespace set
+		namespace, err := env.CreateNamespace(ctx, "reconcile-control-plane")
+		g.Expect(err).ToNot(HaveOccurred())
+		ccWithControlPlaneInfrastructure = prepareControlPlaneBluePrint(ccWithControlPlaneInfrastructure, namespace.GetName())
+
+		s := scope.New(builder.Cluster(namespace.GetName(), "cluster1").Build())
+		s.Blueprint = &scope.ClusterBlueprint{
+			ClusterClass: &clusterv1.ClusterClass{
+				Spec: clusterv1.ClusterClassSpec{
+					ControlPlane: clusterv1.ControlPlaneClass{
+						MachineInfrastructure: &clusterv1.LocalObjectTemplate{
+							Ref: contract.ObjToRef(infrastructureMachineTemplate),
+						},
+					},
+				},
+			},
+		}
+		s.Current.ControlPlane = &scope.ControlPlaneState{}
+		s.Desired = &scope.ClusterState{
+			ControlPlane: &scope.ControlPlaneState{Object: controlPlane, InfrastructureMachineTemplate: infrastructureMachineTemplateCopy},
+		}
+		s.Desired.ControlPlane = prepareControlPlaneState(g, s.Desired.ControlPlane, namespace.GetName())
+
+		// Force control plane creation to fail
+		s.Desired.ControlPlane.Object.SetNamespace("do-not-exist")
+
+		r := Reconciler{
+			Client:             env,
+			patchHelperFactory: serverSideApplyPatchHelperFactory(env, ssa.NewCache()),
+			recorder:           env.GetEventRecorderFor("test"),
+		}
+		created, err := r.reconcileControlPlane(ctx, s)
+		g.Expect(err).To(HaveOccurred())
+		g.Expect(created).To(BeFalse())
+
+		gotInfrastructureMachineTemplate := infrastructureMachineTemplateCopy.DeepCopy()
+		err = env.GetAPIReader().Get(ctx, client.ObjectKeyFromObject(infrastructureMachineTemplateCopy), gotInfrastructureMachineTemplate)
+
+		g.Expect(apierrors.IsNotFound(err)).To(BeTrue())
+	})
+}
+
 func TestReconcileControlPlaneMachineHealthCheck(t *testing.T) {
 	// Create InfrastructureMachineTemplates for test cases
 	infrastructureMachineTemplate := builder.TestInfrastructureMachineTemplate(metav1.NamespaceDefault, "infra1").Build()
@@ -2045,6 +2103,130 @@ func TestReconcileMachineDeployments(t *testing.T) {
 	}
 }
 
+func TestReconcileMachineDeploymentsCleanup(t *testing.T) {
+	t.Run("cleanup InfrastructureMachineTemplate and BootstrapTemplate in case of errors on creation", func(t *testing.T) {
+		g := NewWithT(t)
+
+		infrastructureMachineTemplate1 := builder.TestInfrastructureMachineTemplate(metav1.NamespaceDefault, "infrastructure-machine-1").Build()
+		bootstrapTemplate1 := builder.TestBootstrapTemplate(metav1.NamespaceDefault, "bootstrap-config-1").Build()
+		md1 := newFakeMachineDeploymentTopologyState("md-1", infrastructureMachineTemplate1, bootstrapTemplate1, nil)
+
+		// Create namespace and modify input to have correct namespace set
+		namespace, err := env.CreateNamespace(ctx, "reconcile-machine-deployments")
+		g.Expect(err).ToNot(HaveOccurred())
+		md1 = prepareMachineDeploymentState(md1, namespace.GetName())
+
+		s := scope.New(builder.Cluster(namespace.GetName(), "cluster-1").Build())
+		s.Current.MachineDeployments = map[string]*scope.MachineDeploymentState{}
+		s.Desired = &scope.ClusterState{
+			MachineDeployments: map[string]*scope.MachineDeploymentState{
+				md1.Object.Name: md1,
+			},
+		}
+
+		// Force md creation to fail
+		s.Desired.MachineDeployments[md1.Object.Name].Object.Namespace = "do-not-exist"
+
+		r := Reconciler{
+			Client:             env.GetClient(),
+			APIReader:          env.GetAPIReader(),
+			patchHelperFactory: serverSideApplyPatchHelperFactory(env, ssa.NewCache()),
+			recorder:           env.GetEventRecorderFor("test"),
+		}
+		err = r.reconcileMachineDeployments(ctx, s)
+		g.Expect(err).To(HaveOccurred())
+
+		gotBootstrapTemplateRef := md1.Object.Spec.Template.Spec.Bootstrap.ConfigRef
+		gotBootstrapTemplate := unstructured.Unstructured{}
+		gotBootstrapTemplate.SetKind(gotBootstrapTemplateRef.Kind)
+		gotBootstrapTemplate.SetAPIVersion(gotBootstrapTemplateRef.APIVersion)
+
+		err = env.GetAPIReader().Get(ctx, client.ObjectKey{
+			Namespace: gotBootstrapTemplateRef.Namespace,
+			Name:      gotBootstrapTemplateRef.Name,
+		}, &gotBootstrapTemplate)
+
+		g.Expect(apierrors.IsNotFound(err)).To(BeTrue())
+
+		gotInfrastructureMachineTemplateRef := md1.Object.Spec.Template.Spec.InfrastructureRef
+		gotInfrastructureMachineTemplate := unstructured.Unstructured{}
+		gotInfrastructureMachineTemplate.SetKind(gotInfrastructureMachineTemplateRef.Kind)
+		gotInfrastructureMachineTemplate.SetAPIVersion(gotInfrastructureMachineTemplateRef.APIVersion)
+
+		err = env.GetAPIReader().Get(ctx, client.ObjectKey{
+			Namespace: gotInfrastructureMachineTemplateRef.Namespace,
+			Name:      gotInfrastructureMachineTemplateRef.Name,
+		}, &gotInfrastructureMachineTemplate)
+
+		g.Expect(apierrors.IsNotFound(err)).To(BeTrue())
+	})
+	t.Run("cleanup InfrastructureMachineTemplate and BootstrapTemplate in case of errors on upgrade", func(t *testing.T) {
+		g := NewWithT(t)
+
+		infrastructureMachineTemplate2 := builder.TestInfrastructureMachineTemplate(metav1.NamespaceDefault, "infrastructure-machine-2").Build()
+		bootstrapTemplate2 := builder.TestBootstrapTemplate(metav1.NamespaceDefault, "bootstrap-config-2").Build()
+		md2 := newFakeMachineDeploymentTopologyState("md-2", infrastructureMachineTemplate2, bootstrapTemplate2, nil)
+
+		bootstrapTemplate2WithChanges := bootstrapTemplate2.DeepCopy()
+		g.Expect(unstructured.SetNestedField(bootstrapTemplate2WithChanges.Object, "foo", "spec", "template", "spec", "foo")).To(Succeed())
+		infrastructureMachineTemplate2WithChanges := infrastructureMachineTemplate2.DeepCopy()
+		g.Expect(unstructured.SetNestedField(infrastructureMachineTemplate2WithChanges.Object, "foo", "spec", "template", "spec", "foo")).To(Succeed())
+		md2WithTemplateChanges := newFakeMachineDeploymentTopologyState(md2.Object.Name, infrastructureMachineTemplate2WithChanges, bootstrapTemplate2WithChanges, nil)
+
+		// Create namespace and modify input to have correct namespace set
+		namespace, err := env.CreateNamespace(ctx, "reconcile-machine-deployments")
+		g.Expect(err).ToNot(HaveOccurred())
+		md2 = prepareMachineDeploymentState(md2, namespace.GetName())
+		md2WithTemplateChanges = prepareMachineDeploymentState(md2WithTemplateChanges, namespace.GetName())
+
+		s := scope.New(builder.Cluster(namespace.GetName(), "cluster-1").Build())
+		s.Current.MachineDeployments = map[string]*scope.MachineDeploymentState{
+			md2.Object.Name: md2,
+		}
+		s.Desired = &scope.ClusterState{
+			MachineDeployments: map[string]*scope.MachineDeploymentState{
+				md2WithTemplateChanges.Object.Name: md2WithTemplateChanges,
+			},
+		}
+
+		// Force md upgrade to fail
+		s.Desired.MachineDeployments[md2WithTemplateChanges.Object.Name].Object.Namespace = "do-not-exist"
+
+		r := Reconciler{
+			Client:             env.GetClient(),
+			APIReader:          env.GetAPIReader(),
+			patchHelperFactory: serverSideApplyPatchHelperFactory(env, ssa.NewCache()),
+			recorder:           env.GetEventRecorderFor("test"),
+		}
+		err = r.reconcileMachineDeployments(ctx, s)
+		g.Expect(err).To(HaveOccurred())
+
+		newBootstrapTemplateRef := md2WithTemplateChanges.Object.Spec.Template.Spec.Bootstrap.ConfigRef
+		newBootstrapTemplate := unstructured.Unstructured{}
+		newBootstrapTemplate.SetKind(newBootstrapTemplateRef.Kind)
+		newBootstrapTemplate.SetAPIVersion(newBootstrapTemplateRef.APIVersion)
+
+		err = env.GetAPIReader().Get(ctx, client.ObjectKey{
+			Namespace: newBootstrapTemplateRef.Namespace,
+			Name:      newBootstrapTemplateRef.Name,
+		}, &newBootstrapTemplate)
+
+		g.Expect(apierrors.IsNotFound(err)).To(BeTrue())
+
+		newInfrastructureMachineTemplateRef := md2WithTemplateChanges.Object.Spec.Template.Spec.InfrastructureRef
+		newInfrastructureMachineTemplate := unstructured.Unstructured{}
+		newInfrastructureMachineTemplate.SetKind(newInfrastructureMachineTemplateRef.Kind)
+		newInfrastructureMachineTemplate.SetAPIVersion(newInfrastructureMachineTemplateRef.APIVersion)
+
+		err = env.GetAPIReader().Get(ctx, client.ObjectKey{
+			Namespace: newInfrastructureMachineTemplateRef.Namespace,
+			Name:      newInfrastructureMachineTemplateRef.Name,
+		}, &newInfrastructureMachineTemplate)
+
+		g.Expect(apierrors.IsNotFound(err)).To(BeTrue())
+	})
+}
+
 // TestReconcileReferencedObjectSequences tests multiple subsequent calls to reconcileReferencedObject
 // for a control-plane object to verify that the objects are reconciled as expected by tracking managed fields correctly.
 // NOTE: by Extension this tests validates managed field handling in mergePatches, and thus its usage in other parts of the
@@ -2517,7 +2699,7 @@ func TestReconcileReferencedObjectSequences(t *testing.T) {
 					current: s.Current.ControlPlane.Object,
 					desired: s.Desired.ControlPlane.Object,
 				})
-				g.Expect(err).To(Succeed())
+				g.Expect(err).ToNot(HaveOccurred())
 				g.Expect(created).To(Equal(step.wantCreated))
 
 				// Build the object for comparison.
@@ -2747,7 +2929,7 @@ func TestReconcileState(t *testing.T) {
 
 	t.Run("Cluster get reconciled with infrastructure Ref only when reconcileInfrastructureCluster pass and reconcileControlPlane fails ", func(t *testing.T) {
 		g := NewWithT(t)
 
-		currentdCluster := builder.Cluster(metav1.NamespaceDefault, "cluster1").Build()
+		currentCluster := builder.Cluster(metav1.NamespaceDefault, "cluster1").Build()
 		infrastructureCluster := builder.TestInfrastructureCluster(metav1.NamespaceDefault, "infrastructure-cluster1").Build()
 		controlPlane := builder.TestControlPlane(metav1.NamespaceDefault, "controlplane-cluster1").Build()
@@ -2759,12 +2941,12 @@ func TestReconcileState(t *testing.T) {
 		// cluster requires a UID because reconcileClusterShim will create a cluster shim
 		// which has the cluster set as Owner in an OwnerReference.
 		// A valid OwnerReferences requires a uid.
-		currentdCluster.SetUID("foo")
+		currentCluster.SetUID("foo")
 
 		// NOTE: it is ok to use create given that the Cluster are created by user.
-		g.Expect(env.CreateAndWait(ctx, currentdCluster)).To(Succeed())
+		g.Expect(env.CreateAndWait(ctx, currentCluster)).To(Succeed())
 
-		s := scope.New(currentdCluster)
+		s := scope.New(currentCluster)
 		s.Blueprint = &scope.ClusterBlueprint{ClusterClass: &clusterv1.ClusterClass{}}
 		s.Current.ControlPlane = &scope.ControlPlaneState{}
 		s.Desired = &scope.ClusterState{Cluster: desiredCluster, InfrastructureCluster: infrastructureCluster, ControlPlane: &scope.ControlPlaneState{Object: controlPlane}}
@@ -2785,19 +2967,19 @@ func TestReconcileState(t *testing.T) {
 
 		err = r.reconcileState(ctx, s)
 		g.Expect(err).To(HaveOccurred())
 
-		got := currentdCluster.DeepCopy()
-		err = env.GetAPIReader().Get(ctx, client.ObjectKeyFromObject(currentdCluster), got)
+		got := currentCluster.DeepCopy()
+		err = env.GetAPIReader().Get(ctx, client.ObjectKeyFromObject(currentCluster), got)
 		g.Expect(err).ToNot(HaveOccurred())
 		g.Expect(got.Spec.InfrastructureRef).ToNot(BeNil())
 		g.Expect(got.Spec.ControlPlaneRef).To(BeNil())
 
-		g.Expect(env.CleanupAndWait(ctx, infrastructureCluster, currentdCluster)).To(Succeed())
+		g.Expect(env.CleanupAndWait(ctx, infrastructureCluster, currentCluster)).To(Succeed())
 	})
 
 	t.Run("Cluster get reconciled with both infrastructure Ref and control plane ref when both reconcileInfrastructureCluster and reconcileControlPlane pass", func(t *testing.T) {
 		g := NewWithT(t)
 
-		currentdCluster := builder.Cluster(metav1.NamespaceDefault, "cluster1").Build()
+		currentCluster := builder.Cluster(metav1.NamespaceDefault, "cluster1").Build()
 		infrastructureCluster := builder.TestInfrastructureCluster(metav1.NamespaceDefault, "infrastructure-cluster1").Build()
 		controlPlane := builder.TestControlPlane(metav1.NamespaceDefault, "controlplane-cluster1").Build()
@@ -2809,12 +2991,12 @@ func TestReconcileState(t *testing.T) {
 		// cluster requires a UID because reconcileClusterShim will create a cluster shim
 		// which has the cluster set as Owner in an OwnerReference.
 		// A valid OwnerReferences requires a uid.
-		currentdCluster.SetUID("foo")
+		currentCluster.SetUID("foo")
 
 		// NOTE: it is ok to use create given that the Cluster are created by user.
-		g.Expect(env.CreateAndWait(ctx, currentdCluster)).To(Succeed())
+		g.Expect(env.CreateAndWait(ctx, currentCluster)).To(Succeed())
 
-		s := scope.New(currentdCluster)
+		s := scope.New(currentCluster)
 		s.Blueprint = &scope.ClusterBlueprint{ClusterClass: &clusterv1.ClusterClass{}}
 		s.Current.ControlPlane = &scope.ControlPlaneState{}
 		s.Desired = &scope.ClusterState{Cluster: desiredCluster, InfrastructureCluster: infrastructureCluster, ControlPlane: &scope.ControlPlaneState{Object: controlPlane}}
@@ -2832,14 +3014,14 @@ func TestReconcileState(t *testing.T) {
 
 		err = r.reconcileState(ctx, s)
 		g.Expect(err).ToNot(HaveOccurred())
 
-		got := currentdCluster.DeepCopy()
-		err = env.GetAPIReader().Get(ctx, client.ObjectKeyFromObject(currentdCluster), got)
+		got := currentCluster.DeepCopy()
+		err = env.GetAPIReader().Get(ctx, client.ObjectKeyFromObject(currentCluster), got)
 		g.Expect(err).ToNot(HaveOccurred())
 		g.Expect(got.Spec.InfrastructureRef).ToNot(BeNil())
 		g.Expect(got.Spec.ControlPlaneRef).ToNot(BeNil())
 
-		g.Expect(env.CleanupAndWait(ctx, infrastructureCluster, controlPlane, currentdCluster)).To(Succeed())
+		g.Expect(env.CleanupAndWait(ctx, infrastructureCluster, controlPlane, currentCluster)).To(Succeed())
 	})
 	t.Run("Cluster does not get reconciled when reconcileControlPlane fails and infrastructure Ref is set", func(t *testing.T) {
 		g := NewWithT(t)
@@ -2847,7 +3029,7 @@ func TestReconcileState(t *testing.T) {
 		infrastructureCluster := builder.TestInfrastructureCluster(metav1.NamespaceDefault, "infrastructure-cluster1").Build()
 		controlPlane := builder.TestControlPlane(metav1.NamespaceDefault, "controlplane-cluster1").Build()
 
-		currentdCluster := builder.Cluster(metav1.NamespaceDefault, "cluster1").
+		currentCluster := builder.Cluster(metav1.NamespaceDefault, "cluster1").
 			WithInfrastructureCluster(infrastructureCluster).
 			Build()
@@ -2859,12 +3041,12 @@ func TestReconcileState(t *testing.T) {
 		// cluster requires a UID because reconcileClusterShim will create a cluster shim
 		// which has the cluster set as Owner in an OwnerReference.
 		// A valid OwnerReferences requires a uid.
-		currentdCluster.SetUID("foo")
+		currentCluster.SetUID("foo")
 
 		// NOTE: it is ok to use create given that the Cluster are created by user.
-		g.Expect(env.CreateAndWait(ctx, currentdCluster)).To(Succeed())
+		g.Expect(env.CreateAndWait(ctx, currentCluster)).To(Succeed())
 
-		s := scope.New(currentdCluster)
+		s := scope.New(currentCluster)
 		s.Blueprint = &scope.ClusterBlueprint{ClusterClass: &clusterv1.ClusterClass{}}
 		s.Current.ControlPlane = &scope.ControlPlaneState{}
 		s.Desired = &scope.ClusterState{Cluster: desiredCluster, InfrastructureCluster: infrastructureCluster, ControlPlane: &scope.ControlPlaneState{Object: controlPlane}}
@@ -2885,14 +3067,14 @@ func TestReconcileState(t *testing.T) {
 
 		err = r.reconcileState(ctx, s)
 		g.Expect(err).To(HaveOccurred())
 
-		got := currentdCluster.DeepCopy()
-		err = env.GetAPIReader().Get(ctx, client.ObjectKeyFromObject(currentdCluster), got)
+		got := currentCluster.DeepCopy()
+		err = env.GetAPIReader().Get(ctx, client.ObjectKeyFromObject(currentCluster), got)
 		g.Expect(err).ToNot(HaveOccurred())
 		g.Expect(got.Spec.InfrastructureRef).ToNot(BeNil())
 		g.Expect(got.Spec.ControlPlaneRef).To(BeNil())
 
-		g.Expect(env.CleanupAndWait(ctx, infrastructureCluster, controlPlane, currentdCluster)).To(Succeed())
+		g.Expect(env.CleanupAndWait(ctx, infrastructureCluster, controlPlane, currentCluster)).To(Succeed())
 	})
 }