Skip to content

Commit

Permalink
Address review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Karthik-K-N committed Jan 22, 2025
1 parent a2f80d0 commit b8fd7be
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 36 deletions.
13 changes: 8 additions & 5 deletions controlplane/kubeadm/internal/controllers/remediation.go
Original file line number Diff line number Diff line change
Expand Up @@ -361,11 +361,6 @@ func getMachineToBeRemediated(unhealthyMachines collections.Machines, isEtcdMana
return nil
}

annotatedMachine := unhealthyMachines.Filter(collections.HasAnnotationKey(clusterv1.RemediateMachineAnnotation)).Oldest()
if annotatedMachine != nil {
return annotatedMachine
}

machinesToBeRemediated := unhealthyMachines.UnsortedList()
if len(machinesToBeRemediated) == 1 {
return machinesToBeRemediated[0]
Expand All @@ -379,6 +374,14 @@ func getMachineToBeRemediated(unhealthyMachines collections.Machines, isEtcdMana

// pickMachineToBeRemediated returns true if machine i should be remediated before machine j.
func pickMachineToBeRemediated(i, j *clusterv1.Machine, isEtcdManaged bool) bool {
// If one machine has the RemediateMachineAnnotation annotation, remediate first.
if annotations.HasRemediateMachine(i) && !annotations.HasRemediateMachine(j) {
return true
}
if !annotations.HasRemediateMachine(i) && annotations.HasRemediateMachine(j) {
return false
}

// if one machine does not have a node ref, we assume that provisioning failed and there is no CP components at all,
// so remediate first; also without a node, it is not possible to get further info about status.
if i.Status.NodeRef == nil && j.Status.NodeRef != nil {
Expand Down
32 changes: 4 additions & 28 deletions controlplane/kubeadm/internal/controllers/remediation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ import (
)

func TestGetMachineToBeRemediated(t *testing.T) {
t.Run("returns the oldest machine with RemediateMachineAnnotation even if there are provisioning machines", func(t *testing.T) {
t.Run("returns the machine with RemediateMachineAnnotation first", func(t *testing.T) {
g := NewWithT(t)

ns, err := env.CreateNamespace(ctx, "ns1")
Expand All @@ -52,37 +52,13 @@ func TestGetMachineToBeRemediated(t *testing.T) {
}()

m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed())
m2 := createMachine(ctx, g, ns.Name, "m2-unhealthy-", withMachineHealthCheckFailed())
m3 := createMachine(ctx, g, ns.Name, "m3-unhealthy-", withMachineHealthCheckFailed(), withoutNodeRef())
m4 := createMachine(ctx, g, ns.Name, "m4-unhealthy-", withMachineHealthCheckFailed(), withoutNodeRef())

m1.SetAnnotations(map[string]string{clusterv1.RemediateMachineAnnotation: ""})
m2.SetAnnotations(map[string]string{clusterv1.RemediateMachineAnnotation: ""})

unhealthyMachines := collections.FromMachines(m2, m1, m3, m4)

g.Expect(getMachineToBeRemediated(unhealthyMachines, false).Name).To(HavePrefix("m1-unhealthy-"))
})

t.Run("returns the oldest machine with RemediateMachineAnnotation if there are no provisioning machines", func(t *testing.T) {
g := NewWithT(t)

ns, err := env.CreateNamespace(ctx, "ns1")
g.Expect(err).ToNot(HaveOccurred())
defer func() {
g.Expect(env.Cleanup(ctx, ns)).To(Succeed())
}()

m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed())
m2 := createMachine(ctx, g, ns.Name, "m2-unhealthy-", withMachineHealthCheckFailed())
m3 := createMachine(ctx, g, ns.Name, "m3-unhealthy-", withMachineHealthCheckFailed())
m2 := createMachine(ctx, g, ns.Name, "m2-unhealthy-", withMachineHealthCheckFailed(), withoutNodeRef(), withUnhealthyEtcdMember(), withUnhealthyAPIServerPod())

m1.SetAnnotations(map[string]string{clusterv1.RemediateMachineAnnotation: ""})
m2.SetAnnotations(map[string]string{clusterv1.RemediateMachineAnnotation: ""})

unhealthyMachines := collections.FromMachines(m2, m1, m3)
unhealthyMachines := collections.FromMachines(m2, m1)

g.Expect(getMachineToBeRemediated(unhealthyMachines, false).Name).To(HavePrefix("m1-unhealthy-"))
g.Expect(getMachineToBeRemediated(unhealthyMachines, true).Name).To(HavePrefix("m1-unhealthy-"))
})

t.Run("returns provisioning machines first", func(t *testing.T) {
Expand Down
21 changes: 18 additions & 3 deletions internal/controllers/machineset/machineset_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ import (
"sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil"
"sigs.k8s.io/cluster-api/internal/util/ssa"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
"sigs.k8s.io/cluster-api/util/collections"
"sigs.k8s.io/cluster-api/util/conditions"
v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2"
Expand Down Expand Up @@ -1425,9 +1426,7 @@ func (r *Reconciler) reconcileUnhealthyMachines(ctx context.Context, s *scope) (
// Sort the machines from newest to oldest.
// We are trying to remediate machines failing to come up first because
// there is a chance that they are not hosting any workloads (minimize disruption).
sort.SliceStable(machinesToRemediate, func(i, j int) bool {
return machinesToRemediate[i].CreationTimestamp.After(machinesToRemediate[j].CreationTimestamp.Time)
})
sortMachinesToRemediate(machinesToRemediate)

// Check if we should limit the in flight operations.
if len(machinesToRemediate) > maxInFlight {
Expand Down Expand Up @@ -1604,3 +1603,19 @@ func (r *Reconciler) reconcileExternalTemplateReference(ctx context.Context, clu

return false, patchHelper.Patch(ctx, obj)
}

// Returns the machines to be remediated in the following order
// - Machines with RemediateMachineAnnotation annotation if any,
// - Machines failing to come up first because
// there is a chance that they are not hosting any workloads (minimize disruption).
func sortMachinesToRemediate(machines []*clusterv1.Machine) {
sort.SliceStable(machines, func(i, j int) bool {
if annotations.HasRemediateMachine(machines[i]) && !annotations.HasRemediateMachine(machines[j]) {
return true
}
if !annotations.HasRemediateMachine(machines[i]) && annotations.HasRemediateMachine(machines[j]) {
return false
}
return machines[i].CreationTimestamp.After(machines[j].CreationTimestamp.Time)
})
}

0 comments on commit b8fd7be

Please sign in to comment.