Skip to content

Commit

Permalink
Address review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Karthik-K-N committed Jan 23, 2025
1 parent a2f80d0 commit 7f7b497
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 39 deletions.
13 changes: 8 additions & 5 deletions controlplane/kubeadm/internal/controllers/remediation.go
Original file line number Diff line number Diff line change
Expand Up @@ -361,11 +361,6 @@ func getMachineToBeRemediated(unhealthyMachines collections.Machines, isEtcdMana
return nil
}

annotatedMachine := unhealthyMachines.Filter(collections.HasAnnotationKey(clusterv1.RemediateMachineAnnotation)).Oldest()
if annotatedMachine != nil {
return annotatedMachine
}

machinesToBeRemediated := unhealthyMachines.UnsortedList()
if len(machinesToBeRemediated) == 1 {
return machinesToBeRemediated[0]
Expand All @@ -379,6 +374,14 @@ func getMachineToBeRemediated(unhealthyMachines collections.Machines, isEtcdMana

// pickMachineToBeRemediated returns true if machine i should be remediated before machine j.
func pickMachineToBeRemediated(i, j *clusterv1.Machine, isEtcdManaged bool) bool {
// If one machine has the RemediateMachineAnnotation annotation, remediate first.
if annotations.HasRemediateMachine(i) && !annotations.HasRemediateMachine(j) {
return true
}
if !annotations.HasRemediateMachine(i) && annotations.HasRemediateMachine(j) {
return false
}

// if one machine does not have a node ref, we assume that provisioning failed and there is no CP components at all,
// so remediate first; also without a node, it is not possible to get further info about status.
if i.Status.NodeRef == nil && j.Status.NodeRef != nil {
Expand Down
32 changes: 4 additions & 28 deletions controlplane/kubeadm/internal/controllers/remediation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ import (
)

func TestGetMachineToBeRemediated(t *testing.T) {
t.Run("returns the oldest machine with RemediateMachineAnnotation even if there are provisioning machines", func(t *testing.T) {
t.Run("returns the machine with RemediateMachineAnnotation first", func(t *testing.T) {
g := NewWithT(t)

ns, err := env.CreateNamespace(ctx, "ns1")
Expand All @@ -52,37 +52,13 @@ func TestGetMachineToBeRemediated(t *testing.T) {
}()

m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed())
m2 := createMachine(ctx, g, ns.Name, "m2-unhealthy-", withMachineHealthCheckFailed())
m3 := createMachine(ctx, g, ns.Name, "m3-unhealthy-", withMachineHealthCheckFailed(), withoutNodeRef())
m4 := createMachine(ctx, g, ns.Name, "m4-unhealthy-", withMachineHealthCheckFailed(), withoutNodeRef())

m1.SetAnnotations(map[string]string{clusterv1.RemediateMachineAnnotation: ""})
m2.SetAnnotations(map[string]string{clusterv1.RemediateMachineAnnotation: ""})

unhealthyMachines := collections.FromMachines(m2, m1, m3, m4)

g.Expect(getMachineToBeRemediated(unhealthyMachines, false).Name).To(HavePrefix("m1-unhealthy-"))
})

t.Run("returns the oldest machine with RemediateMachineAnnotation if there are no provisioning machines", func(t *testing.T) {
g := NewWithT(t)

ns, err := env.CreateNamespace(ctx, "ns1")
g.Expect(err).ToNot(HaveOccurred())
defer func() {
g.Expect(env.Cleanup(ctx, ns)).To(Succeed())
}()

m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed())
m2 := createMachine(ctx, g, ns.Name, "m2-unhealthy-", withMachineHealthCheckFailed())
m3 := createMachine(ctx, g, ns.Name, "m3-unhealthy-", withMachineHealthCheckFailed())
m2 := createMachine(ctx, g, ns.Name, "m2-unhealthy-", withMachineHealthCheckFailed(), withoutNodeRef(), withUnhealthyEtcdMember(), withUnhealthyAPIServerPod())

m1.SetAnnotations(map[string]string{clusterv1.RemediateMachineAnnotation: ""})
m2.SetAnnotations(map[string]string{clusterv1.RemediateMachineAnnotation: ""})

unhealthyMachines := collections.FromMachines(m2, m1, m3)
unhealthyMachines := collections.FromMachines(m2, m1)

g.Expect(getMachineToBeRemediated(unhealthyMachines, false).Name).To(HavePrefix("m1-unhealthy-"))
g.Expect(getMachineToBeRemediated(unhealthyMachines, true).Name).To(HavePrefix("m1-unhealthy-"))
})

t.Run("returns provisioning machines first", func(t *testing.T) {
Expand Down
25 changes: 22 additions & 3 deletions internal/controllers/machineset/machineset_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ import (
"sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil"
"sigs.k8s.io/cluster-api/internal/util/ssa"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
"sigs.k8s.io/cluster-api/util/collections"
"sigs.k8s.io/cluster-api/util/conditions"
v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2"
Expand Down Expand Up @@ -1425,9 +1426,7 @@ func (r *Reconciler) reconcileUnhealthyMachines(ctx context.Context, s *scope) (
// Sort the machines from newest to oldest.
// We are trying to remediate machines failing to come up first because
// there is a chance that they are not hosting any workloads (minimize disruption).
sort.SliceStable(machinesToRemediate, func(i, j int) bool {
return machinesToRemediate[i].CreationTimestamp.After(machinesToRemediate[j].CreationTimestamp.Time)
})
sortMachinesToRemediate(machinesToRemediate)

// Check if we should limit the in flight operations.
if len(machinesToRemediate) > maxInFlight {
Expand Down Expand Up @@ -1604,3 +1603,23 @@ func (r *Reconciler) reconcileExternalTemplateReference(ctx context.Context, clu

return false, patchHelper.Patch(ctx, obj)
}

// Returns the machines to be remediated in the following order
// - Machines with RemediateMachineAnnotation annotation if any,
// - Machines failing to come up first because
// there is a chance that they are not hosting any workloads (minimize disruption).
func sortMachinesToRemediate(machines []*clusterv1.Machine) {
sort.SliceStable(machines, func(i, j int) bool {
if annotations.HasRemediateMachine(machines[i]) && !annotations.HasRemediateMachine(machines[j]) {
return true
}
if !annotations.HasRemediateMachine(machines[i]) && annotations.HasRemediateMachine(machines[j]) {
return false
}
// Use newest (and Name) as a tie-breaker criteria.
if machines[i].CreationTimestamp.Equal(&machines[j].CreationTimestamp) {
return machines[i].Name < machines[j].Name
}
return machines[i].CreationTimestamp.After(machines[j].CreationTimestamp.Time)
})
}
10 changes: 7 additions & 3 deletions internal/controllers/machineset/machineset_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3047,17 +3047,21 @@ func TestSortMachinesToRemediate(t *testing.T) {

t.Run("remediation machines should be sorted with newest first", func(t *testing.T) {
g := NewWithT(t)
machines := unhealthyMachines
machines := make([]*clusterv1.Machine, len(unhealthyMachines))
copy(machines, unhealthyMachines)
sortMachinesToRemediate(machines)
sort.SliceStable(unhealthyMachines, func(i, j int) bool {
return machines[i].CreationTimestamp.After(machines[j].CreationTimestamp.Time)
return unhealthyMachines[i].CreationTimestamp.After(unhealthyMachines[j].CreationTimestamp.Time)
})
g.Expect(unhealthyMachines).To(Equal(machines))
})

t.Run("remediation machines with annotation should be prioritised over other machines", func(t *testing.T) {
g := NewWithT(t)
machines := append(unhealthyMachines, unhealthyMachinesWithAnnotations...)

machines := make([]*clusterv1.Machine, len(unhealthyMachines))
copy(machines, unhealthyMachines)
machines = append(machines, unhealthyMachinesWithAnnotations...)
sortMachinesToRemediate(machines)

sort.SliceStable(unhealthyMachines, func(i, j int) bool {
Expand Down

0 comments on commit 7f7b497

Please sign in to comment.