Skip to content

Commit

Permalink
Do not update KCP.Status and MS.Status when unable to get workload cl…
Browse files Browse the repository at this point in the history
…uster
  • Loading branch information
jessehu committed Apr 5, 2024
1 parent 5f4f143 commit 21afe2b
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 6 deletions.
8 changes: 8 additions & 0 deletions controlplane/kubeadm/internal/controllers/fakes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,14 @@ func (f *fakeManagementCluster) GetMachinePoolsForCluster(c context.Context, clu
return f.MachinePools, nil
}

type fakeManagementClusterWithError struct {
fakeManagementCluster
}

func (f *fakeManagementClusterWithError) GetWorkloadCluster(_ context.Context, _ client.ObjectKey) (internal.WorkloadCluster, error) {
return nil, errors.New("failed to get workload cluster")
}

type fakeWorkloadCluster struct {
*internal.Workload
Status internal.ClusterStatus
Expand Down
6 changes: 3 additions & 3 deletions controlplane/kubeadm/internal/controllers/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ func (r *KubeadmControlPlaneReconciler) updateStatus(ctx context.Context, contro
replicas := int32(len(controlPlane.Machines))
desiredReplicas := *controlPlane.KCP.Spec.Replicas

// set basic data that does not require interacting with the workload cluster
// Set basic data that does not require interacting with the workload cluster.
controlPlane.KCP.Status.Replicas = replicas
controlPlane.KCP.Status.ReadyReplicas = 0
controlPlane.KCP.Status.UnavailableReplicas = replicas
// Set UnavailableReplicas to `replicas` when KCP is newly created, otherwise keep it unchanged. So as to avoid updating it when unable to get the workload cluster.
controlPlane.KCP.Status.UnavailableReplicas = replicas - controlPlane.KCP.Status.ReadyReplicas

// Return early if the deletion timestamp is set, because we don't want to try to connect to the workload cluster
// and we don't want to report resize condition (because it is set to deleting into reconcile delete).
Expand Down
74 changes: 74 additions & 0 deletions controlplane/kubeadm/internal/controllers/status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,80 @@ func TestKubeadmControlPlaneReconciler_updateStatusMachinesReadyMixed(t *testing
g.Expect(kcp.Status.Ready).To(BeTrue())
}

func TestKubeadmControlPlaneReconciler_updateStatusCannotGetWorkloadClusterStatus(t *testing.T) {
g := NewWithT(t)

cluster := &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: metav1.NamespaceDefault,
},
}

kcp := &controlplanev1.KubeadmControlPlane{
TypeMeta: metav1.TypeMeta{
Kind: "KubeadmControlPlane",
APIVersion: controlplanev1.GroupVersion.String(),
},
ObjectMeta: metav1.ObjectMeta{
Namespace: cluster.Namespace,
Name: "foo",
},
Spec: controlplanev1.KubeadmControlPlaneSpec{
Version: "v1.16.6",
MachineTemplate: controlplanev1.KubeadmControlPlaneMachineTemplate{
InfrastructureRef: corev1.ObjectReference{
APIVersion: "test/v1alpha1",
Kind: "UnknownInfraMachine",
Name: "foo",
},
},
},
Status: controlplanev1.KubeadmControlPlaneStatus{
Ready: true,
Replicas: 3,
ReadyReplicas: 3,
UpdatedReplicas: 3,
UnavailableReplicas: 0,
},
}
webhook := &controlplanev1webhooks.KubeadmControlPlane{}
g.Expect(webhook.Default(ctx, kcp)).To(Succeed())
_, err := webhook.ValidateCreate(ctx, kcp)
g.Expect(err).ToNot(HaveOccurred())

machines := map[string]*clusterv1.Machine{}
objs := []client.Object{cluster.DeepCopy(), kcp.DeepCopy()}
for i := 0; i < 3; i++ {
name := fmt.Sprintf("test-%d", i)
m, n := createMachineNodePair(name, cluster, kcp, true)
objs = append(objs, n, m)
machines[m.Name] = m
}

fakeClient := newFakeClient(objs...)

r := &KubeadmControlPlaneReconciler{
Client: fakeClient,
managementCluster: &fakeManagementClusterWithError{},
recorder: record.NewFakeRecorder(32),
}

controlPlane := &internal.ControlPlane{
KCP: kcp,
Cluster: cluster,
Machines: machines,
}
controlPlane.InjectTestManagementCluster(r.managementCluster)

// When updateStatus() returns a non-nil error(e.g. unable to get workload cluster), the original kcp.Status should not be updated.
g.Expect(r.updateStatus(ctx, controlPlane)).To(HaveOccurred())
g.Expect(kcp.Status.Replicas).To(BeEquivalentTo(3))
g.Expect(kcp.Status.ReadyReplicas).To(BeEquivalentTo(3))
g.Expect(kcp.Status.UnavailableReplicas).To(BeEquivalentTo(0))
g.Expect(kcp.Status.Ready).To(BeTrue())
}

func TestKubeadmControlPlaneReconciler_machinesCreatedIsIsTrueEvenWhenTheNodesAreNotReady(t *testing.T) {
g := NewWithT(t)

Expand Down
9 changes: 6 additions & 3 deletions internal/controllers/machineset/machineset_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -844,7 +844,8 @@ func (r *Reconciler) shouldAdopt(ms *clusterv1.MachineSet) bool {
}

// updateStatus updates the Status field for the MachineSet
// It checks for the current state of the replicas and updates the Status of the MachineSet.
// It checks for the current state of the replicas and updates the Status field of the MachineSet.
// When unable to retrieve the Node status, it returns error and won't update the Status field of the MachineSet.
func (r *Reconciler) updateStatus(ctx context.Context, cluster *clusterv1.Cluster, ms *clusterv1.MachineSet, filteredMachines []*clusterv1.Machine) error {
log := ctrl.LoggerFrom(ctx)
newStatus := ms.Status.DeepCopy()
Expand Down Expand Up @@ -882,8 +883,7 @@ func (r *Reconciler) updateStatus(ctx context.Context, cluster *clusterv1.Cluste

node, err := r.getMachineNode(ctx, cluster, machine)
if err != nil && machine.GetDeletionTimestamp().IsZero() {
log.Error(err, "Unable to retrieve Node status", "node", klog.KObj(node))
continue
return errors.Wrapf(err, "unable to retrieve the status of Node %s", klog.KObj(node))
}

if noderefutil.IsNodeReady(node) {
Expand Down Expand Up @@ -956,6 +956,9 @@ func (r *Reconciler) getMachineNode(ctx context.Context, cluster *clusterv1.Clus
}
node := &corev1.Node{}
if err := remoteClient.Get(ctx, client.ObjectKey{Name: machine.Status.NodeRef.Name}, node); err != nil {
if apierrors.IsNotFound(err) {
return nil, nil
}
return nil, errors.Wrapf(err, "error retrieving node %s for machine %s/%s", machine.Status.NodeRef.Name, machine.Namespace, machine.Name)
}
return node, nil
Expand Down

0 comments on commit 21afe2b

Please sign in to comment.