From 80ef9d29f46781db2ca1ca9d04b8c7225001ec69 Mon Sep 17 00:00:00 2001 From: Jont828 Date: Wed, 15 Nov 2023 12:57:35 -0500 Subject: [PATCH] Add MachinePool Machine implementation to DockerMachines and DockerMachinePools --- .../machinepool_controller_phases.go | 9 +- internal/webhooks/machine.go | 19 +- internal/webhooks/machine_test.go | 54 --- test/e2e/clusterclass_rollout.go | 13 +- test/framework/ownerreference_helpers.go | 8 +- ...e.cluster.x-k8s.io_dockermachinepools.yaml | 4 + .../docker/config/rbac/role.yaml | 9 + .../docker/exp/api/v1alpha4/conversion.go | 28 +- .../api/v1alpha4/zz_generated.conversion.go | 8 +- .../api/v1beta1/dockermachinepool_types.go | 12 +- .../dockermachinepool_controller.go | 209 ++++++++-- .../dockermachinepool_controller_phases.go | 390 ++++++++++++++++++ .../docker/exp/internal/docker/nodepool.go | 388 ----------------- .../controllers/dockermachine_controller.go | 54 ++- util/labels/helpers.go | 15 + util/labels/helpers_test.go | 57 +++ 16 files changed, 752 insertions(+), 525 deletions(-) create mode 100644 test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller_phases.go delete mode 100644 test/infrastructure/docker/exp/internal/docker/nodepool.go diff --git a/exp/internal/controllers/machinepool_controller_phases.go b/exp/internal/controllers/machinepool_controller_phases.go index 0eed26251cf3..cbd9c8a35904 100644 --- a/exp/internal/controllers/machinepool_controller_phases.go +++ b/exp/internal/controllers/machinepool_controller_phases.go @@ -47,6 +47,7 @@ import ( "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/conditions" utilconversion "sigs.k8s.io/cluster-api/util/conversion" + "sigs.k8s.io/cluster-api/util/labels" "sigs.k8s.io/cluster-api/util/labels/format" "sigs.k8s.io/cluster-api/util/patch" ) @@ -485,12 +486,10 @@ func computeDesiredMachine(mp *expv1.MachinePool, infraMachine *unstructured.Uns func (r *MachinePoolReconciler) infraMachineToMachinePoolMapper(ctx context.Context, o client.Object) []ctrl.Request { log := ctrl.LoggerFrom(ctx) - labels := o.GetLabels() - _, machinePoolOwned := labels[clusterv1.MachinePoolNameLabel] - if machinePoolOwned { - machinePool, err := utilexp.GetMachinePoolByLabels(ctx, r.Client, o.GetNamespace(), labels) + if labels.IsMachinePoolOwned(o) { + machinePool, err := utilexp.GetMachinePoolByLabels(ctx, r.Client, o.GetNamespace(), o.GetLabels()) if err != nil { - log.Error(err, "failed to get MachinePool for InfraMachine", "infraMachine", klog.KObj(o), "labels", labels) + log.Error(err, "failed to get MachinePool for InfraMachine", "infraMachine", klog.KObj(o), "labels", o.GetLabels()) return nil } if machinePool != nil { diff --git a/internal/webhooks/machine.go b/internal/webhooks/machine.go index f36cee1f352e..58cbbfb232c3 100644 --- a/internal/webhooks/machine.go +++ b/internal/webhooks/machine.go @@ -31,6 +31,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/webhook/admission" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util/labels" "sigs.k8s.io/cluster-api/util/version" ) @@ -120,7 +121,7 @@ func (webhook *Machine) validate(oldM, newM *clusterv1.Machine) error { specPath := field.NewPath("spec") if newM.Spec.Bootstrap.ConfigRef == nil && newM.Spec.Bootstrap.DataSecretName == nil { // MachinePool Machines don't have a bootstrap configRef, so don't require it. The bootstrap config is instead owned by the MachinePool. - if !isMachinePoolMachine(newM) { + if !labels.IsMachinePoolOwned(newM) { allErrs = append( allErrs, field.Required( @@ -171,19 +172,3 @@ func (webhook *Machine) validate(oldM, newM *clusterv1.Machine) error { } return apierrors.NewInvalid(clusterv1.GroupVersion.WithKind("Machine").GroupKind(), newM.Name, allErrs) } - -func isMachinePoolMachine(m *clusterv1.Machine) bool { - if m.Labels != nil { - if _, ok := m.Labels[clusterv1.MachinePoolNameLabel]; ok { - return true - } - } - - for _, owner := range m.OwnerReferences { - if owner.Kind == "MachinePool" { - return true - } - } - - return false -} diff --git a/internal/webhooks/machine_test.go b/internal/webhooks/machine_test.go index 9e4c2a87ab24..3568250cbe20 100644 --- a/internal/webhooks/machine_test.go +++ b/internal/webhooks/machine_test.go @@ -220,60 +220,6 @@ func TestMachineClusterNameImmutable(t *testing.T) { } } -func TestIsMachinePoolMachine(t *testing.T) { - tests := []struct { - name string - machine clusterv1.Machine - isMPM bool - }{ - { - name: "machine is a MachinePoolMachine", - machine: clusterv1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - OwnerReferences: []metav1.OwnerReference{ - { - Kind: "MachinePool", - }, - }, - }, - }, - isMPM: true, - }, - { - name: "machine is not a MachinePoolMachine", - machine: clusterv1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - OwnerReferences: []metav1.OwnerReference{ - { - Kind: "NotMachinePool", - }, - }, - }, - }, - isMPM: false, - }, - { - name: "machine is not a MachinePoolMachine, no owner references", - machine: clusterv1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - OwnerReferences: nil, - }, - }, - isMPM: false, - }, - } - - for i := range tests { - tt := tests[i] - t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) - - result := isMachinePoolMachine(&tt.machine) - g.Expect(result).To(Equal(tt.isMPM)) - }) - } -} - func TestMachineVersionValidation(t *testing.T) { tests := []struct { name string diff --git a/test/e2e/clusterclass_rollout.go b/test/e2e/clusterclass_rollout.go index 4b897d553da5..11be32180d70 100644 --- a/test/e2e/clusterclass_rollout.go +++ b/test/e2e/clusterclass_rollout.go @@ -47,6 +47,7 @@ import ( "sigs.k8s.io/cluster-api/test/framework" "sigs.k8s.io/cluster-api/test/framework/clusterctl" "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/labels" "sigs.k8s.io/cluster-api/util/patch" ) @@ -246,6 +247,8 @@ func ClusterClassRolloutSpec(ctx context.Context, inputGetter func() ClusterClas }) By("Verifying all Machines are replaced through rollout") Eventually(func(g Gomega) { + // Note: This excludes MachinePool Machines as they are not replaced by rollout yet. + // This is tracked by https://github.com/kubernetes-sigs/cluster-api/issues/8858. machinesAfterUpgrade := getMachinesByCluster(ctx, input.BootstrapClusterProxy.GetClient(), clusterResources.Cluster) g.Expect(machinesAfterUpgrade.HasAny(machinesBeforeUpgrade.UnsortedList()...)).To(BeFalse(), "All Machines must be replaced through rollout") }, input.E2EConfig.GetIntervals(specName, "wait-control-plane")...).Should(Succeed()) @@ -898,6 +901,7 @@ func mustMetadata(metadata *clusterv1.ObjectMeta, err error) *clusterv1.ObjectMe } // getMachinesByCluster gets the Machines of a Cluster and returns them as a Set of Machine names. +// Note: This excludes MachinePool Machines as they are not replaced by rollout yet. func getMachinesByCluster(ctx context.Context, client client.Client, cluster *clusterv1.Cluster) sets.Set[string] { machines := sets.Set[string]{} machinesByCluster := framework.GetMachinesByCluster(ctx, framework.GetMachinesByClusterInput{ @@ -905,8 +909,11 @@ func getMachinesByCluster(ctx context.Context, client client.Client, cluster *cl ClusterName: cluster.Name, Namespace: cluster.Namespace, }) - for _, m := range machinesByCluster { - machines.Insert(m.Name) + for i := range machinesByCluster { + m := machinesByCluster[i] + if !labels.IsMachinePoolOwned(&m) { + machines.Insert(m.Name) + } } return machines } @@ -935,7 +942,7 @@ func getMDTopology(cluster *clusterv1.Cluster, md *clusterv1.MachineDeployment) return nil } -// getMPClass looks up the MachinePoolClass for a md in the ClusterClass. +// getMPClass looks up the MachinePoolClass for a MachinePool in the ClusterClass. func getMPClass(cluster *clusterv1.Cluster, clusterClass *clusterv1.ClusterClass, mp *expv1.MachinePool) *clusterv1.MachinePoolClass { mpTopology := getMPTopology(cluster, mp) diff --git a/test/framework/ownerreference_helpers.go b/test/framework/ownerreference_helpers.go index 5b5d78861819..4b6b4ae0310f 100644 --- a/test/framework/ownerreference_helpers.go +++ b/test/framework/ownerreference_helpers.go @@ -39,6 +39,7 @@ import ( controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1" addonsv1 "sigs.k8s.io/cluster-api/exp/addons/api/v1beta1" expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" + infraexpv1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/api/v1beta1" "sigs.k8s.io/cluster-api/util/patch" ) @@ -288,6 +289,8 @@ var ( dockerMachinePoolTemplateKind = "DockerMachinePoolTemplate" dockerClusterKind = "DockerCluster" dockerClusterTemplateKind = "DockerClusterTemplate" + + dockerMachinePoolController = metav1.OwnerReference{Kind: dockerMachinePoolKind, APIVersion: infraexpv1.GroupVersion.String()} ) // DockerInfraOwnerReferenceAssertions maps Docker Infrastructure types to functions which return an error if the passed @@ -296,8 +299,9 @@ var ( // That document should be updated if these references change. var DockerInfraOwnerReferenceAssertions = map[string]func([]metav1.OwnerReference) error{ dockerMachineKind: func(owners []metav1.OwnerReference) error { - // The DockerMachine must be owned and controlled by a Machine. - return HasExactOwners(owners, machineController) + // The DockerMachine must be owned and controlled by a Machine or a DockerMachinePool. + return HasOneOfExactOwners(owners, []metav1.OwnerReference{machineController}, []metav1.OwnerReference{machineController, dockerMachinePoolController}) + }, dockerMachineTemplateKind: func(owners []metav1.OwnerReference) error { // Base DockerMachineTemplates referenced in a ClusterClass must be owned by the ClusterClass. diff --git a/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachinepools.yaml b/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachinepools.yaml index 9e1fc52735a2..d1f1d93a4b58 100644 --- a/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachinepools.yaml +++ b/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachinepools.yaml @@ -322,6 +322,10 @@ spec: - type type: object type: array + infrastructureMachineKind: + description: InfrastructureMachineKind is the kind of the infrastructure + resources behind MachinePool Machines. + type: string instances: description: Instances contains the status for each instance in the pool diff --git a/test/infrastructure/docker/config/rbac/role.yaml b/test/infrastructure/docker/config/rbac/role.yaml index aa34b0a59758..546705e9a9f6 100644 --- a/test/infrastructure/docker/config/rbac/role.yaml +++ b/test/infrastructure/docker/config/rbac/role.yaml @@ -51,6 +51,15 @@ rules: - get - list - watch +- apiGroups: + - cluster.x-k8s.io + resources: + - machines + verbs: + - delete + - get + - list + - watch - apiGroups: - infrastructure.cluster.x-k8s.io resources: diff --git a/test/infrastructure/docker/exp/api/v1alpha4/conversion.go b/test/infrastructure/docker/exp/api/v1alpha4/conversion.go index acc200d9e51a..68b10e18b247 100644 --- a/test/infrastructure/docker/exp/api/v1alpha4/conversion.go +++ b/test/infrastructure/docker/exp/api/v1alpha4/conversion.go @@ -17,21 +17,40 @@ limitations under the License. package v1alpha4 import ( + apiconversion "k8s.io/apimachinery/pkg/conversion" "sigs.k8s.io/controller-runtime/pkg/conversion" infraexpv1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/api/v1beta1" + utilconversion "sigs.k8s.io/cluster-api/util/conversion" ) func (src *DockerMachinePool) ConvertTo(dstRaw conversion.Hub) error { dst := dstRaw.(*infraexpv1.DockerMachinePool) - return Convert_v1alpha4_DockerMachinePool_To_v1beta1_DockerMachinePool(src, dst, nil) + if err := Convert_v1alpha4_DockerMachinePool_To_v1beta1_DockerMachinePool(src, dst, nil); err != nil { + return err + } + + // Manually restore data. + restored := &infraexpv1.DockerMachinePool{} + if ok, err := utilconversion.UnmarshalData(src, restored); err != nil || !ok { + return err + } + + dst.Status.InfrastructureMachineKind = restored.Status.InfrastructureMachineKind + + return nil } func (dst *DockerMachinePool) ConvertFrom(srcRaw conversion.Hub) error { src := srcRaw.(*infraexpv1.DockerMachinePool) - return Convert_v1beta1_DockerMachinePool_To_v1alpha4_DockerMachinePool(src, dst, nil) + if err := Convert_v1beta1_DockerMachinePool_To_v1alpha4_DockerMachinePool(src, dst, nil); err != nil { + return err + } + + // Preserve Hub data on down-conversion except for metadata + return utilconversion.MarshalData(src, dst) } func (src *DockerMachinePoolList) ConvertTo(dstRaw conversion.Hub) error { @@ -45,3 +64,8 @@ func (dst *DockerMachinePoolList) ConvertFrom(srcRaw conversion.Hub) error { return Convert_v1beta1_DockerMachinePoolList_To_v1alpha4_DockerMachinePoolList(src, dst, nil) } + +func Convert_v1beta1_DockerMachinePoolStatus_To_v1alpha4_DockerMachinePoolStatus(in *infraexpv1.DockerMachinePoolStatus, out *DockerMachinePoolStatus, s apiconversion.Scope) error { + // NOTE: custom conversion func is required because Status.InfrastructureMachineKind has been added in v1beta1. + return autoConvert_v1beta1_DockerMachinePoolStatus_To_v1alpha4_DockerMachinePoolStatus(in, out, s) +} diff --git a/test/infrastructure/docker/exp/api/v1alpha4/zz_generated.conversion.go b/test/infrastructure/docker/exp/api/v1alpha4/zz_generated.conversion.go index 4f302088997d..53081f3c8892 100644 --- a/test/infrastructure/docker/exp/api/v1alpha4/zz_generated.conversion.go +++ b/test/infrastructure/docker/exp/api/v1alpha4/zz_generated.conversion.go @@ -95,7 +95,7 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } - if err := s.AddGeneratedConversionFunc((*v1beta1.DockerMachinePoolStatus)(nil), (*DockerMachinePoolStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { + if err := s.AddConversionFunc((*v1beta1.DockerMachinePoolStatus)(nil), (*DockerMachinePoolStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1beta1_DockerMachinePoolStatus_To_v1alpha4_DockerMachinePoolStatus(a.(*v1beta1.DockerMachinePoolStatus), b.(*DockerMachinePoolStatus), scope) }); err != nil { return err @@ -339,10 +339,6 @@ func autoConvert_v1beta1_DockerMachinePoolStatus_To_v1alpha4_DockerMachinePoolSt } else { out.Conditions = nil } + // WARNING: in.InfrastructureMachineKind requires manual conversion: does not exist in peer-type return nil } - -// Convert_v1beta1_DockerMachinePoolStatus_To_v1alpha4_DockerMachinePoolStatus is an autogenerated conversion function. -func Convert_v1beta1_DockerMachinePoolStatus_To_v1alpha4_DockerMachinePoolStatus(in *v1beta1.DockerMachinePoolStatus, out *DockerMachinePoolStatus, s conversion.Scope) error { - return autoConvert_v1beta1_DockerMachinePoolStatus_To_v1alpha4_DockerMachinePoolStatus(in, out, s) -} diff --git a/test/infrastructure/docker/exp/api/v1beta1/dockermachinepool_types.go b/test/infrastructure/docker/exp/api/v1beta1/dockermachinepool_types.go index 13493944938c..e90d7d585e89 100644 --- a/test/infrastructure/docker/exp/api/v1beta1/dockermachinepool_types.go +++ b/test/infrastructure/docker/exp/api/v1beta1/dockermachinepool_types.go @@ -82,6 +82,10 @@ type DockerMachinePoolStatus struct { // Conditions defines current service state of the DockerMachinePool. // +optional Conditions clusterv1.Conditions `json:"conditions,omitempty"` + + // InfrastructureMachineKind is the kind of the infrastructure resources behind MachinePool Machines. + // +optional + InfrastructureMachineKind string `json:"infrastructureMachineKind,omitempty"` } // DockerMachinePoolInstanceStatus contains status information about a DockerMachinePool. @@ -130,13 +134,13 @@ type DockerMachinePool struct { } // GetConditions returns the set of conditions for this object. -func (c *DockerMachinePool) GetConditions() clusterv1.Conditions { - return c.Status.Conditions +func (d *DockerMachinePool) GetConditions() clusterv1.Conditions { + return d.Status.Conditions } // SetConditions sets the conditions on this object. -func (c *DockerMachinePool) SetConditions(conditions clusterv1.Conditions) { - c.Status.Conditions = conditions +func (d *DockerMachinePool) SetConditions(conditions clusterv1.Conditions) { + d.Status.Conditions = conditions } // +kubebuilder:object:root=true diff --git a/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller.go b/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller.go index 67f997ea8f0e..e23f46b44b11 100644 --- a/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller.go +++ b/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller.go @@ -25,6 +25,10 @@ import ( "github.com/pkg/errors" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + kerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/client-go/tools/record" "k8s.io/klog/v2" "k8s.io/utils/pointer" ctrl "sigs.k8s.io/controller-runtime" @@ -35,17 +39,28 @@ import ( "sigs.k8s.io/controller-runtime/pkg/handler" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/controllers/external" "sigs.k8s.io/cluster-api/controllers/remote" expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" utilexp "sigs.k8s.io/cluster-api/exp/util" "sigs.k8s.io/cluster-api/test/infrastructure/container" + infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta1" infraexpv1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/api/v1beta1" - "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/internal/docker" + "sigs.k8s.io/cluster-api/test/infrastructure/docker/internal/docker" "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/cluster-api/util/predicates" ) +const ( + // dockerMachinePoolLabel is the label used to identify the DockerMachinePool that is responsible for a Docker container. + dockerMachinePoolLabel = "docker.cluster.x-k8s.io/machine-pool" + + // requeueAfter is how long to wait before checking again to see if the DockerMachines are still provisioning or deleting. + requeueAfter = 10 * time.Second +) + // DockerMachinePoolReconciler reconciles a DockerMachinePool object. type DockerMachinePoolReconciler struct { Client client.Client @@ -55,11 +70,15 @@ type DockerMachinePoolReconciler struct { // WatchFilterValue is the label value used to filter events prior to reconciliation. WatchFilterValue string + + recorder record.EventRecorder + externalTracker external.ObjectTracker } // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=dockermachinepools,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=dockermachinepools/status;dockermachinepools/finalizers,verbs=get;update;patch // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;list;watch;delete // +kubebuilder:rbac:groups="",resources=secrets;,verbs=get;list;watch func (r *DockerMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res ctrl.Result, rerr error) { @@ -121,13 +140,15 @@ func (r *DockerMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Re // Handle deleted machines if !dockerMachinePool.ObjectMeta.DeletionTimestamp.IsZero() { - return ctrl.Result{}, r.reconcileDelete(ctx, cluster, machinePool, dockerMachinePool) + return r.reconcileDelete(ctx, cluster, machinePool, dockerMachinePool) } - // Add finalizer first if not set to avoid the race condition between init and delete. + // Add finalizer and the InfrastructureMachineKind if they aren't already present, and requeue if either were added. + // We want to add the finalizer here to avoid the race condition between init and delete. // Note: Finalizers in general can only be added when the deletionTimestamp is not set. - if !controllerutil.ContainsFinalizer(dockerMachinePool, infraexpv1.MachinePoolFinalizer) { - controllerutil.AddFinalizer(dockerMachinePool, infraexpv1.MachinePoolFinalizer) + needsPatch := controllerutil.AddFinalizer(dockerMachinePool, infraexpv1.MachinePoolFinalizer) + needsPatch = setInfrastructureMachineKind(dockerMachinePool) || needsPatch + if needsPatch { return ctrl.Result{}, nil } @@ -149,7 +170,7 @@ func (r *DockerMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr return err } - err = ctrl.NewControllerManagedBy(mgr). + c, err := ctrl.NewControllerManagedBy(mgr). For(&infraexpv1.DockerMachinePool{}). WithOptions(options). WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)). @@ -158,31 +179,85 @@ func (r *DockerMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr handler.EnqueueRequestsFromMapFunc(utilexp.MachinePoolToInfrastructureMapFunc( infraexpv1.GroupVersion.WithKind("DockerMachinePool"), ctrl.LoggerFrom(ctx))), ). + Watches( + &infrav1.DockerMachine{}, + handler.EnqueueRequestsFromMapFunc(dockerMachineToDockerMachinePool), + ). Watches( &clusterv1.Cluster{}, handler.EnqueueRequestsFromMapFunc(clusterToDockerMachinePools), builder.WithPredicates( predicates.ClusterUnpausedAndInfrastructureReady(ctrl.LoggerFrom(ctx)), ), - ).Complete(r) + ).Build(r) if err != nil { return errors.Wrap(err, "failed setting up with a controller manager") } + + r.recorder = mgr.GetEventRecorderFor("dockermachinepool-controller") + r.externalTracker = external.ObjectTracker{ + Controller: c, + Cache: mgr.GetCache(), + } + return nil } -func (r *DockerMachinePoolReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) error { - pool, err := docker.NewNodePool(ctx, r.Client, cluster, machinePool, dockerMachinePool) +func (r *DockerMachinePoolReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) (ctrl.Result, error) { + log := ctrl.LoggerFrom(ctx) + + dockerMachineList, err := getDockerMachines(ctx, r.Client, *cluster, *machinePool, *dockerMachinePool) + if err != nil { + return ctrl.Result{}, err + } + + if len(dockerMachineList.Items) > 0 { + log.Info("DockerMachinePool still has dependent DockerMachines, deleting them first and requeuing", "count", len(dockerMachineList.Items)) + + var errs []error + + for _, dockerMachine := range dockerMachineList.Items { + if !dockerMachine.GetDeletionTimestamp().IsZero() { + // Don't handle deleted child + continue + } + + if err := r.deleteMachinePoolMachine(ctx, dockerMachine); err != nil { + err = errors.Wrapf(err, "error deleting DockerMachinePool %s/%s: failed to delete %s %s", dockerMachinePool.Namespace, dockerMachinePool.Name, dockerMachine.Namespace, dockerMachine.Name) + errs = append(errs, err) + } + } + + if len(errs) > 0 { + return ctrl.Result{}, kerrors.NewAggregate(errs) + } + + return ctrl.Result{RequeueAfter: requeueAfter}, nil + } + + // Once there are no DockerMachines left, ensure there are no Docker containers left behind. + // This can occur if deletion began after containers were created but before the DockerMachines were created, or if creation of a DockerMachine failed. + log.Info("DockerMachines have been deleted, deleting any remaining Docker containers") + + labelFilters := map[string]string{dockerMachinePoolLabel: dockerMachinePool.Name} + // List Docker containers, i.e. external machines in the cluster. + externalMachines, err := docker.ListMachinesByCluster(ctx, cluster, labelFilters) if err != nil { - return errors.Wrap(err, "failed to build new node pool") + return ctrl.Result{}, errors.Wrapf(err, "failed to list all machines in the cluster with label \"%s:%s\"", dockerMachinePoolLabel, dockerMachinePool.Name) } - if err := pool.Delete(ctx); err != nil { - return errors.Wrap(err, "failed to delete all machines in the node pool") + // Providers should similarly ensure that all infrastructure instances are deleted even if the InfraMachine has not been created yet. + for _, externalMachine := range externalMachines { + log.Info("Deleting Docker container", "container", externalMachine.Name()) + if err := externalMachine.Delete(ctx); err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to delete machine %s", externalMachine.Name()) + } } + // Once all DockerMachines and Docker containers are deleted, remove the finalizer. controllerutil.RemoveFinalizer(dockerMachinePool, infraexpv1.MachinePoolFinalizer) - return nil + + return ctrl.Result{}, nil } func (r *DockerMachinePoolReconciler) reconcileNormal(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) (ctrl.Result, error) { @@ -198,30 +273,36 @@ func (r *DockerMachinePoolReconciler) reconcileNormal(ctx context.Context, clust machinePool.Spec.Replicas = pointer.Int32(1) } - pool, err := docker.NewNodePool(ctx, r.Client, cluster, machinePool, dockerMachinePool) - if err != nil { - return ctrl.Result{}, errors.Wrap(err, "failed to build new node pool") + // First, reconcile the Docker containers, but do not delete any as we need to delete the Machine to ensure node cordon/drain. + // Similarly, providers implementing MachinePool Machines will need to reconcile their analogous infrastructure instances (aside + // from deletion) before reconciling InfraMachinePoolMachines. + if err := r.reconcileDockerContainers(ctx, cluster, machinePool, dockerMachinePool); err != nil { + return ctrl.Result{}, err } - // Reconcile machines and updates Status.Instances - remoteClient, err := r.Tracker.GetClient(ctx, client.ObjectKeyFromObject(cluster)) - if err != nil { - return ctrl.Result{}, errors.Wrap(err, "failed to generate workload cluster client") + // Second, once the Docker containers are created, reconcile the DockerMachines. This function creates a DockerMachine for each newly created Docker + // container, and handles container deletion. Instead of deleting an infrastructure instance directly, we want to delete the owner Machine. This will + // trigger a cordon and drain of the node, as well as trigger the deletion of the DockerMachine, which in turn causes the Docker container to be deleted. + // Similarly, providers will need to create InfraMachines for each instance, and instead of deleting instances directly, delete the owner Machine. + if err := r.reconcileDockerMachines(ctx, cluster, machinePool, dockerMachinePool); err != nil { + return ctrl.Result{}, err } - res, err := pool.ReconcileMachines(ctx, remoteClient) + + // Fetch the list of DockerMachines to ensure the provider IDs are up to date. + dockerMachineList, err := getDockerMachines(ctx, r.Client, *cluster, *machinePool, *dockerMachinePool) if err != nil { - return res, err + return ctrl.Result{}, err } - // Derive info from Status.Instances + // Derive providerIDList from the provider ID on each DockerMachine if it exists. The providerID is set by the DockerMachine controller. dockerMachinePool.Spec.ProviderIDList = []string{} - for _, instance := range dockerMachinePool.Status.Instances { - if instance.ProviderID != nil && instance.Ready { - dockerMachinePool.Spec.ProviderIDList = append(dockerMachinePool.Spec.ProviderIDList, *instance.ProviderID) + for _, dockerMachine := range dockerMachineList.Items { + if dockerMachine.Spec.ProviderID != nil { + dockerMachinePool.Spec.ProviderIDList = append(dockerMachinePool.Spec.ProviderIDList, *dockerMachine.Spec.ProviderID) } } - dockerMachinePool.Status.Replicas = int32(len(dockerMachinePool.Status.Instances)) + dockerMachinePool.Status.Replicas = int32(len(dockerMachineList.Items)) if dockerMachinePool.Spec.ProviderID == "" { // This is a fake provider ID which does not tie back to any docker infrastructure. In cloud providers, @@ -230,25 +311,89 @@ func (r *DockerMachinePoolReconciler) reconcileNormal(ctx context.Context, clust dockerMachinePool.Spec.ProviderID = getDockerMachinePoolProviderID(cluster.Name, dockerMachinePool.Name) } - dockerMachinePool.Status.Ready = len(dockerMachinePool.Spec.ProviderIDList) == int(*machinePool.Spec.Replicas) + if len(dockerMachinePool.Spec.ProviderIDList) == int(*machinePool.Spec.Replicas) && len(dockerMachineList.Items) == int(*machinePool.Spec.Replicas) { + dockerMachinePool.Status.Ready = true + conditions.MarkTrue(dockerMachinePool, expv1.ReplicasReadyCondition) + + return ctrl.Result{}, nil + } + + dockerMachinePool.Status.Ready = false + conditions.MarkFalse(dockerMachinePool, expv1.ReplicasReadyCondition, expv1.WaitingForReplicasReadyReason, clusterv1.ConditionSeverityInfo, "") // if some machine is still provisioning, force reconcile in few seconds to check again infrastructure. - if !dockerMachinePool.Status.Ready && res.IsZero() { - return ctrl.Result{RequeueAfter: 10 * time.Second}, nil + return ctrl.Result{RequeueAfter: requeueAfter}, nil +} + +func getDockerMachines(ctx context.Context, c client.Client, cluster clusterv1.Cluster, machinePool expv1.MachinePool, dockerMachinePool infraexpv1.DockerMachinePool) (*infrav1.DockerMachineList, error) { + dockerMachineList := &infrav1.DockerMachineList{} + labels := map[string]string{ + clusterv1.ClusterNameLabel: cluster.Name, + clusterv1.MachinePoolNameLabel: machinePool.Name, } - return res, nil + if err := c.List(ctx, dockerMachineList, client.InNamespace(dockerMachinePool.Namespace), client.MatchingLabels(labels)); err != nil { + return nil, err + } + + return dockerMachineList, nil } func getDockerMachinePoolProviderID(clusterName, dockerMachinePoolName string) string { return fmt.Sprintf("docker:////%s-dmp-%s", clusterName, dockerMachinePoolName) } +// setInfrastructureMachineKind sets the infrastructure machine kind in the status if it is not set already to support +// MachinePool Machines and returns a boolean indicating if the status was updated. +func setInfrastructureMachineKind(dockerMachinePool *infraexpv1.DockerMachinePool) bool { + if dockerMachinePool != nil && dockerMachinePool.Status.InfrastructureMachineKind != "DockerMachine" { + dockerMachinePool.Status.InfrastructureMachineKind = "DockerMachine" + return true + } + + return false +} + +// dockerMachineToDockerMachinePool creates a mapping handler to transform DockerMachine to DockerMachinePools. +func dockerMachineToDockerMachinePool(_ context.Context, o client.Object) []ctrl.Request { + dockerMachine, ok := o.(*infrav1.DockerMachine) + if !ok { + panic(fmt.Sprintf("Expected a DockerMachine but got a %T", o)) + } + + for _, ownerRef := range dockerMachine.GetOwnerReferences() { + gv, err := schema.ParseGroupVersion(ownerRef.APIVersion) + if err != nil { + return nil + } + if ownerRef.Kind == "DockerMachinePool" && gv.Group == infraexpv1.GroupVersion.Group { + return []ctrl.Request{ + { + NamespacedName: types.NamespacedName{ + Name: ownerRef.Name, + Namespace: dockerMachine.Namespace, + }, + }, + } + } + } + + return nil +} + func patchDockerMachinePool(ctx context.Context, patchHelper *patch.Helper, dockerMachinePool *infraexpv1.DockerMachinePool) error { - // TODO: add conditions + conditions.SetSummary(dockerMachinePool, + conditions.WithConditions( + expv1.ReplicasReadyCondition, + ), + ) // Patch the object, ignoring conflicts on the conditions owned by this controller. return patchHelper.Patch( ctx, dockerMachinePool, + patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{ + clusterv1.ReadyCondition, + expv1.ReplicasReadyCondition, + }}, ) } diff --git a/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller_phases.go b/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller_phases.go new file mode 100644 index 000000000000..1a7201c53ec6 --- /dev/null +++ b/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller_phases.go @@ -0,0 +1,390 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package controllers implements controller functionality. +package controllers + +import ( + "context" + "fmt" + "math/rand" + "sort" + "strings" + + "github.com/blang/semver/v4" + "github.com/pkg/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/klog/v2" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/kind/pkg/cluster/constants" + + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" + infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta1" + infraexpv1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/api/v1beta1" + "sigs.k8s.io/cluster-api/test/infrastructure/docker/internal/docker" + "sigs.k8s.io/cluster-api/test/infrastructure/kind" + "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/conditions" + "sigs.k8s.io/cluster-api/util/labels/format" +) + +// reconcileDockerContainers manages the Docker containers for a MachinePool such that it +// - Ensures the number of up-to-date Docker containers is equal to the MachinePool's desired replica count. +// - Does not delete any containers as that must be triggered in reconcileDockerMachines to ensure node cordon/drain. +// +// Providers should similarly create their infrastructure instances and reconcile any additional logic. +func (r *DockerMachinePoolReconciler) reconcileDockerContainers(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) error { + log := ctrl.LoggerFrom(ctx) + + log.V(2).Info("Reconciling Docker containers", "dockerMachinePool", dockerMachinePool.Name, "namespace", dockerMachinePool.Namespace) + + labelFilters := map[string]string{dockerMachinePoolLabel: dockerMachinePool.Name} + + machines, err := docker.ListMachinesByCluster(ctx, cluster, labelFilters) + if err != nil { + return errors.Wrapf(err, "failed to list all machines in the cluster") + } + + matchingMachineCount := len(machinesMatchingInfrastructureSpec(ctx, machines, machinePool, dockerMachinePool)) + numToCreate := int(*machinePool.Spec.Replicas) - matchingMachineCount + for i := 0; i < numToCreate; i++ { + log.V(2).Info("Creating a new Docker container for machinePool", "machinePool", machinePool.Name) + name := fmt.Sprintf("worker-%s", util.RandomString(6)) + if err := createDockerContainer(ctx, name, cluster, machinePool, dockerMachinePool); err != nil { + return errors.Wrap(err, "failed to create a new docker machine") + } + } + + return nil +} + +// createDockerContainer creates a Docker container to serve as a replica for the MachinePool. +func createDockerContainer(ctx context.Context, name string, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) error { + log := ctrl.LoggerFrom(ctx) + labelFilters := map[string]string{dockerMachinePoolLabel: dockerMachinePool.Name} + externalMachine, err := docker.NewMachine(ctx, cluster, name, labelFilters) + if err != nil { + return errors.Wrapf(err, "failed to create helper for managing the externalMachine named %s", name) + } + + // NOTE: FailureDomains don't mean much in CAPD since it's all local, but we are setting a label on + // each container, so we can check placement. + labels := map[string]string{} + for k, v := range labelFilters { + labels[k] = v + } + + if len(machinePool.Spec.FailureDomains) > 0 { + // For MachinePools placement is expected to be managed by the underlying infrastructure primitive, but + // given that there is no such an thing in CAPD, we are picking a random failure domain. + randomIndex := rand.Intn(len(machinePool.Spec.FailureDomains)) //nolint:gosec + for k, v := range docker.FailureDomainLabel(&machinePool.Spec.FailureDomains[randomIndex]) { + labels[k] = v + } + } + + log.Info("Creating container for machinePool", "name", name, "machinePool", machinePool.Name) + if err := externalMachine.Create(ctx, dockerMachinePool.Spec.Template.CustomImage, constants.WorkerNodeRoleValue, machinePool.Spec.Template.Spec.Version, labels, dockerMachinePool.Spec.Template.ExtraMounts); err != nil { + return errors.Wrapf(err, "failed to create docker machine with name %s", name) + } + return nil +} + +// reconcileDockerMachines creates and deletes DockerMachines to match the MachinePool's desired number of replicas and infrastructure spec. +// It is responsible for +// - Ensuring each Docker container has an associated DockerMachine by creating one if it doesn't already exist. +// - Ensuring that deletion for Docker container happens by calling delete on the associated Machine so that the node is cordoned/drained and the infrastructure is cleaned up. +// - Deleting DockerMachines referencing a container whose Kubernetes version or custom image no longer matches the spec. +// - Deleting DockerMachines that correspond to a deleted/non-existent Docker container. +// - Deleting DockerMachines when scaling down such that DockerMachines whose owner Machine has the clusterv1.DeleteMachineAnnotation is given priority. +func (r *DockerMachinePoolReconciler) reconcileDockerMachines(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) error { + log := ctrl.LoggerFrom(ctx) + + log.V(2).Info("Reconciling DockerMachines", "dockerMachinePool", dockerMachinePool.Name, "namespace", dockerMachinePool.Namespace) + + dockerMachineList, err := getDockerMachines(ctx, r.Client, *cluster, *machinePool, *dockerMachinePool) + if err != nil { + return err + } + + dockerMachineMap := make(map[string]infrav1.DockerMachine) + for _, dockerMachine := range dockerMachineList.Items { + dockerMachineMap[dockerMachine.Name] = dockerMachine + } + + // List the Docker containers. This corresponds to a InfraMachinePool instance for providers. + labelFilters := map[string]string{dockerMachinePoolLabel: dockerMachinePool.Name} + externalMachines, err := docker.ListMachinesByCluster(ctx, cluster, labelFilters) + if err != nil { + return errors.Wrapf(err, "failed to list all machines in the cluster") + } + + externalMachineMap := make(map[string]*docker.Machine) + for _, externalMachine := range externalMachines { + externalMachineMap[externalMachine.Name()] = externalMachine + } + + // Step 1: + // Create a DockerMachine for each Docker container so we surface the information to the user. Use the same name as the Docker container for the Docker Machine for ease of lookup. + // Providers should iterate through their infrastructure instances and ensure that each instance has a corresponding InfraMachine. + for _, machine := range externalMachines { + if _, ok := dockerMachineMap[machine.Name()]; !ok { + log.V(2).Info("Creating a new DockerMachine for Docker container", "container", machine.Name()) + dockerMachine, err := r.createDockerMachine(ctx, machine.Name(), cluster, machinePool, dockerMachinePool) + if err != nil { + return errors.Wrap(err, "failed to create a new docker machine") + } + + dockerMachineMap[dockerMachine.Name] = *dockerMachine + } else { + log.V(4).Info("DockerMachine already exists, nothing to do", "name", machine.Name()) + } + } + + // Step 2: + // Delete any DockerMachines that correspond to a deleted Docker container. + // Providers should iterate through the InfraMachines to ensure each one still corresponds to an existing infrastructure instance. + // This allows the InfraMachine (and owner Machine) to be deleted and avoid hanging resources when a user deletes an instance out-of-band. + for _, dockerMachine := range dockerMachineMap { + if _, ok := externalMachineMap[dockerMachine.Name]; !ok { + log.V(2).Info("Deleting DockerMachine with no underlying infrastructure", "dockerMachine", dockerMachine.Name) + if err := r.deleteMachinePoolMachine(ctx, dockerMachine); err != nil { + return err + } + + delete(dockerMachineMap, dockerMachine.Name) + } + } + + // Step 3: + // This handles the scale down/excess replicas case and the case where a rolling upgrade is needed. + // If there are more ready DockerMachines than desired replicas, start to delete the excess DockerMachines such that + // - DockerMachines with an outdated Kubernetes version or custom image are deleted first (i.e. the rolling upgrade). + // - DockerMachines whose owner Machine contains the clusterv1.DeleteMachineAnnotation are deleted next (to support cluster autoscaler). + // Note: we want to ensure that there are always enough ready DockerMachines before deleting anything or scaling down. + + // For each DockerMachine, fetch the owner Machine and copy the clusterv1.DeleteMachineAnnotation to the DockerMachine if it exists before sorting the DockerMachines. + // This is done just before sorting to guarantee we have the latest copy of the Machine annotations. + dockerMachinesWithAnnotation, err := r.propagateMachineDeleteAnnotation(ctx, dockerMachineMap) + if err != nil { + return err + } + + // Sort DockerMachines with the clusterv1.DeleteMachineAnnotation to the front of each list. + // If providers already have a sorting order for instance deletion, i.e. oldest first or newest first, the clusterv1.DeleteMachineAnnotation must take priority. + // For example, if deleting by oldest, we expect the InfraMachines with clusterv1.DeleteMachineAnnotation to be deleted first followed by the oldest, and the second oldest, etc. + orderedDockerMachines := orderByDeleteMachineAnnotation(dockerMachinesWithAnnotation) + + // Note: this includes DockerMachines that are out of date but still ready. This is to ensure we always have enough ready DockerMachines before deleting anything. + totalReadyMachines := 0 + for i := range orderedDockerMachines { + dockerMachine := orderedDockerMachines[i] + if dockerMachine.Status.Ready || conditions.IsTrue(&dockerMachine, clusterv1.ReadyCondition) { + totalReadyMachines++ + } + } + + outdatedMachines, readyMachines, err := r.getDeletionCandidates(ctx, orderedDockerMachines, externalMachineMap, machinePool, dockerMachinePool) + if err != nil { + return err + } + + desiredReplicas := int(*machinePool.Spec.Replicas) + overProvisionCount := totalReadyMachines - desiredReplicas + + // Loop through outdated DockerMachines first and decrement the overProvisionCount until it reaches 0. + for _, dockerMachine := range outdatedMachines { + if overProvisionCount > 0 { + log.V(2).Info("Deleting DockerMachine because it is outdated", "dockerMachine", dockerMachine.Name, "namespace", dockerMachine.Namespace) + if err := r.deleteMachinePoolMachine(ctx, dockerMachine); err != nil { + return err + } + + overProvisionCount-- + } + } + + // Then, loop through the ready DockerMachines first and decrement the overProvisionCount until it reaches 0. + for _, dockerMachine := range readyMachines { + if overProvisionCount > 0 { + log.V(2).Info("Deleting DockerMachine because it is an excess replica", "dockerMachine", dockerMachine.Name, "namespace", dockerMachine.Namespace) + if err := r.deleteMachinePoolMachine(ctx, dockerMachine); err != nil { + return err + } + + overProvisionCount-- + } + } + + return nil +} + +// createDockerMachine creates a DockerMachine to represent a Docker container in a DockerMachinePool. +// These DockerMachines have the clusterv1.ClusterNameLabel and clusterv1.MachinePoolNameLabel to support MachinePool Machines. +func (r *DockerMachinePoolReconciler) createDockerMachine(ctx context.Context, name string, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) (*infrav1.DockerMachine, error) { + log := ctrl.LoggerFrom(ctx) + + labels := map[string]string{ + clusterv1.ClusterNameLabel: cluster.Name, + clusterv1.MachinePoolNameLabel: format.MustFormatValue(machinePool.Name), + } + dockerMachine := &infrav1.DockerMachine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: dockerMachinePool.Namespace, + Name: name, + Labels: labels, + Annotations: make(map[string]string), + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: dockerMachinePool.APIVersion, + Kind: dockerMachinePool.Kind, + Name: dockerMachinePool.Name, + UID: dockerMachinePool.UID, + }, + // Note: Since the MachinePool controller has not created its owner Machine yet, we want to set the DockerMachinePool as the owner so it's not orphaned. + }, + }, + Spec: infrav1.DockerMachineSpec{ + CustomImage: dockerMachinePool.Spec.Template.CustomImage, + PreLoadImages: dockerMachinePool.Spec.Template.PreLoadImages, + ExtraMounts: dockerMachinePool.Spec.Template.ExtraMounts, + }, + } + + log.V(2).Info("Creating DockerMachine", "dockerMachine", dockerMachine.Name) + + if err := r.Client.Create(ctx, dockerMachine); err != nil { + return nil, errors.Wrap(err, "failed to create dockerMachine") + } + + return dockerMachine, nil +} + +// deleteMachinePoolMachine attempts to delete a DockerMachine and its associated owner Machine if it exists. +func (r *DockerMachinePoolReconciler) deleteMachinePoolMachine(ctx context.Context, dockerMachine infrav1.DockerMachine) error { + log := ctrl.LoggerFrom(ctx) + + machine, err := util.GetOwnerMachine(ctx, r.Client, dockerMachine.ObjectMeta) + if err != nil { + return errors.Wrapf(err, "error getting owner Machine for DockerMachine %s/%s", dockerMachine.Namespace, dockerMachine.Name) + } + // util.GetOwnerMachine() returns a nil Machine without error if there is no Machine kind in the ownerRefs, so we must verify that machine is not nil. + if machine == nil { + log.V(2).Info("No owner Machine exists for DockerMachine", "dockerMachine", klog.KObj(&dockerMachine)) + + // If the DockerMachine does not have an owner Machine, do not attempt to delete the DockerMachine as the MachinePool controller will create the + // Machine and we want to let it catch up. If we are too hasty to delete, that introduces a race condition where the DockerMachine could be deleted + // just as the Machine comes online. + + // In the case where the MachinePool is being deleted and the Machine will never come online, the DockerMachine will be deleted via its ownerRef to the + // DockerMachinePool, so that is covered as well. + + return nil + } + + log.Info("Deleting Machine for DockerMachine", "machine", klog.KObj(machine), "dockerMachine", klog.KObj(&dockerMachine)) + + if err := r.Client.Delete(ctx, machine); err != nil { + return errors.Wrapf(err, "failed to delete Machine %s/%s", machine.Namespace, machine.Name) + } + + return nil +} + +// propagateMachineDeleteAnnotation returns the DockerMachines for a MachinePool and for each DockerMachine, it copies the owner +// Machine's delete annotation to each DockerMachine if it's present. This is done just in time to ensure that the annotations are +// up to date when we sort for DockerMachine deletion. +func (r *DockerMachinePoolReconciler) propagateMachineDeleteAnnotation(ctx context.Context, dockerMachineSet map[string]infrav1.DockerMachine) ([]infrav1.DockerMachine, error) { + _ = ctrl.LoggerFrom(ctx) + + dockerMachines := []infrav1.DockerMachine{} + for _, dockerMachine := range dockerMachineSet { + machine, err := util.GetOwnerMachine(ctx, r.Client, dockerMachine.ObjectMeta) + if err != nil { + return nil, errors.Wrapf(err, "error getting owner Machine for DockerMachine %s/%s", dockerMachine.Namespace, dockerMachine.Name) + } + if machine != nil && machine.Annotations != nil { + if _, hasDeleteAnnotation := machine.Annotations[clusterv1.DeleteMachineAnnotation]; hasDeleteAnnotation { + dockerMachine.Annotations[clusterv1.DeleteMachineAnnotation] = machine.Annotations[clusterv1.DeleteMachineAnnotation] + } + } + + dockerMachines = append(dockerMachines, dockerMachine) + } + + return dockerMachines, nil +} + +// orderByDeleteMachineAnnotation will sort DockerMachines with the clusterv1.DeleteMachineAnnotation to the front of the list. +// It will preserve the existing order of the list otherwise so that it respects the existing delete priority otherwise. +func orderByDeleteMachineAnnotation(machines []infrav1.DockerMachine) []infrav1.DockerMachine { + sort.SliceStable(machines, func(i, j int) bool { + _, iHasAnnotation := machines[i].Annotations[clusterv1.DeleteMachineAnnotation] + + return iHasAnnotation + }) + + return machines +} + +// isMachineMatchingInfrastructureSpec returns true if the Docker container image matches the custom image in the DockerMachinePool spec. +func isMachineMatchingInfrastructureSpec(_ context.Context, machine *docker.Machine, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) bool { + // NOTE: With the current implementation we are checking if the machine is using a kindest/node image for the expected version, + // but not checking if the machine has the expected extra.mounts or pre.loaded images. + + semVer, err := semver.Parse(strings.TrimPrefix(*machinePool.Spec.Template.Spec.Version, "v")) + if err != nil { + // TODO: consider if to return an error + panic(errors.Wrap(err, "failed to parse DockerMachine version").Error()) + } + + kindMapping := kind.GetMapping(semVer, dockerMachinePool.Spec.Template.CustomImage) + + return machine.ContainerImage() == kindMapping.Image +} + +// machinesMatchingInfrastructureSpec returns the Docker containers matching the custom image in the DockerMachinePool spec. +func machinesMatchingInfrastructureSpec(ctx context.Context, machines []*docker.Machine, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) []*docker.Machine { + var matchingMachines []*docker.Machine + for _, machine := range machines { + if isMachineMatchingInfrastructureSpec(ctx, machine, machinePool, dockerMachinePool) { + matchingMachines = append(matchingMachines, machine) + } + } + + return matchingMachines +} + +// getDeletionCandidates returns the DockerMachines for a MachinePool that do not match the infrastructure spec followed by any DockerMachines that are ready and up to date, i.e. matching the infrastructure spec. +func (r *DockerMachinePoolReconciler) getDeletionCandidates(ctx context.Context, dockerMachines []infrav1.DockerMachine, externalMachineSet map[string]*docker.Machine, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) (outdatedMachines []infrav1.DockerMachine, readyMatchingMachines []infrav1.DockerMachine, err error) { + for i := range dockerMachines { + dockerMachine := dockerMachines[i] + externalMachine, ok := externalMachineSet[dockerMachine.Name] + if !ok { + // Note: Since we deleted any DockerMachines that do not have an associated Docker container earlier, we should never hit this case. + return nil, nil, errors.Errorf("failed to find externalMachine for DockerMachine %s/%s", dockerMachine.Namespace, dockerMachine.Name) + } + + if !isMachineMatchingInfrastructureSpec(ctx, externalMachine, machinePool, dockerMachinePool) { + outdatedMachines = append(outdatedMachines, dockerMachine) + } else if dockerMachine.Status.Ready || conditions.IsTrue(&dockerMachine, clusterv1.ReadyCondition) { + readyMatchingMachines = append(readyMatchingMachines, dockerMachine) + } + } + + return outdatedMachines, readyMatchingMachines, nil +} diff --git a/test/infrastructure/docker/exp/internal/docker/nodepool.go b/test/infrastructure/docker/exp/internal/docker/nodepool.go deleted file mode 100644 index 12920be700a8..000000000000 --- a/test/infrastructure/docker/exp/internal/docker/nodepool.go +++ /dev/null @@ -1,388 +0,0 @@ -/* -Copyright 2020 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -// Package docker implements docker functionality. -package docker - -import ( - "context" - "encoding/base64" - "fmt" - "math/rand" - "strings" - "time" - - "github.com/blang/semver/v4" - "github.com/pkg/errors" - corev1 "k8s.io/api/core/v1" - "k8s.io/klog/v2" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/kind/pkg/cluster/constants" - - clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" - bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" - expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" - infraexpv1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/api/v1beta1" - "sigs.k8s.io/cluster-api/test/infrastructure/docker/internal/docker" - "sigs.k8s.io/cluster-api/test/infrastructure/kind" - "sigs.k8s.io/cluster-api/util" -) - -const ( - dockerMachinePoolLabel = "docker.cluster.x-k8s.io/machine-pool" -) - -// NodePool is a wrapper around a collection of like machines which are owned by a DockerMachinePool. A node pool -// provides a friendly way of managing (adding, deleting, updating) a set of docker machines. The node pool will also -// sync the docker machine pool status Instances field with the state of the docker machines. -type NodePool struct { - client client.Client - cluster *clusterv1.Cluster - machinePool *expv1.MachinePool - dockerMachinePool *infraexpv1.DockerMachinePool - labelFilters map[string]string - machines []*docker.Machine -} - -// NewNodePool creates a new node pool instances. -func NewNodePool(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, mp *expv1.MachinePool, dmp *infraexpv1.DockerMachinePool) (*NodePool, error) { - np := &NodePool{ - client: c, - cluster: cluster, - machinePool: mp, - dockerMachinePool: dmp, - labelFilters: map[string]string{dockerMachinePoolLabel: dmp.Name}, - } - - if err := np.refresh(ctx); err != nil { - return np, errors.Wrapf(err, "failed to refresh the node pool") - } - return np, nil -} - -// ReconcileMachines will build enough machines to satisfy the machine pool / docker machine pool spec -// eventually delete all the machine in excess, and update the status for all the machines. -// -// NOTE: The goal for the current implementation is to verify MachinePool construct; accordingly, -// currently the nodepool supports only a recreate strategy for replacing old nodes with new ones -// (all existing machines are killed before new ones are created). -// TODO: consider if to support a Rollout strategy (a more progressive node replacement). -func (np *NodePool) ReconcileMachines(ctx context.Context, remoteClient client.Client) (ctrl.Result, error) { - desiredReplicas := int(*np.machinePool.Spec.Replicas) - - // Delete all the machines in excess (outdated machines or machines exceeding desired replica count). - machineDeleted := false - totalNumberOfMachines := 0 - for _, machine := range np.machines { - totalNumberOfMachines++ - if totalNumberOfMachines > desiredReplicas || !np.isMachineMatchingInfrastructureSpec(machine) { - externalMachine, err := docker.NewMachine(ctx, np.cluster, machine.Name(), np.labelFilters) - if err != nil { - return ctrl.Result{}, errors.Wrapf(err, "failed to create helper for managing the externalMachine named %s", machine.Name()) - } - if err := externalMachine.Delete(ctx); err != nil { - return ctrl.Result{}, errors.Wrapf(err, "failed to delete machine %s", machine.Name()) - } - machineDeleted = true - totalNumberOfMachines-- // remove deleted machine from the count - } - } - if machineDeleted { - if err := np.refresh(ctx); err != nil { - return ctrl.Result{}, errors.Wrapf(err, "failed to refresh the node pool") - } - } - - // Add new machines if missing. - machineAdded := false - matchingMachineCount := len(np.machinesMatchingInfrastructureSpec()) - if matchingMachineCount < desiredReplicas { - for i := 0; i < desiredReplicas-matchingMachineCount; i++ { - if err := np.addMachine(ctx); err != nil { - return ctrl.Result{}, errors.Wrap(err, "failed to create a new docker machine") - } - machineAdded = true - } - } - if machineAdded { - if err := np.refresh(ctx); err != nil { - return ctrl.Result{}, errors.Wrapf(err, "failed to refresh the node pool") - } - } - - // First remove instance status for machines no longer existing, then reconcile the existing machines. - // NOTE: the status is the only source of truth for understanding if the machine is already bootstrapped, ready etc. - // so we are preserving the existing status and using it as a bases for the next reconcile machine. - instances := make([]infraexpv1.DockerMachinePoolInstanceStatus, 0, len(np.machines)) - for i := range np.dockerMachinePool.Status.Instances { - instance := np.dockerMachinePool.Status.Instances[i] - for j := range np.machines { - if instance.InstanceName == np.machines[j].Name() { - instances = append(instances, instance) - break - } - } - } - np.dockerMachinePool.Status.Instances = instances - - result := ctrl.Result{} - for i := range np.machines { - machine := np.machines[i] - if res, err := np.reconcileMachine(ctx, machine, remoteClient); err != nil || !res.IsZero() { - if err != nil { - return ctrl.Result{}, errors.Wrap(err, "failed to reconcile machine") - } - result = util.LowestNonZeroResult(result, res) - } - } - return result, nil -} - -// Delete will delete all of the machines in the node pool. -func (np *NodePool) Delete(ctx context.Context) error { - for _, machine := range np.machines { - externalMachine, err := docker.NewMachine(ctx, np.cluster, machine.Name(), np.labelFilters) - if err != nil { - return errors.Wrapf(err, "failed to create helper for managing the externalMachine named %s", machine.Name()) - } - - if err := externalMachine.Delete(ctx); err != nil { - return errors.Wrapf(err, "failed to delete machine %s", machine.Name()) - } - } - - return nil -} - -func (np *NodePool) isMachineMatchingInfrastructureSpec(machine *docker.Machine) bool { - // NOTE: With the current implementation we are checking if the machine is using a kindest/node image for the expected version, - // but not checking if the machine has the expected extra.mounts or pre.loaded images. - - semVer, err := semver.Parse(strings.TrimPrefix(*np.machinePool.Spec.Template.Spec.Version, "v")) - if err != nil { - // TODO: consider if to return an error - panic(errors.Wrap(err, "failed to parse DockerMachine version").Error()) - } - - kindMapping := kind.GetMapping(semVer, np.dockerMachinePool.Spec.Template.CustomImage) - - return machine.ContainerImage() == kindMapping.Image -} - -// machinesMatchingInfrastructureSpec returns all of the docker.Machines which match the machine pool / docker machine pool spec. -func (np *NodePool) machinesMatchingInfrastructureSpec() []*docker.Machine { - var matchingMachines []*docker.Machine - for _, machine := range np.machines { - if np.isMachineMatchingInfrastructureSpec(machine) { - matchingMachines = append(matchingMachines, machine) - } - } - - return matchingMachines -} - -// addMachine will add a new machine to the node pool and update the docker machine pool status. -func (np *NodePool) addMachine(ctx context.Context) error { - instanceName := fmt.Sprintf("worker-%s", util.RandomString(6)) - externalMachine, err := docker.NewMachine(ctx, np.cluster, instanceName, np.labelFilters) - if err != nil { - return errors.Wrapf(err, "failed to create helper for managing the externalMachine named %s", instanceName) - } - - // NOTE: FailureDomains don't mean much in CAPD since it's all local, but we are setting a label on - // each container, so we can check placement. - labels := map[string]string{} - for k, v := range np.labelFilters { - labels[k] = v - } - - if len(np.machinePool.Spec.FailureDomains) > 0 { - // For MachinePools placement is expected to be managed by the underlying infrastructure primitive, but - // given that there is no such an thing in CAPD, we are picking a random failure domain. - randomIndex := rand.Intn(len(np.machinePool.Spec.FailureDomains)) //nolint:gosec - for k, v := range docker.FailureDomainLabel(&np.machinePool.Spec.FailureDomains[randomIndex]) { - labels[k] = v - } - } - - if err := externalMachine.Create(ctx, np.dockerMachinePool.Spec.Template.CustomImage, constants.WorkerNodeRoleValue, np.machinePool.Spec.Template.Spec.Version, labels, np.dockerMachinePool.Spec.Template.ExtraMounts); err != nil { - return errors.Wrapf(err, "failed to create docker machine with instance name %s", instanceName) - } - return nil -} - -// refresh asks docker to list all the machines matching the node pool label and updates the cached list of node pool -// machines. -func (np *NodePool) refresh(ctx context.Context) error { - machines, err := docker.ListMachinesByCluster(ctx, np.cluster, np.labelFilters) - if err != nil { - return errors.Wrapf(err, "failed to list all machines in the cluster") - } - - np.machines = make([]*docker.Machine, 0, len(machines)) - for i := range machines { - machine := machines[i] - // makes sure no control plane machines gets selected by chance. - if !machine.IsControlPlane() { - np.machines = append(np.machines, machine) - } - } - return nil -} - -// reconcileMachine will build and provision a docker machine and update the docker machine pool status for that instance. -func (np *NodePool) reconcileMachine(ctx context.Context, machine *docker.Machine, remoteClient client.Client) (ctrl.Result, error) { - log := ctrl.LoggerFrom(ctx) - - var machineStatus infraexpv1.DockerMachinePoolInstanceStatus - isFound := false - for _, instanceStatus := range np.dockerMachinePool.Status.Instances { - if instanceStatus.InstanceName == machine.Name() { - machineStatus = instanceStatus - isFound = true - } - } - if !isFound { - log.Info("Creating instance record", "instance", machine.Name()) - machineStatus = infraexpv1.DockerMachinePoolInstanceStatus{ - InstanceName: machine.Name(), - Version: np.machinePool.Spec.Template.Spec.Version, - } - np.dockerMachinePool.Status.Instances = append(np.dockerMachinePool.Status.Instances, machineStatus) - // return to surface the new machine exists. - return ctrl.Result{Requeue: true}, nil - } - - defer func() { - for i, instanceStatus := range np.dockerMachinePool.Status.Instances { - if instanceStatus.InstanceName == machine.Name() { - np.dockerMachinePool.Status.Instances[i] = machineStatus - } - } - }() - - externalMachine, err := docker.NewMachine(ctx, np.cluster, machine.Name(), np.labelFilters) - if err != nil { - return ctrl.Result{}, errors.Wrapf(err, "failed to create helper for managing the externalMachine named %s", machine.Name()) - } - - // if the machine isn't bootstrapped, only then run bootstrap scripts - if !machineStatus.Bootstrapped { - timeoutCtx, cancel := context.WithTimeout(ctx, 3*time.Minute) - defer cancel() - - // Check for bootstrap success - // We have to check here to make this reentrant for cases where the bootstrap works - // but bootstrapped is never set on the object. We only try to bootstrap if the machine - // is not already bootstrapped. - if err := externalMachine.CheckForBootstrapSuccess(timeoutCtx, false); err != nil { - log.Info("Bootstrapping instance", "instance", machine.Name()) - if err := externalMachine.PreloadLoadImages(timeoutCtx, np.dockerMachinePool.Spec.Template.PreLoadImages); err != nil { - return ctrl.Result{}, errors.Wrapf(err, "failed to pre-load images into the docker machine with instance name %s", machine.Name()) - } - - bootstrapData, format, err := getBootstrapData(timeoutCtx, np.client, np.machinePool) - if err != nil { - return ctrl.Result{}, errors.Wrapf(err, "failed to get bootstrap data for instance named %s", machine.Name()) - } - - // Run the bootstrap script. Simulates cloud-init/Ignition. - if err := externalMachine.ExecBootstrap(timeoutCtx, bootstrapData, format, np.machinePool.Spec.Template.Spec.Version, np.dockerMachinePool.Spec.Template.CustomImage); err != nil { - return ctrl.Result{}, errors.Wrapf(err, "failed to exec DockerMachinePool instance bootstrap for instance named %s", machine.Name()) - } - // Check for bootstrap success - if err := externalMachine.CheckForBootstrapSuccess(timeoutCtx, true); err != nil { - return ctrl.Result{}, errors.Wrap(err, "failed to check for existence of bootstrap success file at /run/cluster-api/bootstrap-success.complete") - } - } - machineStatus.Bootstrapped = true - - // return to surface the machine has been bootstrapped. - return ctrl.Result{Requeue: true}, nil - } - - if machineStatus.Addresses == nil { - log.Info("Fetching instance addresses", "instance", machine.Name()) - // set address in machine status - machineAddresses, err := externalMachine.Address(ctx) - if err != nil { - // Requeue if there is an error, as this is likely momentary load balancer - // state changes during control plane provisioning. - return ctrl.Result{Requeue: true}, nil //nolint:nilerr - } - - machineStatus.Addresses = []clusterv1.MachineAddress{ - { - Type: clusterv1.MachineHostName, - Address: externalMachine.ContainerName(), - }, - } - for _, addr := range machineAddresses { - machineStatus.Addresses = append(machineStatus.Addresses, - clusterv1.MachineAddress{ - Type: clusterv1.MachineInternalIP, - Address: addr, - }, - clusterv1.MachineAddress{ - Type: clusterv1.MachineExternalIP, - Address: addr, - }) - } - } - - if machineStatus.ProviderID == nil { - log.Info("Fetching instance provider ID", "instance", machine.Name()) - // Usually a cloud provider will do this, but there is no docker-cloud provider. - // Requeue if there is an error, as this is likely momentary load balancer - // state changes during control plane provisioning. - if err = externalMachine.SetNodeProviderID(ctx, remoteClient); err != nil { - log.V(4).Info("transient error setting the provider id") - return ctrl.Result{Requeue: true}, nil //nolint:nilerr - } - // Set ProviderID so the Cluster API Machine Controller can pull it - providerID := externalMachine.ProviderID() - machineStatus.ProviderID = &providerID - } - - machineStatus.Ready = true - return ctrl.Result{}, nil -} - -// getBootstrapData fetches the bootstrap data for the machine pool. -func getBootstrapData(ctx context.Context, c client.Client, machinePool *expv1.MachinePool) (string, bootstrapv1.Format, error) { - if machinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil { - return "", "", errors.New("error retrieving bootstrap data: linked MachinePool's bootstrap.dataSecretName is nil") - } - - s := &corev1.Secret{} - key := client.ObjectKey{Namespace: machinePool.GetNamespace(), Name: *machinePool.Spec.Template.Spec.Bootstrap.DataSecretName} - if err := c.Get(ctx, key, s); err != nil { - return "", "", errors.Wrapf(err, "failed to retrieve bootstrap data secret for DockerMachinePool instance %s", klog.KObj(machinePool)) - } - - value, ok := s.Data["value"] - if !ok { - return "", "", errors.New("error retrieving bootstrap data: secret value key is missing") - } - - format := s.Data["format"] - if len(format) == 0 { - format = []byte(bootstrapv1.CloudConfig) - } - - return base64.StdEncoding.EncodeToString(value), bootstrapv1.Format(format), nil -} diff --git a/test/infrastructure/docker/internal/controllers/dockermachine_controller.go b/test/infrastructure/docker/internal/controllers/dockermachine_controller.go index f24c0bd5b6ba..68b3553ff983 100644 --- a/test/infrastructure/docker/internal/controllers/dockermachine_controller.go +++ b/test/infrastructure/docker/internal/controllers/dockermachine_controller.go @@ -38,12 +38,14 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" "sigs.k8s.io/cluster-api/controllers/remote" + utilexp "sigs.k8s.io/cluster-api/exp/util" "sigs.k8s.io/cluster-api/test/infrastructure/container" infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta1" "sigs.k8s.io/cluster-api/test/infrastructure/docker/internal/docker" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/conditions" + "sigs.k8s.io/cluster-api/util/labels" clog "sigs.k8s.io/cluster-api/util/log" "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/cluster-api/util/predicates" @@ -157,8 +159,18 @@ func (r *DockerMachineReconciler) Reconcile(ctx context.Context, req ctrl.Reques return ctrl.Result{}, nil } - // Create a helper for managing the docker container hosting the machine. - externalMachine, err := docker.NewMachine(ctx, cluster, machine.Name, nil) + // Create a helper for managing the Docker container hosting the machine. + // The DockerMachine needs a way to know the name of the Docker container, before MachinePool Machines were implemented, it used the name of the owner Machine. + // But since the DockerMachine type is used for both MachineDeployment Machines and MachinePool Machines, we need to accommodate both: + // - For MachineDeployments/Control Planes, we continue using the name of the Machine so that it is backwards compatible in the CAPI version upgrade scenario . + // - For MachinePools, the order of creation is Docker container -> DockerMachine -> Machine. Since the Docker container is created first, and the Machine name is + // randomly generated by the MP controller, we create the DockerMachine with the same name as the container to maintain this association. + name := machine.Name + if labels.IsMachinePoolOwned(dockerMachine) { + name = dockerMachine.Name + } + + externalMachine, err := docker.NewMachine(ctx, cluster, name, nil) if err != nil { return ctrl.Result{}, errors.Wrapf(err, "failed to create helper for managing the externalMachine") } @@ -221,6 +233,26 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, cluster * return ctrl.Result{}, nil } + var dataSecretName *string + var version *string + + if labels.IsMachinePoolOwned(dockerMachine) { + machinePool, err := utilexp.GetMachinePoolByLabels(ctx, r.Client, dockerMachine.GetNamespace(), dockerMachine.Labels) + if err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to get machine pool for DockerMachine %s/%s", dockerMachine.GetNamespace(), dockerMachine.GetName()) + } + if machinePool == nil { + log.Info("No MachinePool matching labels found, returning without error") + return ctrl.Result{}, nil + } + + dataSecretName = machinePool.Spec.Template.Spec.Bootstrap.DataSecretName + version = machinePool.Spec.Template.Spec.Version + } else { + dataSecretName = machine.Spec.Bootstrap.DataSecretName + version = machine.Spec.Version + } + // if the machine is already provisioned, return if dockerMachine.Spec.ProviderID != nil { // ensure ready state is set. @@ -240,7 +272,7 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, cluster * } // Make sure bootstrap data is available and populated. - if machine.Spec.Bootstrap.DataSecretName == nil { + if dataSecretName == nil { if !util.IsControlPlaneMachine(machine) && !conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) { log.Info("Waiting for the control plane to be initialized") conditions.MarkFalse(dockerMachine, infrav1.ContainerProvisionedCondition, clusterv1.WaitingForControlPlaneAvailableReason, clusterv1.ConditionSeverityInfo, "") @@ -318,7 +350,8 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, cluster * // but bootstrapped is never set on the object. We only try to bootstrap if the machine // is not already bootstrapped. if err := externalMachine.CheckForBootstrapSuccess(timeoutCtx, false); err != nil { - bootstrapData, format, err := r.getBootstrapData(timeoutCtx, machine) + // We know the bootstrap data is not nil because we checked above. + bootstrapData, format, err := r.getBootstrapData(timeoutCtx, dockerMachine.Namespace, *dataSecretName) if err != nil { return ctrl.Result{}, err } @@ -346,10 +379,11 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, cluster * }() // Run the bootstrap script. Simulates cloud-init/Ignition. - if err := externalMachine.ExecBootstrap(timeoutCtx, bootstrapData, format, machine.Spec.Version, dockerMachine.Spec.CustomImage); err != nil { + if err := externalMachine.ExecBootstrap(timeoutCtx, bootstrapData, format, version, dockerMachine.Spec.CustomImage); err != nil { conditions.MarkFalse(dockerMachine, infrav1.BootstrapExecSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityWarning, "Repeating bootstrap") return ctrl.Result{}, errors.Wrap(err, "failed to exec DockerMachine bootstrap") } + // Check for bootstrap success if err := externalMachine.CheckForBootstrapSuccess(timeoutCtx, true); err != nil { conditions.MarkFalse(dockerMachine, infrav1.BootstrapExecSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityWarning, "Repeating bootstrap") @@ -501,15 +535,11 @@ func (r *DockerMachineReconciler) DockerClusterToDockerMachines(ctx context.Cont return result } -func (r *DockerMachineReconciler) getBootstrapData(ctx context.Context, machine *clusterv1.Machine) (string, bootstrapv1.Format, error) { - if machine.Spec.Bootstrap.DataSecretName == nil { - return "", "", errors.New("error retrieving bootstrap data: linked Machine's bootstrap.dataSecretName is nil") - } - +func (r *DockerMachineReconciler) getBootstrapData(ctx context.Context, namespace string, dataSecretName string) (string, bootstrapv1.Format, error) { s := &corev1.Secret{} - key := client.ObjectKey{Namespace: machine.GetNamespace(), Name: *machine.Spec.Bootstrap.DataSecretName} + key := client.ObjectKey{Namespace: namespace, Name: dataSecretName} if err := r.Client.Get(ctx, key, s); err != nil { - return "", "", errors.Wrapf(err, "failed to retrieve bootstrap data secret for DockerMachine %s", klog.KObj(machine)) + return "", "", errors.Wrapf(err, "failed to retrieve bootstrap data secret %s", dataSecretName) } value, ok := s.Data["value"] diff --git a/util/labels/helpers.go b/util/labels/helpers.go index 848ec37cdf40..c13ee48e1e19 100644 --- a/util/labels/helpers.go +++ b/util/labels/helpers.go @@ -29,6 +29,21 @@ func IsTopologyOwned(o metav1.Object) bool { return ok } +// IsMachinePoolOwned returns true if the object has the `cluster.x-k8s.io/pool-name` label or is owned by a MachinePool. +func IsMachinePoolOwned(o metav1.Object) bool { + if _, ok := o.GetLabels()[clusterv1.MachinePoolNameLabel]; ok { + return true + } + + for _, owner := range o.GetOwnerReferences() { + if owner.Kind == "MachinePool" { + return true + } + } + + return false +} + // HasWatchLabel returns true if the object has a label with the WatchLabel key matching the given value. func HasWatchLabel(o metav1.Object, labelValue string) bool { val, ok := o.GetLabels()[clusterv1.WatchLabel] diff --git a/util/labels/helpers_test.go b/util/labels/helpers_test.go index d627014800f2..00af2f5abb1b 100644 --- a/util/labels/helpers_test.go +++ b/util/labels/helpers_test.go @@ -84,3 +84,60 @@ func TestHasWatchLabel(t *testing.T) { }) } } + +func TestIsMachinePoolOwned(t *testing.T) { + tests := []struct { + name string + object metav1.Object + expected bool + }{ + { + name: "machine is a MachinePoolMachine", + object: &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + clusterv1.MachinePoolNameLabel: "foo", + }, + }, + }, + expected: true, + }, + { + name: "random type is machinepool owned", + object: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + clusterv1.MachinePoolNameLabel: "foo", + }, + }, + }, + expected: true, + }, + { + name: "machine not a MachinePoolMachine with random labels", + object: &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "foo": "bar", + }, + }, + }, + expected: false, + }, + { + name: "machine is not a MachinePoolMachine with no labels", + object: &clusterv1.Machine{}, + expected: false, + }, + } + + for i := range tests { + tt := tests[i] + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + result := IsMachinePoolOwned(tt.object) + g.Expect(result).To(Equal(tt.expected)) + }) + } +}