From 7770f9c5805a02ed9f42bae24eec5288862ff329 Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Wed, 22 Nov 2023 17:29:37 -0800 Subject: [PATCH] Add implementation for NodeUpgradeController (#7061) * Add implementation for NodeUpgradeController * Add conditions to NodeUpgrader * Fix linter and add unit tests * improve codecov * cleanup spec objects * Add some TODOs for cleanup * fix linter * Add more coverage * resolve comments --- Makefile | 1 + ...ywhere.eks.amazonaws.com_nodeupgrades.yaml | 54 ++- config/manifest/eksa-components.yaml | 74 +++- config/rbac/role.yaml | 20 + controllers/factory.go | 2 + controllers/factory_test.go | 60 +++ controllers/mocks/nodeupgrade_controller.go | 51 +++ controllers/nodeupgrade_controller.go | 372 +++++++++++++++++- controllers/nodeupgrade_controller_test.go | 286 ++++++++++++++ pkg/api/v1alpha1/nodeupgrade_types.go | 60 ++- pkg/api/v1alpha1/zz_generated.deepcopy.go | 3 +- ...cted_first_control_plane_upgrader_pod.yaml | 114 ++++++ ...ected_rest_control_plane_upgrader_pod.yaml | 112 ++++++ .../expected_worker_upgrader_pod.yaml | 112 ++++++ pkg/nodeupgrader/upgrader.go | 144 +++++++ pkg/nodeupgrader/upgrader_test.go | 48 +++ 16 files changed, 1459 insertions(+), 54 deletions(-) create mode 100644 controllers/mocks/nodeupgrade_controller.go create mode 100644 controllers/nodeupgrade_controller_test.go create mode 100644 pkg/nodeupgrader/testdata/expected_first_control_plane_upgrader_pod.yaml create mode 100755 pkg/nodeupgrader/testdata/expected_rest_control_plane_upgrader_pod.yaml create mode 100755 pkg/nodeupgrader/testdata/expected_worker_upgrader_pod.yaml create mode 100644 pkg/nodeupgrader/upgrader.go create mode 100644 pkg/nodeupgrader/upgrader_test.go diff --git a/Makefile b/Makefile index 42f99a0be0e3..2a2ef6067497 100644 --- a/Makefile +++ b/Makefile @@ -603,6 +603,7 @@ mocks: ## Generate mocks ${MOCKGEN} -destination=pkg/controller/clusters/mocks/ipvalidator.go -package=mocks -source "pkg/controller/clusters/ipvalidator.go" IPUniquenessValidator ${MOCKGEN} -destination=pkg/registry/mocks/storage.go -package=mocks -source "pkg/registry/storage.go" StorageClient ${MOCKGEN} -destination=pkg/registry/mocks/repository.go -package=mocks oras.land/oras-go/v2/registry Repository + ${MOCKGEN} -destination=controllers/mocks/nodeupgrade_controller.go -package=mocks -source "controllers/nodeupgrade_controller.go" RemoteClientRegistry .PHONY: verify-mocks verify-mocks: mocks ## Verify if mocks need to be updated diff --git a/config/crd/bases/anywhere.eks.amazonaws.com_nodeupgrades.yaml b/config/crd/bases/anywhere.eks.amazonaws.com_nodeupgrades.yaml index 3aae1ebc06e1..90ea218b0ab5 100644 --- a/config/crd/bases/anywhere.eks.amazonaws.com_nodeupgrades.yaml +++ b/config/crd/bases/anywhere.eks.amazonaws.com_nodeupgrades.yaml @@ -40,29 +40,53 @@ spec: type: string etcdVersion: type: string - kubeletVersion: - type: string + firstNodeToBeUpgraded: + description: FirstNodeToBeUpgraded signifies that the Node is the + first node to be upgraded. This flag is only valid for control plane + nodes and ignored for worker nodes. + type: boolean kubernetesVersion: type: string machine: + description: Machine is a reference to the CAPI Machine that needs + to be upgraded. properties: - kind: + apiVersion: + description: API version of the referent. 
type: string - name: + fieldPath: + description: 'If referring to a piece of an object instead of + an entire object, this string should contain a valid JSON/Go + field access statement, such as desiredState.manifest.containers[2]. + For example, if the object reference is to a container within + a pod, this would take on a value like: "spec.containers{name}" + (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" + (container with index 2 in this pod). This syntax is chosen + only to have some well-defined way of referencing a part of + an object. TODO: this design is not final and this field is + subject to change in the future.' type: string - type: object - node: - properties: kind: + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' type: string name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + namespace: + description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/' + type: string + resourceVersion: + description: 'Specific resourceVersion to which this reference + is made, if any. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency' + type: string + uid: + description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids' type: string type: object required: - - kubeletVersion - kubernetesVersion - machine - - node type: object status: description: NodeUpgradeStatus defines the observed state of NodeUpgrade. @@ -70,8 +94,6 @@ spec: completed: type: boolean conditions: - description: Conditions provide observations of the operational state - of a Cluster API resource. items: description: Condition defines an observation of a Cluster API resource operational state. @@ -115,11 +137,11 @@ spec: - type type: object type: array - phase: - type: string - required: - - completed - - phase + observedGeneration: + description: ObservedGeneration is the latest generation observed + by the controller. + format: int64 + type: integer type: object type: object served: true diff --git a/config/manifest/eksa-components.yaml b/config/manifest/eksa-components.yaml index 829028d09aca..e74b83eef495 100644 --- a/config/manifest/eksa-components.yaml +++ b/config/manifest/eksa-components.yaml @@ -4893,29 +4893,53 @@ spec: type: string etcdVersion: type: string - kubeletVersion: - type: string + firstNodeToBeUpgraded: + description: FirstNodeToBeUpgraded signifies that the Node is the + first node to be upgraded. This flag is only valid for control plane + nodes and ignored for worker nodes. + type: boolean kubernetesVersion: type: string machine: + description: Machine is a reference to the CAPI Machine that needs + to be upgraded. properties: - kind: + apiVersion: + description: API version of the referent. type: string - name: + fieldPath: + description: 'If referring to a piece of an object instead of + an entire object, this string should contain a valid JSON/Go + field access statement, such as desiredState.manifest.containers[2]. 
+ For example, if the object reference is to a container within + a pod, this would take on a value like: "spec.containers{name}" + (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" + (container with index 2 in this pod). This syntax is chosen + only to have some well-defined way of referencing a part of + an object. TODO: this design is not final and this field is + subject to change in the future.' type: string - type: object - node: - properties: kind: + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' type: string name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + namespace: + description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/' + type: string + resourceVersion: + description: 'Specific resourceVersion to which this reference + is made, if any. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency' + type: string + uid: + description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids' type: string type: object required: - - kubeletVersion - kubernetesVersion - machine - - node type: object status: description: NodeUpgradeStatus defines the observed state of NodeUpgrade. @@ -4923,8 +4947,6 @@ spec: completed: type: boolean conditions: - description: Conditions provide observations of the operational state - of a Cluster API resource. items: description: Condition defines an observation of a Cluster API resource operational state. @@ -4968,11 +4990,11 @@ spec: - type type: object type: array - phase: - type: string - required: - - completed - - phase + observedGeneration: + description: ObservedGeneration is the latest generation observed + by the controller. + format: int64 + type: integer type: object type: object served: true @@ -6659,6 +6681,16 @@ rules: - nodes verbs: - list +- apiGroups: + - "" + resources: + - pods + verbs: + - create + - delete + - get + - list + - watch - apiGroups: - "" resources: @@ -6888,6 +6920,16 @@ rules: - list - patch - watch +- apiGroups: + - cluster.x-k8s.io + resources: + - machines + verbs: + - get + - list + - patch + - update + - watch - apiGroups: - clusterctl.cluster.x-k8s.io resources: diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 310d5a313ec4..b7b7a278f06f 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -27,6 +27,16 @@ rules: - nodes verbs: - list +- apiGroups: + - "" + resources: + - pods + verbs: + - create + - delete + - get + - list + - watch - apiGroups: - "" resources: @@ -256,6 +266,16 @@ rules: - list - patch - watch +- apiGroups: + - cluster.x-k8s.io + resources: + - machines + verbs: + - get + - list + - patch + - update + - watch - apiGroups: - clusterctl.cluster.x-k8s.io resources: diff --git a/controllers/factory.go b/controllers/factory.go index ccef722b2483..7ff9c4764c2d 100644 --- a/controllers/factory.go +++ b/controllers/factory.go @@ -590,6 +590,7 @@ func (f *Factory) WithMachineDeploymentUpgradeReconciler() *Factory { // WithNodeUpgradeReconciler builds the WithNodeUpgrade reconciler. 
func (f *Factory) WithNodeUpgradeReconciler() *Factory { + f.withTracker() f.buildSteps = append(f.buildSteps, func(ctx context.Context) error { if f.reconcilers.NodeUpgradeReconciler != nil { return nil @@ -597,6 +598,7 @@ func (f *Factory) WithNodeUpgradeReconciler() *Factory { f.reconcilers.NodeUpgradeReconciler = NewNodeUpgradeReconciler( f.manager.GetClient(), + f.tracker, ) return nil diff --git a/controllers/factory_test.go b/controllers/factory_test.go index 2aa8c1eb0e86..2618558bff28 100644 --- a/controllers/factory_test.go +++ b/controllers/factory_test.go @@ -229,3 +229,63 @@ func TestFactoryWithNutanixDatacenterReconciler(t *testing.T) { g.Expect(err).NotTo(HaveOccurred()) g.Expect(reconcilers.NutanixDatacenterReconciler).NotTo(BeNil()) } + +func TestFactoryWithNodeUpgradeReconciler(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + logger := nullLog() + ctrl := gomock.NewController(t) + manager := mocks.NewMockManager(ctrl) + manager.EXPECT().GetClient().AnyTimes() + manager.EXPECT().GetScheme().AnyTimes() + + f := controllers.NewFactory(logger, manager). + WithNodeUpgradeReconciler() + + // testing idempotence + f.WithNodeUpgradeReconciler() + + reconcilers, err := f.Build(ctx) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(reconcilers.NodeUpgradeReconciler).NotTo(BeNil()) +} + +func TestFactoryWithControlPlaneUpgradeReconciler(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + logger := nullLog() + ctrl := gomock.NewController(t) + manager := mocks.NewMockManager(ctrl) + manager.EXPECT().GetClient().AnyTimes() + manager.EXPECT().GetScheme().AnyTimes() + + f := controllers.NewFactory(logger, manager). + WithControlPlaneUpgradeReconciler() + + // testing idempotence + f.WithControlPlaneUpgradeReconciler() + + reconcilers, err := f.Build(ctx) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(reconcilers.ControlPlaneUpgradeReconciler).NotTo(BeNil()) +} + +func TestFactoryWithMachineDeploymentUpgradeReconciler(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + logger := nullLog() + ctrl := gomock.NewController(t) + manager := mocks.NewMockManager(ctrl) + manager.EXPECT().GetClient().AnyTimes() + manager.EXPECT().GetScheme().AnyTimes() + + f := controllers.NewFactory(logger, manager). + WithMachineDeploymentUpgradeReconciler() + + // testing idempotence + f.WithMachineDeploymentUpgradeReconciler() + + reconcilers, err := f.Build(ctx) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(reconcilers.MachineDeploymentUpgradeReconciler).NotTo(BeNil()) +} diff --git a/controllers/mocks/nodeupgrade_controller.go b/controllers/mocks/nodeupgrade_controller.go new file mode 100644 index 000000000000..f8d6dd178a4b --- /dev/null +++ b/controllers/mocks/nodeupgrade_controller.go @@ -0,0 +1,51 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: controllers/nodeupgrade_controller.go + +// Package mocks is a generated GoMock package. +package mocks + +import ( + context "context" + reflect "reflect" + + gomock "github.com/golang/mock/gomock" + client "sigs.k8s.io/controller-runtime/pkg/client" +) + +// MockRemoteClientRegistry is a mock of RemoteClientRegistry interface. +type MockRemoteClientRegistry struct { + ctrl *gomock.Controller + recorder *MockRemoteClientRegistryMockRecorder +} + +// MockRemoteClientRegistryMockRecorder is the mock recorder for MockRemoteClientRegistry. +type MockRemoteClientRegistryMockRecorder struct { + mock *MockRemoteClientRegistry +} + +// NewMockRemoteClientRegistry creates a new mock instance. 
+func NewMockRemoteClientRegistry(ctrl *gomock.Controller) *MockRemoteClientRegistry { + mock := &MockRemoteClientRegistry{ctrl: ctrl} + mock.recorder = &MockRemoteClientRegistryMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockRemoteClientRegistry) EXPECT() *MockRemoteClientRegistryMockRecorder { + return m.recorder +} + +// GetClient mocks base method. +func (m *MockRemoteClientRegistry) GetClient(ctx context.Context, cluster client.ObjectKey) (client.Client, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetClient", ctx, cluster) + ret0, _ := ret[0].(client.Client) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetClient indicates an expected call of GetClient. +func (mr *MockRemoteClientRegistryMockRecorder) GetClient(ctx, cluster interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetClient", reflect.TypeOf((*MockRemoteClientRegistry)(nil).GetClient), ctx, cluster) +} diff --git a/controllers/nodeupgrade_controller.go b/controllers/nodeupgrade_controller.go index 2ca871cc19e2..8019c504c571 100644 --- a/controllers/nodeupgrade_controller.go +++ b/controllers/nodeupgrade_controller.go @@ -2,42 +2,390 @@ package controllers import ( "context" + "fmt" + "time" + "github.com/go-logr/logr" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + kerrors "k8s.io/apimachinery/pkg/util/errors" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util/conditions" + "sigs.k8s.io/cluster-api/util/patch" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/reconcile" anywherev1 "github.com/aws/eks-anywhere/pkg/api/v1alpha1" + "github.com/aws/eks-anywhere/pkg/constants" + upgrader "github.com/aws/eks-anywhere/pkg/nodeupgrader" ) +const ( + // TODO(in-place): Get this image from the bundle instead of using the hardcoded one. + defaultUpgraderImage = "public.ecr.aws/t0n3a9y4/aws/upgrader:v1.28.3-eks-1-28-9" + controlPlaneLabel = "node-role.kubernetes.io/control-plane" + podDNEMessage = "Upgrader pod does not exist" + + // nodeUpgradeFinalizerName is the finalizer added to NodeUpgrade objects to handle deletion. + nodeUpgradeFinalizerName = "nodeupgrades.anywhere.eks.amazonaws.com/finalizer" +) + +// RemoteClientRegistry defines methods for remote cluster controller clients. +type RemoteClientRegistry interface { + GetClient(ctx context.Context, cluster client.ObjectKey) (client.Client, error) +} + // NodeUpgradeReconciler reconciles a NodeUpgrade object. type NodeUpgradeReconciler struct { - client client.Client + client client.Client + log logr.Logger + remoteClientRegistry RemoteClientRegistry } // NewNodeUpgradeReconciler returns a new instance of NodeUpgradeReconciler. -func NewNodeUpgradeReconciler(client client.Client) *NodeUpgradeReconciler { +func NewNodeUpgradeReconciler(client client.Client, remoteClientRegistry RemoteClientRegistry) *NodeUpgradeReconciler { return &NodeUpgradeReconciler{ - client: client, + client: client, + remoteClientRegistry: remoteClientRegistry, + log: ctrl.Log.WithName("NodeUpgradeController"), } } +// SetupWithManager sets up the controller with the Manager. 
+func (r *NodeUpgradeReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&anywherev1.NodeUpgrade{}). + Complete(r) +} + //+kubebuilder:rbac:groups=anywhere.eks.amazonaws.com,resources=nodeupgrades,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=anywhere.eks.amazonaws.com,resources=nodeupgrades/status,verbs=get;update;patch //+kubebuilder:rbac:groups=anywhere.eks.amazonaws.com,resources=nodeupgrades/finalizers,verbs=update +//+kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch;create;delete +//+kubebuilder:rbac:groups="cluster.x-k8s.io",resources=machines,verbs=list;watch;get;patch;update // Reconcile reconciles a NodeUpgrade object. -func (r *NodeUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - _ = log.FromContext(ctx) +// nolint:gocyclo +func (r *NodeUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, reterr error) { + // TODO(in-place): Add validating webhook to block updating the nodeUpgrade object. + // It should be immutable. If it needs to be changed, a new spec should be applied. + + log := r.log.WithValues("NodeUpgrade", req.NamespacedName) + + log.Info("Reconciling NodeUpgrade object") + nodeUpgrade := &anywherev1.NodeUpgrade{} + if err := r.client.Get(ctx, req.NamespacedName, nodeUpgrade); err != nil { + if apierrors.IsNotFound(err) { + return reconcile.Result{}, nil + } + return ctrl.Result{}, err + } + + machineToBeUpgraded := &clusterv1.Machine{} + if err := r.client.Get(ctx, getNamespacedNameType(nodeUpgrade.Spec.Machine.Name, nodeUpgrade.Spec.Machine.Namespace), machineToBeUpgraded); err != nil { + return ctrl.Result{}, err + } + + rClient, err := r.remoteClientRegistry.GetClient(ctx, getNamespacedNameType(machineToBeUpgraded.Spec.ClusterName, machineToBeUpgraded.Namespace)) + if err != nil { + return ctrl.Result{}, err + } + + if machineToBeUpgraded.Status.NodeRef == nil { + return ctrl.Result{}, fmt.Errorf("machine %s is missing nodeRef", machineToBeUpgraded.Name) + } + + // Initialize the patch helper + patchHelper, err := patch.NewHelper(nodeUpgrade, r.client) + if err != nil { + return ctrl.Result{}, err + } + + defer func() { + err := r.updateStatus(ctx, log, rClient, nodeUpgrade, machineToBeUpgraded.Status.NodeRef.Name) + if err != nil { + reterr = kerrors.NewAggregate([]error{reterr, err}) + } + + // Always attempt to patch the object and status after each reconciliation. + patchOpts := []patch.Option{} + + // We want the observedGeneration to indicate, that the status shown is up-to-date given the desired spec of the same generation. + // However, if there is an error while updating the status, we may get a partial status update, In this case, + // a partially updated status is not considered up to date, so we should not update the observedGeneration + + // Patch ObservedGeneration only if the reconciliation completed without error + if reterr == nil { + patchOpts = append(patchOpts, patch.WithStatusObservedGeneration{}) + } + if err := patchNodeUpgrade(ctx, patchHelper, *nodeUpgrade, patchOpts...); err != nil { + reterr = kerrors.NewAggregate([]error{reterr, err}) + } + + // Only requeue if we are not already re-queueing and the NodeUpgrade ready condition is false. 
+		// We do this to be able to update the status continuously until the NodeUpgrade becomes ready,
+		// since there might be changes in the state of the world that don't trigger reconciliation requests.
+
+		if reterr == nil && !result.Requeue && result.RequeueAfter <= 0 && conditions.IsFalse(nodeUpgrade, anywherev1.ReadyCondition) {
+			result = ctrl.Result{RequeueAfter: 10 * time.Second}
+		}
+	}()
+
+	// Reconcile the NodeUpgrade deletion if the DeletionTimestamp is set.
+	if !nodeUpgrade.DeletionTimestamp.IsZero() {
+		return r.reconcileDelete(ctx, log, nodeUpgrade, machineToBeUpgraded.Status.NodeRef.Name, rClient)
+	}
+
+	controllerutil.AddFinalizer(nodeUpgrade, nodeUpgradeFinalizerName)
+
+	return r.reconcile(ctx, log, machineToBeUpgraded, nodeUpgrade, rClient)
+}
+
+func (r *NodeUpgradeReconciler) reconcile(ctx context.Context, log logr.Logger, machineToBeUpgraded *clusterv1.Machine, nodeUpgrade *anywherev1.NodeUpgrade, remoteClient client.Client) (ctrl.Result, error) {
+	node := &corev1.Node{}
+	if err := remoteClient.Get(ctx, types.NamespacedName{Name: machineToBeUpgraded.Status.NodeRef.Name}, node); err != nil {
+		return reconcile.Result{}, err
+	}
+
+	// Return early if the node upgrade is already complete.
+	if nodeUpgrade.Status.Completed {
+		log.Info("Node is upgraded", "Node", node.Name)
+		return ctrl.Result{}, nil
+	}
+
+	if err := namespaceOrCreate(ctx, remoteClient, log, constants.EksaSystemNamespace); err != nil {
+		return ctrl.Result{}, err
+	}
+
+	log.Info("Upgrading node", "Node", node.Name)
+	upgraderPod := &corev1.Pod{}
+	if conditions.IsTrue(nodeUpgrade, anywherev1.UpgraderPodCreated) || upgraderPodExists(ctx, remoteClient, node.Name) {
+		log.Info("Upgrader pod already exists, skipping creation of the pod", "Pod", upgraderPod.Name)
+		return ctrl.Result{}, nil
+	}
+
+	if isControlPlane(node) {
+		if nodeUpgrade.Spec.FirstNodeToBeUpgraded {
+			upgraderPod = upgrader.UpgradeFirstControlPlanePod(node.Name, defaultUpgraderImage, nodeUpgrade.Spec.KubernetesVersion, *nodeUpgrade.Spec.EtcdVersion)
+		} else {
+			upgraderPod = upgrader.UpgradeSecondaryControlPlanePod(node.Name, defaultUpgraderImage)
+		}
+	} else {
+		upgraderPod = upgrader.UpgradeWorkerPod(node.Name, defaultUpgraderImage)
+	}
-	// TODO(user): your logic here
+	if err := remoteClient.Create(ctx, upgraderPod); err != nil {
+		return ctrl.Result{}, fmt.Errorf("failed to create the upgrader pod on node %s: %v", node.Name, err)
+	}
 	return ctrl.Result{}, nil
 }
-// SetupWithManager sets up the controller with the Manager.
-func (r *NodeUpgradeReconciler) SetupWithManager(mgr ctrl.Manager) error {
-	return ctrl.NewControllerManagedBy(mgr).
-		For(&anywherev1.NodeUpgrade{}).
-		Complete(r)
+// namespaceOrCreate creates a namespace if it doesn't already exist.
+func namespaceOrCreate(ctx context.Context, client client.Client, log logr.Logger, namespace string) error { + ns := &corev1.Namespace{} + if err := client.Get(ctx, types.NamespacedName{Name: namespace}, ns); err != nil { + if apierrors.IsNotFound(err) { + log.Info("Creating namespace on the remote cluster", "Namespace", namespace) + ns := &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{ + Name: namespace, + }, + } + if err := client.Create(ctx, ns); err != nil { + return fmt.Errorf("creating namespace %s on cluster: %v", namespace, err) + } + } else { + return fmt.Errorf("getting namespace %s on cluster: %v", namespace, err) + } + } + return nil +} + +func (r *NodeUpgradeReconciler) reconcileDelete(ctx context.Context, log logr.Logger, nodeUpgrade *anywherev1.NodeUpgrade, nodeName string, remoteClient client.Client) (ctrl.Result, error) { + log.Info("Reconcile NodeUpgrade deletion") + + pod, err := getUpgraderPod(ctx, remoteClient, nodeName) + if err != nil { + if apierrors.IsNotFound(err) { + log.Info("Upgrader pod not found, skipping pod deletion") + } else { + return ctrl.Result{}, fmt.Errorf("getting upgrader pod: %v", err) + } + } else { + // TODO(in-place): Make pod deletion logic more robust by checking if the pod is still running. + // If it is still running and not errored out, then wait before deleting the pod. + log.Info("Deleting upgrader pod", "Pod", pod.Name, "Namespace", pod.Namespace) + if err := remoteClient.Delete(ctx, pod); err != nil { + return ctrl.Result{}, fmt.Errorf("deleting upgrader pod: %v", err) + } + } + + // Remove the finalizer from NodeUpgrade object + controllerutil.RemoveFinalizer(nodeUpgrade, nodeUpgradeFinalizerName) + return ctrl.Result{}, nil +} + +func (r *NodeUpgradeReconciler) updateStatus(ctx context.Context, log logr.Logger, remoteClient client.Client, nodeUpgrade *anywherev1.NodeUpgrade, nodeName string) error { + // When NodeUpgrade is fully deleted, we do not need to update the status. Without this check + // the subsequent patch operations would fail if the status is updated after it is fully deleted. + if !nodeUpgrade.DeletionTimestamp.IsZero() && len(nodeUpgrade.GetFinalizers()) == 0 { + log.Info("NodeUpgrade is deleted, skipping status update") + return nil + } + + log.Info("Updating NodeUpgrade status") + + pod, err := getUpgraderPod(ctx, remoteClient, nodeName) + if err != nil { + if apierrors.IsNotFound(err) { + markAllConditionsFalse(nodeUpgrade, podDNEMessage, clusterv1.ConditionSeverityInfo) + } else { + markAllConditionsFalse(nodeUpgrade, err.Error(), clusterv1.ConditionSeverityError) + } + return fmt.Errorf("getting upgrader pod: %v", err) + } + + conditions.MarkTrue(nodeUpgrade, anywherev1.UpgraderPodCreated) + updateComponentsConditions(pod, nodeUpgrade) + + // Always update the readyCondition by summarizing the state of other conditions. 
+ conditions.SetSummary(nodeUpgrade, + conditions.WithConditions( + anywherev1.UpgraderPodCreated, + anywherev1.BinariesCopied, + anywherev1.ContainerdUpgraded, + anywherev1.CNIPluginsUpgraded, + anywherev1.KubeadmUpgraded, + anywherev1.KubeletUpgraded, + anywherev1.PostUpgradeCleanupCompleted, + ), + ) + return nil +} + +func updateComponentsConditions(pod *corev1.Pod, nodeUpgrade *anywherev1.NodeUpgrade) { + containersMap := []struct { + name string + condition clusterv1.ConditionType + }{ + { + name: upgrader.CopierContainerName, + condition: anywherev1.BinariesCopied, + }, + { + name: upgrader.ContainerdUpgraderContainerName, + condition: anywherev1.ContainerdUpgraded, + }, + { + name: upgrader.CNIPluginsUpgraderContainerName, + condition: anywherev1.CNIPluginsUpgraded, + }, + { + name: upgrader.KubeadmUpgraderContainerName, + condition: anywherev1.KubeadmUpgraded, + }, + { + name: upgrader.KubeletUpgradeContainerName, + condition: anywherev1.KubeletUpgraded, + }, + { + name: upgrader.PostUpgradeContainerName, + condition: anywherev1.PostUpgradeCleanupCompleted, + }, + } + + completed := true + for _, container := range containersMap { + status, err := getInitContainerStatus(pod, container.name) + if err != nil { + conditions.MarkFalse(nodeUpgrade, container.condition, "Container status not available yet", clusterv1.ConditionSeverityWarning, "") + completed = false + } else { + if status.State.Waiting != nil { + conditions.MarkFalse(nodeUpgrade, container.condition, "Container is waiting to be initialized", clusterv1.ConditionSeverityInfo, "") + completed = false + } else if status.State.Running != nil { + conditions.MarkFalse(nodeUpgrade, container.condition, "Container is still running", clusterv1.ConditionSeverityInfo, "") + completed = false + } else if status.State.Terminated != nil { + if status.State.Terminated.ExitCode != 0 { + conditions.MarkFalse(nodeUpgrade, container.condition, fmt.Sprintf("Container exited with a non-zero exit code, reason: %s", status.State.Terminated.Reason), clusterv1.ConditionSeverityError, "") + completed = false + } else { + conditions.MarkTrue(nodeUpgrade, container.condition) + } + } else { + // this should not happen + conditions.MarkFalse(nodeUpgrade, container.condition, "Container state is unknown", clusterv1.ConditionSeverityWarning, "") + completed = false + } + } + } + nodeUpgrade.Status.Completed = completed +} + +func getInitContainerStatus(pod *corev1.Pod, containerName string) (*corev1.ContainerStatus, error) { + for _, status := range pod.Status.InitContainerStatuses { + if status.Name == containerName { + return &status, nil + } + } + return nil, fmt.Errorf("status not found for container %s in pod %s", containerName, pod.Name) +} + +func markAllConditionsFalse(nodeUpgrade *anywherev1.NodeUpgrade, message string, severity clusterv1.ConditionSeverity) { + conditions.MarkFalse(nodeUpgrade, anywherev1.UpgraderPodCreated, message, clusterv1.ConditionSeverityError, "") + conditions.MarkFalse(nodeUpgrade, anywherev1.BinariesCopied, message, clusterv1.ConditionSeverityError, "") + conditions.MarkFalse(nodeUpgrade, anywherev1.ContainerdUpgraded, message, clusterv1.ConditionSeverityError, "") + conditions.MarkFalse(nodeUpgrade, anywherev1.CNIPluginsUpgraded, message, clusterv1.ConditionSeverityError, "") + conditions.MarkFalse(nodeUpgrade, anywherev1.KubeadmUpgraded, message, clusterv1.ConditionSeverityError, "") + conditions.MarkFalse(nodeUpgrade, anywherev1.KubeletUpgraded, message, clusterv1.ConditionSeverityError, "") +} + +func 
isControlPlane(node *corev1.Node) bool {
+	_, ok := node.Labels[controlPlaneLabel]
+	return ok
+}
+
+func getNamespacedNameType(name, namespace string) types.NamespacedName {
+	return types.NamespacedName{
+		Name:      name,
+		Namespace: namespace,
+	}
+}
+
+func patchNodeUpgrade(ctx context.Context, patchHelper *patch.Helper, nodeUpgrade anywherev1.NodeUpgrade, patchOpts ...patch.Option) error {
+	// Patch the object, ignoring conflicts on the conditions owned by this controller.
+	options := append([]patch.Option{
+		patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
+			// Add each condition here that the controller should ignore conflicts for.
+			anywherev1.UpgraderPodCreated,
+			anywherev1.BinariesCopied,
+			anywherev1.ContainerdUpgraded,
+			anywherev1.CNIPluginsUpgraded,
+			anywherev1.KubeadmUpgraded,
+			anywherev1.KubeletUpgraded,
+		}},
+	}, patchOpts...)
+
+	// Always attempt to patch the object and status after each reconciliation.
+	return patchHelper.Patch(ctx, &nodeUpgrade, options...)
+}
+
+func upgraderPodExists(ctx context.Context, remoteClient client.Client, nodeName string) bool {
+	_, err := getUpgraderPod(ctx, remoteClient, nodeName)
+	return err == nil
+}
+
+func getUpgraderPod(ctx context.Context, remoteClient client.Client, nodeName string) (*corev1.Pod, error) {
+	pod := &corev1.Pod{}
+	if err := remoteClient.Get(ctx, getNamespacedNameType(upgrader.PodName(nodeName), constants.EksaSystemNamespace), pod); err != nil {
+		return nil, err
+	}
+	return pod, nil
 }
diff --git a/controllers/nodeupgrade_controller_test.go b/controllers/nodeupgrade_controller_test.go
new file mode 100644
index 000000000000..98ff008e7fac
--- /dev/null
+++ b/controllers/nodeupgrade_controller_test.go
@@ -0,0 +1,286 @@
+package controllers_test
+
+import (
+	"context"
+	"testing"
+
+	"github.com/golang/mock/gomock"
+	. 
"github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/aws/eks-anywhere/controllers" + "github.com/aws/eks-anywhere/controllers/mocks" + anywherev1 "github.com/aws/eks-anywhere/pkg/api/v1alpha1" + upgrader "github.com/aws/eks-anywhere/pkg/nodeupgrader" + "github.com/aws/eks-anywhere/pkg/utils/ptr" +) + +func TestNodeUpgradeReconcilerReconcileFirstControlPlane(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + ctrl := gomock.NewController(t) + clientRegistry := mocks.NewMockRemoteClientRegistry(ctrl) + + cluster, machine, node, nodeUpgrade := getObjectsForNodeUpgradeTest() + nodeUpgrade.Spec.FirstNodeToBeUpgraded = true + nodeUpgrade.Spec.EtcdVersion = ptr.String("v3.5.9-eks-1-28-9") + node.Labels = map[string]string{ + "node-role.kubernetes.io/control-plane": "true", + } + client := fake.NewClientBuilder().WithRuntimeObjects(cluster, machine, node, nodeUpgrade).Build() + + clientRegistry.EXPECT().GetClient(ctx, types.NamespacedName{Name: cluster.Name, Namespace: cluster.Namespace}).Return(client, nil) + + r := controllers.NewNodeUpgradeReconciler(client, clientRegistry) + req := nodeUpgradeRequest(nodeUpgrade) + _, err := r.Reconcile(ctx, req) + g.Expect(err).ToNot(HaveOccurred()) + + pod := &corev1.Pod{} + err = client.Get(ctx, types.NamespacedName{Name: upgrader.PodName(node.Name), Namespace: "eksa-system"}, pod) + g.Expect(err).ToNot(HaveOccurred()) +} + +func TestNodeUpgradeReconcilerReconcileNextControlPlane(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + ctrl := gomock.NewController(t) + clientRegistry := mocks.NewMockRemoteClientRegistry(ctrl) + + cluster, machine, node, nodeUpgrade := getObjectsForNodeUpgradeTest() + node.Labels = map[string]string{ + "node-role.kubernetes.io/control-plane": "true", + } + client := fake.NewClientBuilder().WithRuntimeObjects(cluster, machine, node, nodeUpgrade).Build() + + clientRegistry.EXPECT().GetClient(ctx, types.NamespacedName{Name: cluster.Name, Namespace: cluster.Namespace}).Return(client, nil) + + r := controllers.NewNodeUpgradeReconciler(client, clientRegistry) + req := nodeUpgradeRequest(nodeUpgrade) + _, err := r.Reconcile(ctx, req) + g.Expect(err).ToNot(HaveOccurred()) + + pod := &corev1.Pod{} + err = client.Get(ctx, types.NamespacedName{Name: upgrader.PodName(node.Name), Namespace: "eksa-system"}, pod) + g.Expect(err).ToNot(HaveOccurred()) +} + +func TestNodeUpgradeReconcilerReconcileWorker(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + ctrl := gomock.NewController(t) + clientRegistry := mocks.NewMockRemoteClientRegistry(ctrl) + + cluster, machine, node, nodeUpgrade := getObjectsForNodeUpgradeTest() + client := fake.NewClientBuilder().WithRuntimeObjects(cluster, machine, node, nodeUpgrade).Build() + + clientRegistry.EXPECT().GetClient(ctx, types.NamespacedName{Name: cluster.Name, Namespace: cluster.Namespace}).Return(client, nil) + + r := controllers.NewNodeUpgradeReconciler(client, clientRegistry) + req := nodeUpgradeRequest(nodeUpgrade) + _, err := r.Reconcile(ctx, req) + g.Expect(err).ToNot(HaveOccurred()) + + pod := &corev1.Pod{} + err = client.Get(ctx, types.NamespacedName{Name: upgrader.PodName(node.Name), Namespace: "eksa-system"}, pod) + g.Expect(err).ToNot(HaveOccurred()) +} + +func 
TestNodeUpgradeReconcilerReconcileCreateUpgraderPodState(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + ctrl := gomock.NewController(t) + clientRegistry := mocks.NewMockRemoteClientRegistry(ctrl) + + cluster, machine, node, nodeUpgrade := getObjectsForNodeUpgradeTest() + client := fake.NewClientBuilder().WithRuntimeObjects(cluster, machine, node, nodeUpgrade).Build() + + clientRegistry.EXPECT().GetClient(ctx, types.NamespacedName{Name: cluster.Name, Namespace: cluster.Namespace}).Return(client, nil).Times(2) + + r := controllers.NewNodeUpgradeReconciler(client, clientRegistry) + req := nodeUpgradeRequest(nodeUpgrade) + _, err := r.Reconcile(ctx, req) + g.Expect(err).ToNot(HaveOccurred()) + + pod := &corev1.Pod{} + g.Expect(client.Get(ctx, types.NamespacedName{Name: upgrader.PodName(node.Name), Namespace: "eksa-system"}, pod)).To(Succeed()) + + statuses := []corev1.ContainerStatus{ + { + Name: upgrader.CopierContainerName, + State: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + ExitCode: 0, + }, + }, + }, + { + Name: upgrader.ContainerdUpgraderContainerName, + State: corev1.ContainerState{ + Running: &corev1.ContainerStateRunning{}, + }, + }, + { + Name: upgrader.CNIPluginsUpgraderContainerName, + State: corev1.ContainerState{ + Waiting: &corev1.ContainerStateWaiting{}, + }, + }, + { + Name: upgrader.KubeadmUpgraderContainerName, + State: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + ExitCode: 1, + }, + }, + }, + { + Name: upgrader.KubeletUpgradeContainerName, + State: corev1.ContainerState{}, + }, + } + + pod.Status.InitContainerStatuses = append(pod.Status.InitContainerStatuses, statuses...) + g.Expect(client.Update(ctx, pod)).To(Succeed()) + + _, err = r.Reconcile(ctx, req) + g.Expect(err).ToNot(HaveOccurred()) +} + +func TestNodeUpgradeReconcilerReconcileDelete(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + ctrl := gomock.NewController(t) + clientRegistry := mocks.NewMockRemoteClientRegistry(ctrl) + + cluster, machine, node, nodeUpgrade := getObjectsForNodeUpgradeTest() + client := fake.NewClientBuilder().WithRuntimeObjects(cluster, machine, node, nodeUpgrade).Build() + + clientRegistry.EXPECT().GetClient(ctx, types.NamespacedName{Name: cluster.Name, Namespace: cluster.Namespace}).Return(client, nil).Times(2) + + r := controllers.NewNodeUpgradeReconciler(client, clientRegistry) + req := nodeUpgradeRequest(nodeUpgrade) + _, err := r.Reconcile(ctx, req) + g.Expect(err).ToNot(HaveOccurred()) + + pod := &corev1.Pod{} + err = client.Get(ctx, types.NamespacedName{Name: upgrader.PodName(node.Name), Namespace: "eksa-system"}, pod) + g.Expect(err).ToNot(HaveOccurred()) + + err = client.Delete(ctx, nodeUpgrade) + g.Expect(err).ToNot(HaveOccurred()) + + _, err = r.Reconcile(ctx, req) + g.Expect(err).ToNot(HaveOccurred()) + + pod = &corev1.Pod{} + err = client.Get(ctx, types.NamespacedName{Name: upgrader.PodName(node.Name), Namespace: "eksa-system"}, pod) + g.Expect(err).To(MatchError("pods \"node01-node-upgrader\" not found")) +} + +func TestNodeUpgradeReconcilerReconcileDeleteUpgraderPodAlreadyDeleted(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + ctrl := gomock.NewController(t) + clientRegistry := mocks.NewMockRemoteClientRegistry(ctrl) + + cluster, machine, node, nodeUpgrade := getObjectsForNodeUpgradeTest() + client := fake.NewClientBuilder().WithRuntimeObjects(cluster, machine, node, nodeUpgrade).Build() + + clientRegistry.EXPECT().GetClient(ctx, types.NamespacedName{Name: 
cluster.Name, Namespace: cluster.Namespace}).Return(client, nil).Times(2) + + r := controllers.NewNodeUpgradeReconciler(client, clientRegistry) + req := nodeUpgradeRequest(nodeUpgrade) + _, err := r.Reconcile(ctx, req) + g.Expect(err).ToNot(HaveOccurred()) + + pod := &corev1.Pod{} + err = client.Get(ctx, types.NamespacedName{Name: upgrader.PodName(node.Name), Namespace: "eksa-system"}, pod) + g.Expect(err).ToNot(HaveOccurred()) + + err = client.Delete(ctx, nodeUpgrade) + g.Expect(err).ToNot(HaveOccurred()) + + err = client.Delete(ctx, pod) + g.Expect(err).ToNot(HaveOccurred()) + + _, err = r.Reconcile(ctx, req) + g.Expect(err).ToNot(HaveOccurred()) + + pod = &corev1.Pod{} + err = client.Get(ctx, types.NamespacedName{Name: upgrader.PodName(node.Name), Namespace: "eksa-system"}, pod) + g.Expect(err).To(MatchError("pods \"node01-node-upgrader\" not found")) +} + +func getObjectsForNodeUpgradeTest() (*clusterv1.Cluster, *clusterv1.Machine, *corev1.Node, *anywherev1.NodeUpgrade) { + cluster := generateCluster() + node := generateNode() + machine := generateMachine(cluster, node) + nodeUpgrade := generateNodeUpgrade(machine) + return cluster, machine, node, nodeUpgrade +} + +func nodeUpgradeRequest(nodeUpgrade *anywherev1.NodeUpgrade) reconcile.Request { + return reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: nodeUpgrade.Name, + Namespace: nodeUpgrade.Namespace, + }, + } +} + +func generateNodeUpgrade(machine *clusterv1.Machine) *anywherev1.NodeUpgrade { + return &anywherev1.NodeUpgrade{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-upgrade-request", + Namespace: "eksa-system", + }, + Spec: anywherev1.NodeUpgradeSpec{ + Machine: corev1.ObjectReference{ + Name: machine.Name, + Namespace: machine.Namespace, + }, + KubernetesVersion: "v1.28.1", + }, + } +} + +func generateMachine(cluster *clusterv1.Cluster, node *corev1.Node) *clusterv1.Machine { + return &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine01", + Namespace: "eksa-system", + }, + Spec: clusterv1.MachineSpec{ + Version: ptr.String("v1.28.0"), + ClusterName: cluster.Name, + }, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{ + Name: node.Name, + }, + }, + } +} + +func generateNode() *corev1.Node { + return &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node01", + }, + } +} + +func generateCluster() *clusterv1.Cluster { + return &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-cluster", + Namespace: "eksa-system", + }, + } +} diff --git a/pkg/api/v1alpha1/nodeupgrade_types.go b/pkg/api/v1alpha1/nodeupgrade_types.go index f6730a5d306d..e9a48fb528b8 100644 --- a/pkg/api/v1alpha1/nodeupgrade_types.go +++ b/pkg/api/v1alpha1/nodeupgrade_types.go @@ -1,28 +1,62 @@ package v1alpha1 import ( + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" ) -// NodeUpgradeKind stores the Kind for NodeUpgrade. -const NodeUpgradeKind = "NodeUpgrade" +const ( + // NodeUpgradeKind stores the Kind for NodeUpgrade. + NodeUpgradeKind = "NodeUpgrade" + + // UpgraderPodCreated reports whether the upgrader pod has been created for the node upgrade. + UpgraderPodCreated ConditionType = "UpgraderPodCreated" + + // BinariesCopied reports whether the binaries have been copied over by the component copier container. + BinariesCopied ConditionType = "BinariesCopied" + + // ContainerdUpgraded reports whether containerd has been upgraded. 
+	ContainerdUpgraded ConditionType = "ContainerdUpgraded"
+
+	// CNIPluginsUpgraded reports whether the CNI plugins have been upgraded.
+	CNIPluginsUpgraded ConditionType = "CNIPluginsUpgraded"
+
+	// KubeadmUpgraded reports whether Kubeadm has been upgraded.
+	KubeadmUpgraded ConditionType = "KubeadmUpgraded"
+
+	// KubeletUpgraded reports whether kubelet has been upgraded.
+	KubeletUpgraded ConditionType = "KubeletUpgraded"
+
+	// PostUpgradeCleanupCompleted reports whether the post upgrade operations have been completed.
+	PostUpgradeCleanupCompleted ConditionType = "PostUpgradeCleanupCompleted"
+)
 
 // NodeUpgradeSpec defines the desired state of NodeUpgrade.
 type NodeUpgradeSpec struct {
-	Machine Ref `json:"machine"`
-	Node    Ref `json:"node"`
+	// Machine is a reference to the CAPI Machine that needs to be upgraded.
+	Machine corev1.ObjectReference `json:"machine"`
+
+	// TODO(in-place): Determine if there's a way to get these dynamically instead of expecting them from the CRD.
 	KubernetesVersion string  `json:"kubernetesVersion"`
-	KubeletVersion    string  `json:"kubeletVersion"`
 	EtcdVersion       *string `json:"etcdVersion,omitempty"`
 	CoreDNSVersion    *string `json:"coreDNSVersion,omitempty"`
+
+	// FirstNodeToBeUpgraded signifies that the Node is the first node to be upgraded.
+	// This flag is only valid for control plane nodes and ignored for worker nodes.
+	// +optional
+	FirstNodeToBeUpgraded bool `json:"firstNodeToBeUpgraded,omitempty"`
 }
 
 // NodeUpgradeStatus defines the observed state of NodeUpgrade.
 type NodeUpgradeStatus struct {
-	Conditions clusterv1.Conditions `json:"conditions,omitempty"`
-	Phase      string               `json:"phase"`
-	Completed  bool                 `json:"completed"`
+	// +optional
+	Conditions []Condition `json:"conditions,omitempty"`
+	// +optional
+	Completed bool `json:"completed,omitempty"`
+
+	// ObservedGeneration is the latest generation observed by the controller.
+	ObservedGeneration int64 `json:"observedGeneration,omitempty"`
 }
 
 //+kubebuilder:object:root=true
@@ -49,3 +83,13 @@ type NodeUpgradeList struct {
 func init() {
 	SchemeBuilder.Register(&NodeUpgrade{}, &NodeUpgradeList{})
 }
+
+// GetConditions returns all the Conditions for the NodeUpgrade object.
+func (n *NodeUpgrade) GetConditions() clusterv1.Conditions {
+	return n.Status.Conditions
+}
+
+// SetConditions sets the Conditions on the NodeUpgrade object.
+func (n *NodeUpgrade) SetConditions(conditions clusterv1.Conditions) { + n.Status.Conditions = conditions +} diff --git a/pkg/api/v1alpha1/zz_generated.deepcopy.go b/pkg/api/v1alpha1/zz_generated.deepcopy.go index fb960cf27e64..fc30d2f05b93 100644 --- a/pkg/api/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/api/v1alpha1/zz_generated.deepcopy.go @@ -1857,7 +1857,6 @@ func (in *NodeUpgradeList) DeepCopyObject() runtime.Object { func (in *NodeUpgradeSpec) DeepCopyInto(out *NodeUpgradeSpec) { *out = *in out.Machine = in.Machine - out.Node = in.Node if in.EtcdVersion != nil { in, out := &in.EtcdVersion, &out.EtcdVersion *out = new(string) @@ -1885,7 +1884,7 @@ func (in *NodeUpgradeStatus) DeepCopyInto(out *NodeUpgradeStatus) { *out = *in if in.Conditions != nil { in, out := &in.Conditions, &out.Conditions - *out = make(v1beta1.Conditions, len(*in)) + *out = make([]v1beta1.Condition, len(*in)) for i := range *in { (*in)[i].DeepCopyInto(&(*out)[i]) } diff --git a/pkg/nodeupgrader/testdata/expected_first_control_plane_upgrader_pod.yaml b/pkg/nodeupgrader/testdata/expected_first_control_plane_upgrader_pod.yaml new file mode 100644 index 000000000000..67b631d0a2c3 --- /dev/null +++ b/pkg/nodeupgrader/testdata/expected_first_control_plane_upgrader_pod.yaml @@ -0,0 +1,114 @@ +metadata: + creationTimestamp: null + labels: + ekd-d-upgrader: "true" + name: my-node-node-upgrader + namespace: eksa-system +spec: + containers: + - image: nginx + name: done + resources: {} + hostPID: true + initContainers: + - args: + - -r + - /eksa-upgrades + - /usr/host + command: + - cp + image: public.ecr.aws/eks-anywhere/node-upgrader:latest + name: components-copier + resources: {} + volumeMounts: + - mountPath: /usr/host + name: host-components + - args: + - --target + - "1" + - --mount + - --uts + - --ipc + - --net + - /foo/eksa-upgrades/scripts/upgrade.sh + - upgrade_containerd + command: + - nsenter + image: public.ecr.aws/eks-anywhere/node-upgrader:latest + name: containerd-upgrader + resources: {} + securityContext: + privileged: true + - args: + - --target + - "1" + - --mount + - --uts + - --ipc + - --net + - /foo/eksa-upgrades/scripts/upgrade.sh + - cni_plugins + command: + - nsenter + image: public.ecr.aws/eks-anywhere/node-upgrader:latest + name: cni-plugins-upgrader + resources: {} + securityContext: + privileged: true + - args: + - --target + - "1" + - --mount + - --uts + - --ipc + - --net + - /foo/eksa-upgrades/scripts/upgrade.sh + - kubeadm_in_first_cp + - v1.28.3-eks-1-28-9 + - v3.5.9-eks-1-28-9 + command: + - nsenter + image: public.ecr.aws/eks-anywhere/node-upgrader:latest + name: kubeadm-upgrader + resources: {} + securityContext: + privileged: true + - args: + - --target + - "1" + - --mount + - --uts + - --ipc + - --net + - /foo/eksa-upgrades/scripts/upgrade.sh + - kubelet_and_kubectl + command: + - nsenter + image: public.ecr.aws/eks-anywhere/node-upgrader:latest + name: kubelet-kubectl-upgrader + resources: {} + securityContext: + privileged: true + - args: + - --target + - "1" + - --mount + - --uts + - --ipc + - --net + - /foo/eksa-upgrades/scripts/upgrade.sh + - print_status_and_cleanup + command: + - nsenter + image: public.ecr.aws/eks-anywhere/node-upgrader:latest + name: post-upgrade-status + resources: {} + securityContext: + privileged: true + nodeName: my-node + volumes: + - hostPath: + path: /foo + type: DirectoryOrCreate + name: host-components +status: {} diff --git a/pkg/nodeupgrader/testdata/expected_rest_control_plane_upgrader_pod.yaml 
b/pkg/nodeupgrader/testdata/expected_rest_control_plane_upgrader_pod.yaml new file mode 100755 index 000000000000..b0df3a8cbfd5 --- /dev/null +++ b/pkg/nodeupgrader/testdata/expected_rest_control_plane_upgrader_pod.yaml @@ -0,0 +1,112 @@ +metadata: + creationTimestamp: null + labels: + ekd-d-upgrader: "true" + name: my-node-node-upgrader + namespace: eksa-system +spec: + containers: + - image: nginx + name: done + resources: {} + hostPID: true + initContainers: + - args: + - -r + - /eksa-upgrades + - /usr/host + command: + - cp + image: public.ecr.aws/eks-anywhere/node-upgrader:latest + name: components-copier + resources: {} + volumeMounts: + - mountPath: /usr/host + name: host-components + - args: + - --target + - "1" + - --mount + - --uts + - --ipc + - --net + - /foo/eksa-upgrades/scripts/upgrade.sh + - upgrade_containerd + command: + - nsenter + image: public.ecr.aws/eks-anywhere/node-upgrader:latest + name: containerd-upgrader + resources: {} + securityContext: + privileged: true + - args: + - --target + - "1" + - --mount + - --uts + - --ipc + - --net + - /foo/eksa-upgrades/scripts/upgrade.sh + - cni_plugins + command: + - nsenter + image: public.ecr.aws/eks-anywhere/node-upgrader:latest + name: cni-plugins-upgrader + resources: {} + securityContext: + privileged: true + - args: + - --target + - "1" + - --mount + - --uts + - --ipc + - --net + - /foo/eksa-upgrades/scripts/upgrade.sh + - kubeadm_in_rest_cp + command: + - nsenter + image: public.ecr.aws/eks-anywhere/node-upgrader:latest + name: kubeadm-upgrader + resources: {} + securityContext: + privileged: true + - args: + - --target + - "1" + - --mount + - --uts + - --ipc + - --net + - /foo/eksa-upgrades/scripts/upgrade.sh + - kubelet_and_kubectl + command: + - nsenter + image: public.ecr.aws/eks-anywhere/node-upgrader:latest + name: kubelet-kubectl-upgrader + resources: {} + securityContext: + privileged: true + - args: + - --target + - "1" + - --mount + - --uts + - --ipc + - --net + - /foo/eksa-upgrades/scripts/upgrade.sh + - print_status_and_cleanup + command: + - nsenter + image: public.ecr.aws/eks-anywhere/node-upgrader:latest + name: post-upgrade-status + resources: {} + securityContext: + privileged: true + nodeName: my-node + volumes: + - hostPath: + path: /foo + type: DirectoryOrCreate + name: host-components +status: {} diff --git a/pkg/nodeupgrader/testdata/expected_worker_upgrader_pod.yaml b/pkg/nodeupgrader/testdata/expected_worker_upgrader_pod.yaml new file mode 100755 index 000000000000..f5f56086f5b5 --- /dev/null +++ b/pkg/nodeupgrader/testdata/expected_worker_upgrader_pod.yaml @@ -0,0 +1,112 @@ +metadata: + creationTimestamp: null + labels: + ekd-d-upgrader: "true" + name: my-node-node-upgrader + namespace: eksa-system +spec: + containers: + - image: nginx + name: done + resources: {} + hostPID: true + initContainers: + - args: + - -r + - /eksa-upgrades + - /usr/host + command: + - cp + image: public.ecr.aws/eks-anywhere/node-upgrader:latest + name: components-copier + resources: {} + volumeMounts: + - mountPath: /usr/host + name: host-components + - args: + - --target + - "1" + - --mount + - --uts + - --ipc + - --net + - /foo/eksa-upgrades/scripts/upgrade.sh + - upgrade_containerd + command: + - nsenter + image: public.ecr.aws/eks-anywhere/node-upgrader:latest + name: containerd-upgrader + resources: {} + securityContext: + privileged: true + - args: + - --target + - "1" + - --mount + - --uts + - --ipc + - --net + - /foo/eksa-upgrades/scripts/upgrade.sh + - cni_plugins + command: + - nsenter + image: 
public.ecr.aws/eks-anywhere/node-upgrader:latest + name: cni-plugins-upgrader + resources: {} + securityContext: + privileged: true + - args: + - --target + - "1" + - --mount + - --uts + - --ipc + - --net + - /foo/eksa-upgrades/scripts/upgrade.sh + - kubeadm_in_worker + command: + - nsenter + image: public.ecr.aws/eks-anywhere/node-upgrader:latest + name: kubeadm-upgrader + resources: {} + securityContext: + privileged: true + - args: + - --target + - "1" + - --mount + - --uts + - --ipc + - --net + - /foo/eksa-upgrades/scripts/upgrade.sh + - kubelet_and_kubectl + command: + - nsenter + image: public.ecr.aws/eks-anywhere/node-upgrader:latest + name: kubelet-kubectl-upgrader + resources: {} + securityContext: + privileged: true + - args: + - --target + - "1" + - --mount + - --uts + - --ipc + - --net + - /foo/eksa-upgrades/scripts/upgrade.sh + - print_status_and_cleanup + command: + - nsenter + image: public.ecr.aws/eks-anywhere/node-upgrader:latest + name: post-upgrade-status + resources: {} + securityContext: + privileged: true + nodeName: my-node + volumes: + - hostPath: + path: /foo + type: DirectoryOrCreate + name: host-components +status: {} diff --git a/pkg/nodeupgrader/upgrader.go b/pkg/nodeupgrader/upgrader.go new file mode 100644 index 000000000000..f5507fbfd66f --- /dev/null +++ b/pkg/nodeupgrader/upgrader.go @@ -0,0 +1,144 @@ +package nodeupgrader + +import ( + "fmt" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/aws/eks-anywhere/pkg/constants" + "github.com/aws/eks-anywhere/pkg/utils/ptr" +) + +const ( + upgradeScript = "/foo/eksa-upgrades/scripts/upgrade.sh" + + // CopierContainerName holds the name of the components copier container. + CopierContainerName = "components-copier" + + // ContainerdUpgraderContainerName holds the name of the containerd upgrader container. + ContainerdUpgraderContainerName = "containerd-upgrader" + + // CNIPluginsUpgraderContainerName holds the name of the CNI plugins upgrader container. + CNIPluginsUpgraderContainerName = "cni-plugins-upgrader" + + // KubeadmUpgraderContainerName holds the name of the kubeadm upgrader container. + KubeadmUpgraderContainerName = "kubeadm-upgrader" + + // KubeletUpgradeContainerName holds the name of the kubelet/kubectl upgrader container. + KubeletUpgradeContainerName = "kubelet-kubectl-upgrader" + + // PostUpgradeContainerName holds the name of the post upgrade cleanup/status report container. + PostUpgradeContainerName = "post-upgrade-status" +) + +// PodName returns the name of the upgrader pod based on the nodeName. +func PodName(nodeName string) string { + return fmt.Sprintf("%s-node-upgrader", nodeName) +} + +// UpgradeFirstControlPlanePod returns an upgrader pod that should be deployed on the first control plane node. +func UpgradeFirstControlPlanePod(nodeName, image, kubernetesVersion, etcdVersion string) *corev1.Pod { + p := upgraderPod(nodeName, image) + p.Spec.InitContainers = containersForUpgrade(image, nodeName, "kubeadm_in_first_cp", kubernetesVersion, etcdVersion) + return p +} + +// UpgradeSecondaryControlPlanePod returns an upgrader pod that can be deployed on the remaining control plane nodes. +func UpgradeSecondaryControlPlanePod(nodeName, image string) *corev1.Pod { + p := upgraderPod(nodeName, image) + p.Spec.InitContainers = containersForUpgrade(image, nodeName, "kubeadm_in_rest_cp") + return p +} + +// UpgradeWorkerPod returns an upgrader pod that can be deployed on worker nodes. 
+func UpgradeWorkerPod(nodeName, image string) *corev1.Pod { + p := upgraderPod(nodeName, image) + p.Spec.InitContainers = containersForUpgrade(image, nodeName, "kubeadm_in_worker") + return p +} + +func upgraderPod(nodeName, image string) *corev1.Pod { + dirOrCreate := corev1.HostPathDirectoryOrCreate + return &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: PodName(nodeName), + Namespace: constants.EksaSystemNamespace, + Labels: map[string]string{ + "ekd-d-upgrader": "true", + }, + }, + Spec: corev1.PodSpec{ + NodeName: nodeName, + HostPID: true, + Volumes: []corev1.Volume{ + { + Name: "host-components", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/foo", + Type: &dirOrCreate, + }, + }, + }, + }, + // TODO(in-place): currently, the pod requires atleast one container. + // For the time being, I have added an nginx container but + // this should be replaced with something that makes more + // sense in in-place context. + Containers: []corev1.Container{ + { + Name: "done", + Image: "nginx", + }, + }, + }, + } +} + +func containersForUpgrade(image, nodeName string, kubeadmUpgradeCommand ...string) []corev1.Container { + return []corev1.Container{ + copierContainer(image), + nsenterContainer(image, ContainerdUpgraderContainerName, upgradeScript, "upgrade_containerd"), + nsenterContainer(image, CNIPluginsUpgraderContainerName, upgradeScript, "cni_plugins"), + nsenterContainer(image, KubeadmUpgraderContainerName, append([]string{upgradeScript}, kubeadmUpgradeCommand...)...), + nsenterContainer(image, KubeletUpgradeContainerName, upgradeScript, "kubelet_and_kubectl"), + nsenterContainer(image, PostUpgradeContainerName, upgradeScript, "print_status_and_cleanup"), + } +} + +func copierContainer(image string) corev1.Container { + return corev1.Container{ + Name: CopierContainerName, + Image: image, + Command: []string{"cp"}, + Args: []string{"-r", "/eksa-upgrades", "/usr/host"}, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "host-components", + MountPath: "/usr/host", + }, + }, + } +} + +func nsenterContainer(image, name string, extraArgs ...string) corev1.Container { + args := []string{ + "--target", + "1", + "--mount", + "--uts", + "--ipc", + "--net", + } + + return corev1.Container{ + Name: name, + Image: image, + Command: []string{"nsenter"}, + Args: append(args, extraArgs...), + SecurityContext: &corev1.SecurityContext{ + Privileged: ptr.Bool(true), + }, + } +} diff --git a/pkg/nodeupgrader/upgrader_test.go b/pkg/nodeupgrader/upgrader_test.go new file mode 100644 index 000000000000..e35834d3d363 --- /dev/null +++ b/pkg/nodeupgrader/upgrader_test.go @@ -0,0 +1,48 @@ +package nodeupgrader_test + +import ( + "testing" + + . 
"github.com/onsi/gomega" + "sigs.k8s.io/yaml" + + "github.com/aws/eks-anywhere/internal/test" + "github.com/aws/eks-anywhere/pkg/nodeupgrader" +) + +const ( + nodeName = "my-node" + upgraderImage = "public.ecr.aws/eks-anywhere/node-upgrader:latest" + kubernetesVersion = "v1.28.3-eks-1-28-9" + etcdVersion = "v3.5.9-eks-1-28-9" +) + +func TestUpgradeFirstControlPlanePod(t *testing.T) { + g := NewWithT(t) + pod := nodeupgrader.UpgradeFirstControlPlanePod(nodeName, upgraderImage, kubernetesVersion, etcdVersion) + g.Expect(pod).ToNot(BeNil()) + + data, err := yaml.Marshal(pod) + g.Expect(err).ToNot(HaveOccurred()) + test.AssertContentToFile(t, string(data), "testdata/expected_first_control_plane_upgrader_pod.yaml") +} + +func TestUpgradeSecondaryControlPlanePod(t *testing.T) { + g := NewWithT(t) + pod := nodeupgrader.UpgradeSecondaryControlPlanePod(nodeName, upgraderImage) + g.Expect(pod).ToNot(BeNil()) + + data, err := yaml.Marshal(pod) + g.Expect(err).ToNot(HaveOccurred()) + test.AssertContentToFile(t, string(data), "testdata/expected_rest_control_plane_upgrader_pod.yaml") +} + +func TestUpgradeWorkerPod(t *testing.T) { + g := NewWithT(t) + pod := nodeupgrader.UpgradeWorkerPod(nodeName, upgraderImage) + g.Expect(pod).ToNot(BeNil()) + + data, err := yaml.Marshal(pod) + g.Expect(err).ToNot(HaveOccurred()) + test.AssertContentToFile(t, string(data), "testdata/expected_worker_upgrader_pod.yaml") +}