From 92f5295e3a8ebd6a1d510bd14e0885c5c86c96db Mon Sep 17 00:00:00 2001
From: Rahul Ganesh <31204974+rahulbabu95@users.noreply.github.com>
Date: Thu, 8 Feb 2024 14:27:03 -0800
Subject: [PATCH] Add initial e2e framework validations for InPlace Upgrades (#7515)

Signed-off-by: Rahul Ganesh
Co-authored-by: Rahul Ganesh
---
 internal/pkg/api/cluster.go                   |  15 ++
 test/e2e/TINKERBELL_HARDWARE_COUNT.yaml       |   5 +-
 test/e2e/tinkerbell_test.go                   |  63 ++++++++
 test/e2e/upgrade.go                           |  10 ++
 test/framework/cluster/validations/cluster.go | 134 +++++++++++++++++-
 5 files changed, 225 insertions(+), 2 deletions(-)

diff --git a/internal/pkg/api/cluster.go b/internal/pkg/api/cluster.go
index 835d8b4cfb66..8bae35e43d72 100644
--- a/internal/pkg/api/cluster.go
+++ b/internal/pkg/api/cluster.go
@@ -348,3 +348,18 @@ func WithEtcdEncryptionFiller(kms *anywherev1.KMS, resources []string) ClusterFi
 		}
 	}
 }
+
+// WithInPlaceUpgradeStrategy sets the upgrade rollout strategy to InPlace on the control plane and all worker node groups.
+func WithInPlaceUpgradeStrategy() ClusterFiller {
+	return func(c *anywherev1.Cluster) {
+		c.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy = &anywherev1.ControlPlaneUpgradeRolloutStrategy{
+			Type: anywherev1.InPlaceStrategyType,
+		}
+		for idx, wng := range c.Spec.WorkerNodeGroupConfigurations {
+			wng.UpgradeRolloutStrategy = &anywherev1.WorkerNodesUpgradeRolloutStrategy{
+				Type: anywherev1.InPlaceStrategyType,
+			}
+			c.Spec.WorkerNodeGroupConfigurations[idx] = wng
+		}
+	}
+}
diff --git a/test/e2e/TINKERBELL_HARDWARE_COUNT.yaml b/test/e2e/TINKERBELL_HARDWARE_COUNT.yaml
index 93e536069dbe..4a9d3f27a916 100644
--- a/test/e2e/TINKERBELL_HARDWARE_COUNT.yaml
+++ b/test/e2e/TINKERBELL_HARDWARE_COUNT.yaml
@@ -112,4 +112,7 @@ TestTinkerbellKubernetes128UbuntuOOB: 2
 TestTinkerbellK8sUpgrade127to128WithUbuntuOOB: 4
 TestTinkerbellKubernetes127UbuntuTo128UpgradeCPOnly: 3
 TestTinkerbellKubernetes127UbuntuTo128UpgradeWorkerOnly: 3
-TestTinkerbellSingleNode127To128UbuntuManagementCPUpgradeAPI: 4
\ No newline at end of file
+TestTinkerbellSingleNode127To128UbuntuManagementCPUpgradeAPI: 4
+TestTinkerbellKubernetes125UbuntuTo126InPlaceUpgrade_1CP_1Worker: 2
+TestTinkerbellKubernetes126UbuntuTo127InPlaceUpgrade_1CP_2Worker: 3
+TestTinkerbellKubernetes127UbuntuTo128InPlaceUpgrade_3CP_1Worker: 4
\ No newline at end of file
diff --git a/test/e2e/tinkerbell_test.go b/test/e2e/tinkerbell_test.go
index 77d6643c95ad..4488c55e3a89 100644
--- a/test/e2e/tinkerbell_test.go
+++ b/test/e2e/tinkerbell_test.go
@@ -321,6 +321,69 @@ func TestTinkerbellKubernetes125UbuntuAddWorkerNodeGroupWithAPI(t *testing.T) {
 	)
 }
 
+func TestTinkerbellKubernetes125UbuntuTo126InPlaceUpgrade_1CP_1Worker(t *testing.T) {
+	provider := framework.NewTinkerbell(t)
+	test := framework.NewClusterE2ETest(
+		t,
+		provider,
+		framework.WithClusterFiller(api.WithKubernetesVersion(v1alpha1.Kube125)),
+		framework.WithClusterFiller(api.WithControlPlaneCount(1)),
+		framework.WithClusterFiller(api.WithWorkerNodeCount(1)),
+		framework.WithClusterFiller(api.WithInPlaceUpgradeStrategy()),
+		framework.WithControlPlaneHardware(1),
+		framework.WithWorkerHardware(1),
+	).WithClusterConfig(
+		provider.WithKubeVersionAndOS(v1alpha1.Kube125, framework.Ubuntu2004, nil),
+	)
+	runInPlaceUpgradeFlowForBareMetal(
+		test,
+		framework.WithClusterUpgrade(api.WithKubernetesVersion(v1alpha1.Kube126), api.WithInPlaceUpgradeStrategy()),
+		provider.WithProviderUpgrade(framework.Ubuntu126Image()),
+	)
+}
+
+func TestTinkerbellKubernetes126UbuntuTo127InPlaceUpgrade_1CP_2Worker(t *testing.T) {
+	provider := framework.NewTinkerbell(t)
+	test := framework.NewClusterE2ETest(
+		t,
+		provider,
+		framework.WithClusterFiller(api.WithKubernetesVersion(v1alpha1.Kube126)),
+		framework.WithClusterFiller(api.WithControlPlaneCount(1)),
+		framework.WithClusterFiller(api.WithWorkerNodeCount(2)),
+		framework.WithClusterFiller(api.WithInPlaceUpgradeStrategy()),
+		framework.WithControlPlaneHardware(1),
+		framework.WithWorkerHardware(2),
+	).WithClusterConfig(
+		provider.WithKubeVersionAndOS(v1alpha1.Kube126, framework.Ubuntu2004, nil),
+	)
+	runInPlaceUpgradeFlowForBareMetal(
+		test,
+		framework.WithClusterUpgrade(api.WithKubernetesVersion(v1alpha1.Kube127), api.WithInPlaceUpgradeStrategy()),
+		provider.WithProviderUpgrade(framework.Ubuntu127Image()),
+	)
+}
+
+func TestTinkerbellKubernetes127UbuntuTo128InPlaceUpgrade_3CP_1Worker(t *testing.T) {
+	provider := framework.NewTinkerbell(t)
+	test := framework.NewClusterE2ETest(
+		t,
+		provider,
+		framework.WithClusterFiller(api.WithKubernetesVersion(v1alpha1.Kube127)),
+		framework.WithClusterFiller(api.WithControlPlaneCount(3)),
+		framework.WithClusterFiller(api.WithWorkerNodeCount(1)),
+		framework.WithClusterFiller(api.WithInPlaceUpgradeStrategy()),
+		framework.WithControlPlaneHardware(3),
+		framework.WithWorkerHardware(1),
+	).WithClusterConfig(
+		provider.WithKubeVersionAndOS(v1alpha1.Kube127, framework.Ubuntu2004, nil),
+	)
+	runInPlaceUpgradeFlowForBareMetal(
+		test,
+		framework.WithClusterUpgrade(api.WithKubernetesVersion(v1alpha1.Kube128), api.WithInPlaceUpgradeStrategy()),
+		provider.WithProviderUpgrade(framework.Ubuntu128Image()),
+	)
+}
+
 // Curated packages
 func TestTinkerbellKubernetes127UbuntuSingleNodeCuratedPackagesFlow(t *testing.T) {
 	test := framework.NewClusterE2ETest(t,
diff --git a/test/e2e/upgrade.go b/test/e2e/upgrade.go
index 46a7bf1d2ba3..d4cac4e6063b 100644
--- a/test/e2e/upgrade.go
+++ b/test/e2e/upgrade.go
@@ -60,6 +60,16 @@ func runSimpleUpgradeFlowWorkerNodeVersionForBareMetal(test *framework.ClusterE2
 	test.ValidateHardwareDecommissioned()
 }
 
+func runInPlaceUpgradeFlowForBareMetal(test *framework.ClusterE2ETest, clusterOpts ...framework.ClusterE2ETestOpt) {
+	test.GenerateHardwareConfig()
+	test.CreateCluster(framework.WithControlPlaneWaitTimeout("20m"))
+	test.UpgradeClusterWithNewConfig(clusterOpts)
+	test.ValidateClusterState()
+	test.StopIfFailed()
+	test.DeleteCluster()
+	test.ValidateHardwareDecommissioned()
+}
+
 // runSimpleUpgradeFlowForBaremetalWithoutClusterConfigGeneration runs the Create, Upgrade and Delete cluster flows
 // for Baremetal that use the cluster config generated by the WithClusterConfig method when the test object is created,
 // and avoids regenerating a cluster config with defaults.
diff --git a/test/framework/cluster/validations/cluster.go b/test/framework/cluster/validations/cluster.go
index ade844aab07a..3b7e4cdf83e1 100644
--- a/test/framework/cluster/validations/cluster.go
+++ b/test/framework/cluster/validations/cluster.go
@@ -7,6 +7,7 @@ import (
 
 	"github.com/pkg/errors"
 	corev1 "k8s.io/api/core/v1"
+	apierrors1 "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/types"
 	apierrors "k8s.io/apimachinery/pkg/util/errors"
@@ -22,7 +23,7 @@ import (
 	clusterf "github.com/aws/eks-anywhere/test/framework/cluster"
 )
 
-// ValidateClusterReady gets the CAPICluster from the client then validates that it is in a ready state.
+// ValidateClusterReady gets the CAPICluster from the client then validates that it is in a ready state. It also checks that the CAPI objects are in the expected state for InPlace upgrades.
 func ValidateClusterReady(ctx context.Context, vc clusterf.StateValidationConfig) error {
 	clus := vc.ClusterSpec.Cluster
 	mgmtClusterClient := vc.ManagementClusterClient
@@ -36,6 +37,9 @@ func ValidateClusterReady(ctx context.Context, vc clusterf.StateValidationConfig
 	if conditions.IsFalse(capiCluster, v1beta1.ReadyCondition) {
 		return fmt.Errorf("CAPI cluster %s not ready yet. %s", capiCluster.GetName(), conditions.GetReason(capiCluster, v1beta1.ReadyCondition))
 	}
+	if clus.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy != nil && clus.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy.Type == v1alpha1.InPlaceStrategyType {
+		return validateCAPIobjectsForInPlace(ctx, vc)
+	}
 	return nil
 }
 
@@ -60,6 +64,134 @@ func ValidateEKSAObjects(ctx context.Context, vc clusterf.StateValidationConfig)
 	return nil
 }
 
+func validateCAPIobjectsForInPlace(ctx context.Context, vc clusterf.StateValidationConfig) error {
+	if err := validateKCP(ctx, vc); err != nil {
+		return fmt.Errorf("failed to validate KubeadmControlPlane: %v", err)
+	}
+	if err := validateMDs(ctx, vc); err != nil {
+		return fmt.Errorf("failed to validate MachineDeployment: %v", err)
+	}
+	if err := validateInPlaceCRsDoesNotExist(ctx, vc); err != nil {
+		return fmt.Errorf("failed to validate InPlace CRs: %v", err)
+	}
+	return nil
+}
+
+func validateKCP(ctx context.Context, vc clusterf.StateValidationConfig) error {
+	kcp, err := controller.GetKubeadmControlPlane(ctx, vc.ManagementClusterClient, vc.ClusterSpec.Cluster)
+	if err != nil {
+		return fmt.Errorf("failed to retrieve kcp: %s", err)
+	}
+	if kcp == nil {
+		return errors.New("KubeadmControlPlane object not found")
+	}
+	if conditions.IsFalse(kcp, v1beta1.ReadyCondition) {
+		return errors.New("kcp ready condition is not true")
+	} else if kcp.Status.UpdatedReplicas != kcp.Status.ReadyReplicas || *kcp.Spec.Replicas != kcp.Status.UpdatedReplicas {
+		return fmt.Errorf("kcp replicas count %d, updated replicas count %d and ready replicas count %d are not in sync", *kcp.Spec.Replicas, kcp.Status.UpdatedReplicas, kcp.Status.ReadyReplicas)
+	}
+	return nil
+}
+
+func validateMDs(ctx context.Context, vc clusterf.StateValidationConfig) error {
+	mds, err := controller.GetMachineDeployments(ctx, vc.ManagementClusterClient, vc.ClusterSpec.Cluster)
+	if err != nil {
+		return fmt.Errorf("failed to retrieve machinedeployments: %s", err)
+	}
+	if len(mds) == 0 {
+		return errors.New("machinedeployment object not found")
+	}
+	for _, md := range mds {
+		if conditions.IsFalse(&md, v1beta1.ReadyCondition) {
+			return fmt.Errorf("md ready condition is not true for md %s", md.Name)
+		} else if md.Status.UpdatedReplicas != md.Status.ReadyReplicas || *md.Spec.Replicas != md.Status.UpdatedReplicas {
+			return fmt.Errorf("md replicas count %d, updated replicas count %d and ready replicas count %d for md %s are not in sync", *md.Spec.Replicas, md.Status.UpdatedReplicas, md.Status.ReadyReplicas, md.Name)
+		}
+	}
+	return nil
+}
+
+func validateInPlaceCRsDoesNotExist(ctx context.Context, vc clusterf.StateValidationConfig) error {
+	if err := validateCPUDeleted(ctx, vc); err != nil {
+		return err
+	}
+	if err := validateMDUsDeleted(ctx, vc); err != nil {
+		return err
+	}
+	if err := validateNUsAndPodsDeleted(ctx, vc); err != nil {
+		return err
+	}
+	return nil
+}
+
+func validateCPUDeleted(ctx context.Context, vc clusterf.StateValidationConfig) error {
+	clusterName := vc.ClusterSpec.Cluster.Name
+	client := vc.ManagementClusterClient
+	cpu := &v1alpha1.ControlPlaneUpgrade{}
+	key := types.NamespacedName{Namespace: constants.EksaSystemNamespace, Name: clusterName + "-cp-upgrade"}
+	if err := client.Get(ctx, key, cpu); err != nil {
+		if !apierrors1.IsNotFound(err) {
+			return fmt.Errorf("failed to get ControlPlaneUpgrade: %s", err)
+		}
+	}
+	if cpu.Name != "" {
+		return errors.New("CPUpgrade object not expected but still exists on the cluster")
+	}
+	return nil
+}
+
+func validateMDUsDeleted(ctx context.Context, vc clusterf.StateValidationConfig) error {
+	mds, err := controller.GetMachineDeployments(ctx, vc.ManagementClusterClient, vc.ClusterSpec.Cluster)
+	if err != nil {
+		return fmt.Errorf("failed to retrieve machinedeployments: %s", err)
+	}
+	client := vc.ManagementClusterClient
+	for _, md := range mds {
+		mdu := &v1alpha1.MachineDeploymentUpgrade{}
+		key := types.NamespacedName{Namespace: constants.EksaSystemNamespace, Name: md.Name + "-md-upgrade"}
+		if err := client.Get(ctx, key, mdu); err != nil {
+			if !apierrors1.IsNotFound(err) {
+				return fmt.Errorf("failed to get MachineDeploymentUpgrade: %s", err)
+			}
+		}
+		if mdu.Name != "" {
+			return errors.New("MDUpgrade object not expected but still exists on the cluster")
+		}
+	}
+	return nil
+}
+
+func validateNUsAndPodsDeleted(ctx context.Context, vc clusterf.StateValidationConfig) error {
+	machines := &v1beta1.MachineList{}
+	if err := vc.ClusterClient.List(ctx, machines); err != nil {
+		return fmt.Errorf("failed to list machines: %s", err)
+	}
+	client := vc.ManagementClusterClient
+	clusterClient := vc.ClusterClient
+	for _, machine := range machines.Items {
+		nu := &v1alpha1.NodeUpgrade{}
+		po := &corev1.Pod{}
+		key := types.NamespacedName{Namespace: constants.EksaSystemNamespace, Name: machine.Name + "-node-upgrader"}
+		if err := client.Get(ctx, key, nu); err != nil {
+			if !apierrors1.IsNotFound(err) {
+				return fmt.Errorf("failed to get NodeUpgrade: %s", err)
+			}
+		}
+		if nu.Name != "" {
+			return errors.New("NodeUpgrade object not expected, but still exists on the cluster")
+		}
+		if err := clusterClient.Get(ctx, key, po); err != nil {
+			if !apierrors1.IsNotFound(err) {
+				return fmt.Errorf("failed to get Upgrader Pod: %s", err)
+			}
+		}
+		if po.Name != "" {
+			return errors.New("Upgrader pod object not expected, but still exists on the cluster")
+		}
+	}
+	return nil
+}
+
 // ValidateControlPlaneNodes retrieves the control plane nodes from the cluster and checks them against the cluster.Spec.
 func ValidateControlPlaneNodes(ctx context.Context, vc clusterf.StateValidationConfig) error {
 	clus := vc.ClusterSpec.Cluster