Add initial e2e framework validations for InPlace Upgrades (#7515)
Signed-off-by: Rahul Ganesh <[email protected]>
Co-authored-by: Rahul Ganesh <[email protected]>
rahulbabu95 and Rahul Ganesh authored Feb 8, 2024
1 parent 11de1b8 commit 92f5295
Showing 5 changed files with 225 additions and 2 deletions.
15 changes: 15 additions & 0 deletions internal/pkg/api/cluster.go
@@ -348,3 +348,18 @@ func WithEtcdEncryptionFiller(kms *anywherev1.KMS, resources []string) ClusterFiller {
}
}
}

// WithInPlaceUpgradeStrategy configures the UpgradeRolloutStrategy on the control plane and all worker node group configurations to InPlace.
func WithInPlaceUpgradeStrategy() ClusterFiller {
return func(c *anywherev1.Cluster) {
c.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy = &anywherev1.ControlPlaneUpgradeRolloutStrategy{
Type: anywherev1.InPlaceStrategyType,
}
for idx, wng := range c.Spec.WorkerNodeGroupConfigurations {
wng.UpgradeRolloutStrategy = &anywherev1.WorkerNodesUpgradeRolloutStrategy{
Type: anywherev1.InPlaceStrategyType,
}
c.Spec.WorkerNodeGroupConfigurations[idx] = wng
}
}
}
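
For reference, a ClusterFiller mutates the Cluster object in place, so the new filler can be exercised on its own. A minimal sketch, assuming ClusterFiller is directly callable as func(*anywherev1.Cluster); the cluster literal and the "md-0" worker node group name are hypothetical:

cluster := &anywherev1.Cluster{
Spec: anywherev1.ClusterSpec{
WorkerNodeGroupConfigurations: []anywherev1.WorkerNodeGroupConfiguration{
{Name: "md-0"}, // hypothetical worker node group
},
},
}
api.WithInPlaceUpgradeStrategy()(cluster)
// The control plane and every worker node group now have UpgradeRolloutStrategy.Type set to anywherev1.InPlaceStrategyType.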
5 changes: 4 additions & 1 deletion test/e2e/TINKERBELL_HARDWARE_COUNT.yaml
@@ -112,4 +112,7 @@ TestTinkerbellKubernetes128UbuntuOOB: 2
TestTinkerbellK8sUpgrade127to128WithUbuntuOOB: 4
TestTinkerbellKubernetes127UbuntuTo128UpgradeCPOnly: 3
TestTinkerbellKubernetes127UbuntuTo128UpgradeWorkerOnly: 3
TestTinkerbellSingleNode127To128UbuntuManagementCPUpgradeAPI: 4
TestTinkerbellKubernetes125UbuntuTo126InPlaceUpgrade_1CP_1Worker: 2
TestTinkerbellKubernetes126UbuntuTo127InPlaceUpgrade_1CP_2Worker: 3
TestTinkerbellKubernetes127UbuntuTo128InPlaceUpgrade_3CP_1Worker: 4
63 changes: 63 additions & 0 deletions test/e2e/tinkerbell_test.go
@@ -321,6 +321,69 @@ func TestTinkerbellKubernetes125UbuntuAddWorkerNodeGroupWithAPI(t *testing.T) {
)
}

func TestTinkerbellKubernetes125UbuntuTo126InPlaceUpgrade_1CP_1Worker(t *testing.T) {
provider := framework.NewTinkerbell(t)
test := framework.NewClusterE2ETest(
t,
provider,
framework.WithClusterFiller(api.WithKubernetesVersion(v1alpha1.Kube125)),
framework.WithClusterFiller(api.WithControlPlaneCount(1)),
framework.WithClusterFiller(api.WithWorkerNodeCount(1)),
framework.WithClusterFiller(api.WithInPlaceUpgradeStrategy()),
framework.WithControlPlaneHardware(1),
framework.WithWorkerHardware(1),
).WithClusterConfig(
provider.WithKubeVersionAndOS(v1alpha1.Kube125, framework.Ubuntu2004, nil),
)
runInPlaceUpgradeFlowForBareMetal(
test,
framework.WithClusterUpgrade(api.WithKubernetesVersion(v1alpha1.Kube126), api.WithInPlaceUpgradeStrategy()),
provider.WithProviderUpgrade(framework.Ubuntu126Image()),
)
}

func TestTinkerbellKubernetes126UbuntuTo127InPlaceUpgrade_1CP_2Worker(t *testing.T) {
provider := framework.NewTinkerbell(t)
test := framework.NewClusterE2ETest(
t,
provider,
framework.WithClusterFiller(api.WithKubernetesVersion(v1alpha1.Kube126)),
framework.WithClusterFiller(api.WithControlPlaneCount(1)),
framework.WithClusterFiller(api.WithWorkerNodeCount(2)),
framework.WithClusterFiller(api.WithInPlaceUpgradeStrategy()),
framework.WithControlPlaneHardware(1),
framework.WithWorkerHardware(2),
).WithClusterConfig(
provider.WithKubeVersionAndOS(v1alpha1.Kube126, framework.Ubuntu2004, nil),
)
runInPlaceUpgradeFlowForBareMetal(
test,
framework.WithClusterUpgrade(api.WithKubernetesVersion(v1alpha1.Kube127), api.WithInPlaceUpgradeStrategy()),
provider.WithProviderUpgrade(framework.Ubuntu127Image()),
)
}

func TestTinkerbellKubernetes127UbuntuTo128InPlaceUpgrade_3CP_1Worker(t *testing.T) {
provider := framework.NewTinkerbell(t)
test := framework.NewClusterE2ETest(
t,
provider,
framework.WithClusterFiller(api.WithKubernetesVersion(v1alpha1.Kube127)),
framework.WithClusterFiller(api.WithControlPlaneCount(3)),
framework.WithClusterFiller(api.WithWorkerNodeCount(1)),
framework.WithClusterFiller(api.WithInPlaceUpgradeStrategy()),
framework.WithControlPlaneHardware(3),
framework.WithWorkerHardware(1),
).WithClusterConfig(
provider.WithKubeVersionAndOS(v1alpha1.Kube127, framework.Ubuntu2004, nil),
)
runInPlaceUpgradeFlowForBareMetal(
test,
framework.WithClusterUpgrade(api.WithKubernetesVersion(v1alpha1.Kube128), api.WithInPlaceUpgradeStrategy()),
provider.WithProviderUpgrade(framework.Ubuntu128Image()),
)
}

// Curated packages
func TestTinkerbellKubernetes127UbuntuSingleNodeCuratedPackagesFlow(t *testing.T) {
test := framework.NewClusterE2ETest(t,
10 changes: 10 additions & 0 deletions test/e2e/upgrade.go
@@ -60,6 +60,16 @@ func runSimpleUpgradeFlowWorkerNodeVersionForBareMetal(test *framework.ClusterE2ETest
test.ValidateHardwareDecommissioned()
}

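// runInPlaceUpgradeFlowForBareMetal runs the Create, Upgrade and Delete cluster flows
// for Baremetal clusters that use the InPlace upgrade rollout strategy, and validates
// the cluster state after the upgrade.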
func runInPlaceUpgradeFlowForBareMetal(test *framework.ClusterE2ETest, clusterOpts ...framework.ClusterE2ETestOpt) {
test.GenerateHardwareConfig()
test.CreateCluster(framework.WithControlPlaneWaitTimeout("20m"))
test.UpgradeClusterWithNewConfig(clusterOpts)
test.ValidateClusterState()
test.StopIfFailed()
test.DeleteCluster()
test.ValidateHardwareDecommissioned()
}

// runSimpleUpgradeFlowForBaremetalWithoutClusterConfigGeneration runs the Create, Upgrade and Delete cluster flows
// for Baremetal that use the cluster config generated by the WithClusterConfig method when the test object is created,
// and avoids regenerating a cluster config with defaults.
134 changes: 133 additions & 1 deletion test/framework/cluster/validations/cluster.go
@@ -7,6 +7,7 @@ import (

"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
apierrors1 "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/types"
apierrors "k8s.io/apimachinery/pkg/util/errors"
@@ -22,7 +23,7 @@ import (
clusterf "github.com/aws/eks-anywhere/test/framework/cluster"
)

// ValidateClusterReady gets the CAPICluster from the client then validates that it is in a ready state. It also checks that the CAPI objects are in the expected state for in-place upgrades.
func ValidateClusterReady(ctx context.Context, vc clusterf.StateValidationConfig) error {
clus := vc.ClusterSpec.Cluster
mgmtClusterClient := vc.ManagementClusterClient
@@ -36,6 +37,9 @@ func ValidateClusterReady(ctx context.Context, vc clusterf.StateValidationConfig) error {
if conditions.IsFalse(capiCluster, v1beta1.ReadyCondition) {
return fmt.Errorf("CAPI cluster %s not ready yet. %s", capiCluster.GetName(), conditions.GetReason(capiCluster, v1beta1.ReadyCondition))
}
if clus.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy != nil && clus.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy.Type == v1alpha1.InPlaceStrategyType {
return validateCAPIObjectsForInPlace(ctx, vc)
}
return nil
}

@@ -60,6 +64,134 @@ func ValidateEKSAObjects(ctx context.Context, vc clusterf.StateValidationConfig) error {
return nil
}

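// validateCAPIObjectsForInPlace validates that the KubeadmControlPlane and MachineDeployments
// are ready and in sync, and that no in-place upgrade custom resources are left behind.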
func validateCAPIObjectsForInPlace(ctx context.Context, vc clusterf.StateValidationConfig) error {
if err := validateKCP(ctx, vc); err != nil {
return fmt.Errorf("failed to validate KubeadmControlPlane: %v", err)
}
if err := validateMDs(ctx, vc); err != nil {
return fmt.Errorf("failed to validate MachineDeployment: %v", err)
}
if err := validateInPlaceCRsDoNotExist(ctx, vc); err != nil {
return fmt.Errorf("failed to validate in-place upgrade custom resources are deleted: %v", err)
}
return nil
}

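// validateKCP retrieves the KubeadmControlPlane for the cluster and verifies that it is
// ready and that its desired, updated and ready replica counts all match.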
func validateKCP(ctx context.Context, vc clusterf.StateValidationConfig) error {
kcp, err := controller.GetKubeadmControlPlane(ctx, vc.ManagementClusterClient, vc.ClusterSpec.Cluster)
if err != nil {
return fmt.Errorf("failed to retrieve kcp: %s", err)
}
if kcp == nil {
return errors.New("KubeadmControlPlane object not found")
}
if conditions.IsFalse(kcp, v1beta1.ReadyCondition) {
return errors.New("kcp ready condition is not true")
}
// The desired, updated and ready replica counts must all match.
if kcp.Status.UpdatedReplicas != kcp.Status.ReadyReplicas || *kcp.Spec.Replicas != kcp.Status.UpdatedReplicas {
return fmt.Errorf("kcp replicas count %d, updated replicas count %d and ready replicas count %d are not in sync", *kcp.Spec.Replicas, kcp.Status.UpdatedReplicas, kcp.Status.ReadyReplicas)
}
return nil
}

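// validateMDs retrieves the MachineDeployments for the cluster and verifies that each one
// is ready and that its desired, updated and ready replica counts all match.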
func validateMDs(ctx context.Context, vc clusterf.StateValidationConfig) error {
mds, err := controller.GetMachineDeployments(ctx, vc.ManagementClusterClient, vc.ClusterSpec.Cluster)
if err != nil {
return fmt.Errorf("failed to retrieve machinedeployments: %s", err)
}
if len(mds) == 0 {
return errors.New("machinedeployment object not found")
}
for _, md := range mds {
if conditions.IsFalse(&md, v1beta1.ReadyCondition) {
return fmt.Errorf("md ready condition is not true for md %s", md.Name)
}
// The desired, updated and ready replica counts must all match.
if md.Status.UpdatedReplicas != md.Status.ReadyReplicas || *md.Spec.Replicas != md.Status.UpdatedReplicas {
return fmt.Errorf("md replicas count %d, updated replicas count %d and ready replicas count %d for md %s are not in sync", *md.Spec.Replicas, md.Status.UpdatedReplicas, md.Status.ReadyReplicas, md.Name)
}
}
return nil
}

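// validateInPlaceCRsDoNotExist verifies that the ControlPlaneUpgrade, MachineDeploymentUpgrade
// and NodeUpgrade objects created during an in-place upgrade have been cleaned up.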
func validateInPlaceCRsDoNotExist(ctx context.Context, vc clusterf.StateValidationConfig) error {
if err := validateCPUDeleted(ctx, vc); err != nil {
return err
}
if err := validateMDUsDeleted(ctx, vc); err != nil {
return err
}
if err := validateNUsAndPodsDeleted(ctx, vc); err != nil {
return err
}
return nil
}

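// validateCPUDeleted verifies that the ControlPlaneUpgrade object for the cluster no longer
// exists on the management cluster.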
func validateCPUDeleted(ctx context.Context, vc clusterf.StateValidationConfig) error {
clusterName := vc.ClusterSpec.Cluster.Name
client := vc.ManagementClusterClient
cpu := &v1alpha1.ControlPlaneUpgrade{}
key := types.NamespacedName{Namespace: constants.EksaSystemNamespace, Name: clusterName + "-cp-upgrade"}
if err := client.Get(ctx, key, cpu); err != nil {
if !apierrors1.IsNotFound(err) {
return fmt.Errorf("failed to get ControlPlaneUpgrade: %s", err)
}
}
if cpu.Name != "" {
return errors.New("ControlPlaneUpgrade object not expected but still exists on the cluster")
}
return nil
}

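// validateMDUsDeleted verifies that the MachineDeploymentUpgrade object for each
// MachineDeployment no longer exists on the management cluster.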
func validateMDUsDeleted(ctx context.Context, vc clusterf.StateValidationConfig) error {
mds, err := controller.GetMachineDeployments(ctx, vc.ManagementClusterClient, vc.ClusterSpec.Cluster)
if err != nil {
return fmt.Errorf("failed to retrieve machinedeployments: %s", err)
}
client := vc.ManagementClusterClient
for _, md := range mds {
mdu := &v1alpha1.MachineDeploymentUpgrade{}
key := types.NamespacedName{Namespace: constants.EksaSystemNamespace, Name: md.Name + "-md-upgrade"}
if err := client.Get(ctx, key, mdu); err != nil {
if !apierrors1.IsNotFound(err) {
return fmt.Errorf("failed to get MachineDeploymentUpgrade: %s", err)
}
}
if mdu.Name != "" {
return errors.New("MachineDeploymentUpgrade object not expected but still exists on the cluster")
}
}
return nil
}

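// validateNUsAndPodsDeleted verifies that, for each machine, the NodeUpgrade object on the
// management cluster and the upgrader pod on the workload cluster no longer exist.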
func validateNUsAndPodsDeleted(ctx context.Context, vc clusterf.StateValidationConfig) error {
machines := &v1beta1.MachineList{}
if err := vc.ClusterClient.List(ctx, machines); err != nil {
return fmt.Errorf("failed to list machines: %s", err)
}
client := vc.ManagementClusterClient
clusterClient := vc.ClusterClient
for _, machine := range machines.Items {
nu := &v1alpha1.NodeUpgrade{}
po := &corev1.Pod{}
key := types.NamespacedName{Namespace: constants.EksaSystemNamespace, Name: machine.Name + "-node-upgrader"}
if err := client.Get(ctx, key, nu); err != nil {
if !apierrors1.IsNotFound(err) {
return fmt.Errorf("failed to get NodeUpgrade: %s", err)
}
}
if nu.Name != "" {
return errors.New("NodeUpgrade object not expected but still exists on the cluster")
}
if err := clusterClient.Get(ctx, key, po); err != nil {
if !apierrors1.IsNotFound(err) {
return fmt.Errorf("failed to get Upgrader Pod: %s", err)
}
}
if po.Name != "" {
return errors.New("Upgrader pod object not expected but still exists on the cluster")
}
}
return nil
}

// ValidateControlPlaneNodes retrieves the control plane nodes from the cluster and checks them against the cluster.Spec.
func ValidateControlPlaneNodes(ctx context.Context, vc clusterf.StateValidationConfig) error {
clus := vc.ClusterSpec.Cluster