From acd3ed36d0895831fb904648a6d51dbb31b1c48c Mon Sep 17 00:00:00 2001 From: Damiano Donati Date: Wed, 13 Nov 2024 15:30:13 +0100 Subject: [PATCH] test: e2e: make managed suite more robust to errors with Eventually() --- test/e2e/suites/managed/addon.go | 6 ++- test/e2e/suites/managed/aws_node_env.go | 33 ++++++++----- test/e2e/suites/managed/eks_test.go | 26 ++++------ test/e2e/suites/managed/helpers.go | 65 +++++++++++++++++++------ 4 files changed, 85 insertions(+), 45 deletions(-) diff --git a/test/e2e/suites/managed/addon.go b/test/e2e/suites/managed/addon.go index 9f124499ab..fb243f0406 100644 --- a/test/e2e/suites/managed/addon.go +++ b/test/e2e/suites/managed/addon.go @@ -22,6 +22,7 @@ package managed import ( "context" "fmt" + "time" "github.com/aws/aws-sdk-go/aws/client" "github.com/aws/aws-sdk-go/service/eks" @@ -62,8 +63,9 @@ func CheckAddonExistsSpec(ctx context.Context, inputGetter func() CheckAddonExis By(fmt.Sprintf("Getting control plane: %s", controlPlaneName)) controlPlane := &ekscontrolplanev1.AWSManagedControlPlane{} - err := mgmtClient.Get(ctx, crclient.ObjectKey{Namespace: input.Namespace.Name, Name: controlPlaneName}, controlPlane) - Expect(err).ToNot(HaveOccurred()) + Eventually(func() error { + return mgmtClient.Get(ctx, crclient.ObjectKey{Namespace: input.Namespace.Name, Name: controlPlaneName}, controlPlane) + }, 2*time.Minute, 5*time.Second).Should(Succeed(), "eventually failed trying to get the AWSManagedControlPlane") By(fmt.Sprintf("Checking EKS addon %s is installed on cluster %s and is active", input.AddonName, input.ClusterName)) waitForEKSAddonToHaveStatus(waitForEKSAddonToHaveStatusInput{ diff --git a/test/e2e/suites/managed/aws_node_env.go b/test/e2e/suites/managed/aws_node_env.go index 192067e249..c4214d52c7 100644 --- a/test/e2e/suites/managed/aws_node_env.go +++ b/test/e2e/suites/managed/aws_node_env.go @@ -21,7 +21,9 @@ package managed import ( "context" + "errors" "fmt" + "time" "github.com/aws/aws-sdk-go/aws/client" . "github.com/onsi/ginkgo/v2" @@ -57,8 +59,9 @@ func CheckAwsNodeEnvVarsSet(ctx context.Context, inputGetter func() UpdateAwsNod By(fmt.Sprintf("Getting control plane: %s", controlPlaneName)) controlPlane := &ekscontrolplanev1.AWSManagedControlPlane{} - err := mgmtClient.Get(ctx, crclient.ObjectKey{Namespace: input.Namespace.Name, Name: controlPlaneName}, controlPlane) - Expect(err).ToNot(HaveOccurred()) + Eventually(func() error { + return mgmtClient.Get(ctx, crclient.ObjectKey{Namespace: input.Namespace.Name, Name: controlPlaneName}, controlPlane) + }, 2*time.Minute, 5*time.Second).Should(Succeed(), "eventually failed trying to get the AWSManagedControlPlane") By(fmt.Sprintf("Checking environment variables are set on AWSManagedControlPlane: %s", controlPlaneName)) Expect(controlPlane.Spec.VpcCni.Env).NotTo(BeNil()) @@ -66,18 +69,24 @@ func CheckAwsNodeEnvVarsSet(ctx context.Context, inputGetter func() UpdateAwsNod By("Checking if aws-node has been updated with the defined environment variables on the workload cluster") daemonSet := &appsv1.DaemonSet{} - clusterClient := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, input.Namespace.Name, input.ClusterName).GetClient() - err = clusterClient.Get(ctx, crclient.ObjectKey{Namespace: "kube-system", Name: "aws-node"}, daemonSet) - Expect(err).ToNot(HaveOccurred()) - - for _, container := range daemonSet.Spec.Template.Spec.Containers { - if container.Name == "aws-node" { - Expect(matchEnvVar(container.Env, corev1.EnvVar{Name: "FOO", Value: "BAR"})).Should(BeTrue()) - Expect(matchEnvVar(container.Env, corev1.EnvVar{Name: "ENABLE_PREFIX_DELEGATION", Value: "true"})).Should(BeTrue()) - break + + Eventually(func() error { + if err := clusterClient.Get(ctx, crclient.ObjectKey{Namespace: "kube-system", Name: "aws-node"}, daemonSet); err != nil { + return fmt.Errorf("unable to get aws-node: %w", err) } - } + + for _, container := range daemonSet.Spec.Template.Spec.Containers { + if container.Name == "aws-node" { + if matchEnvVar(container.Env, corev1.EnvVar{Name: "FOO", Value: "BAR"}) && + matchEnvVar(container.Env, corev1.EnvVar{Name: "ENABLE_PREFIX_DELEGATION", Value: "true"}) { + return nil + } + } + } + + return errors.New("unable to find the expected environment variables on the aws-node DaemonSet's container") + }, 2*time.Minute, 5*time.Second).Should(Succeed(), "should have been able to find the expected aws-node DaemonSet") } func matchEnvVar(s []corev1.EnvVar, ev corev1.EnvVar) bool { diff --git a/test/e2e/suites/managed/eks_test.go b/test/e2e/suites/managed/eks_test.go index 39a1eaa150..ec2b08d343 100644 --- a/test/e2e/suites/managed/eks_test.go +++ b/test/e2e/suites/managed/eks_test.go @@ -22,7 +22,6 @@ package managed import ( "context" "fmt" - "time" "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -77,22 +76,15 @@ var _ = ginkgo.Describe("[managed] [general] EKS cluster tests", func() { }) ginkgo.By("should set environment variables on the aws-node daemonset") - Eventually(func() error { - defer ginkgo.GinkgoRecover() - CheckAwsNodeEnvVarsSet(ctx, func() UpdateAwsNodeVersionSpecInput { - return UpdateAwsNodeVersionSpecInput{ - E2EConfig: e2eCtx.E2EConfig, - BootstrapClusterProxy: e2eCtx.Environment.BootstrapClusterProxy, - AWSSession: e2eCtx.BootstrapUserAWSSession, - Namespace: namespace, - ClusterName: clusterName, - } - }) - return nil - }).WithTimeout(5*time.Minute).WithPolling(10*time.Second).WithContext(ctx).Should( - Succeed(), - "Failed to verify AWS Node environment variables after 5 minutes of retries", - ) + CheckAwsNodeEnvVarsSet(ctx, func() UpdateAwsNodeVersionSpecInput { + return UpdateAwsNodeVersionSpecInput{ + E2EConfig: e2eCtx.E2EConfig, + BootstrapClusterProxy: e2eCtx.Environment.BootstrapClusterProxy, + AWSSession: e2eCtx.BootstrapUserAWSSession, + Namespace: namespace, + ClusterName: clusterName, + } + }) ginkgo.By("should have the VPC CNI installed") CheckAddonExistsSpec(ctx, func() CheckAddonExistsSpecInput { diff --git a/test/e2e/suites/managed/helpers.go b/test/e2e/suites/managed/helpers.go index a749148a3d..dde3deb765 100644 --- a/test/e2e/suites/managed/helpers.go +++ b/test/e2e/suites/managed/helpers.go @@ -32,6 +32,7 @@ import ( . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" apimachinerytypes "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" crclient "sigs.k8s.io/controller-runtime/pkg/client" infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" @@ -74,14 +75,19 @@ func getASGName(clusterName string) string { } func verifyClusterActiveAndOwned(eksClusterName string, sess client.ConfigProvider) { - cluster, err := getEKSCluster(eksClusterName, sess) - Expect(err).NotTo(HaveOccurred()) + var ( + cluster *eks.Cluster + err error + ) + Eventually(func() error { + cluster, err = getEKSCluster(eksClusterName, sess) + return err + }, 2*time.Minute, 5*time.Second).Should(Succeed(), fmt.Sprintf("eventually failed trying to get EKS Cluster %q", eksClusterName)) tagName := infrav1.ClusterTagKey(eksClusterName) tagValue, ok := cluster.Tags[tagName] Expect(ok).To(BeTrue(), "expecting the cluster owned tag to exist") Expect(*tagValue).To(BeEquivalentTo(string(infrav1.ResourceLifecycleOwned))) - Expect(*cluster.Status).To(BeEquivalentTo(eks.ClusterStatusActive)) } @@ -102,6 +108,7 @@ func getEKSClusterAddon(eksClusterName, addonName string, sess client.ConfigProv AddonName: &addonName, ClusterName: &eksClusterName, } + describeOutput, err := eksClient.DescribeAddon(describeInput) if err != nil { return nil, fmt.Errorf("describing eks addon %s: %w", addonName, err) @@ -112,16 +119,16 @@ func getEKSClusterAddon(eksClusterName, addonName string, sess client.ConfigProv func verifySecretExists(ctx context.Context, secretName, namespace string, k8sclient crclient.Client) { secret := &corev1.Secret{} - err := k8sclient.Get(ctx, apimachinerytypes.NamespacedName{Name: secretName, Namespace: namespace}, secret) - - Expect(err).ShouldNot(HaveOccurred()) + Eventually(func() error { + return k8sclient.Get(ctx, apimachinerytypes.NamespacedName{Name: secretName, Namespace: namespace}, secret) + }, 2*time.Minute, 5*time.Second).Should(Succeed(), fmt.Sprintf("eventually failed trying to verify Secret %q exists", secretName)) } func verifyConfigMapExists(ctx context.Context, name, namespace string, k8sclient crclient.Client) { cm := &corev1.ConfigMap{} Eventually(func() error { return k8sclient.Get(ctx, apimachinerytypes.NamespacedName{Name: name, Namespace: namespace}, cm) - }, 2*time.Minute, 5*time.Second).Should(Succeed()) + }, 2*time.Minute, 5*time.Second).Should(Succeed(), fmt.Sprintf("eventually failed trying to verify ConfigMap %q exists", name)) } func VerifyRoleExistsAndOwned(roleName string, eksClusterName string, checkOwned bool, sess client.ConfigProvider) { @@ -130,8 +137,15 @@ func VerifyRoleExistsAndOwned(roleName string, eksClusterName string, checkOwned RoleName: aws.String(roleName), } - output, err := iamClient.GetRole(input) - Expect(err).ShouldNot(HaveOccurred()) + var ( + output *iam.GetRoleOutput + err error + ) + + Eventually(func() error { + output, err = iamClient.GetRole(input) + return err + }, 2*time.Minute, 5*time.Second).Should(Succeed(), fmt.Sprintf("eventually failed trying to get IAM Role %q", roleName)) if checkOwned { found := false @@ -152,9 +166,24 @@ func verifyManagedNodeGroup(eksClusterName, nodeGroupName string, checkOwned boo ClusterName: aws.String(eksClusterName), NodegroupName: aws.String(nodeGroupName), } - result, err := eksClient.DescribeNodegroup(input) - Expect(err).NotTo(HaveOccurred()) - Expect(*result.Nodegroup.Status).To(BeEquivalentTo(eks.NodegroupStatusActive)) + var ( + result *eks.DescribeNodegroupOutput + err error + ) + + Eventually(func() error { + result, err = eksClient.DescribeNodegroup(input) + if err != nil { + return fmt.Errorf("error describing nodegroup: %w", err) + } + + nodeGroupStatus := ptr.Deref(result.Nodegroup.Status, "") + if nodeGroupStatus != eks.NodegroupStatusActive { + return fmt.Errorf("expected nodegroup.Status to be %q, was %q instead", eks.NodegroupStatusActive, nodeGroupStatus) + } + + return nil + }, 2*time.Minute, 5*time.Second).Should(Succeed(), "eventually failed trying to describe EKS Node group") if checkOwned { tagName := infrav1.ClusterAWSCloudProviderTagKey(eksClusterName) @@ -172,8 +201,16 @@ func verifyASG(eksClusterName, asgName string, checkOwned bool, sess client.Conf }, } - result, err := asgClient.DescribeAutoScalingGroups(input) - Expect(err).NotTo(HaveOccurred()) + var ( + result *autoscaling.DescribeAutoScalingGroupsOutput + err error + ) + + Eventually(func() error { + result, err = asgClient.DescribeAutoScalingGroups(input) + return err + }, 2*time.Minute, 5*time.Second).Should(Succeed()) + for _, instance := range result.AutoScalingGroups[0].Instances { Expect(*instance.LifecycleState).To(Equal("InService"), "expecting the instance in service") }