Skip to content

Commit

Permalink
test: e2e: make managed suite more robust to errors with Eventually()
Browse files Browse the repository at this point in the history
  • Loading branch information
damdo committed Nov 13, 2024
1 parent df09e6c commit acd3ed3
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 45 deletions.
6 changes: 4 additions & 2 deletions test/e2e/suites/managed/addon.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ package managed
import (
"context"
"fmt"
"time"

"github.com/aws/aws-sdk-go/aws/client"
"github.com/aws/aws-sdk-go/service/eks"
Expand Down Expand Up @@ -62,8 +63,9 @@ func CheckAddonExistsSpec(ctx context.Context, inputGetter func() CheckAddonExis

By(fmt.Sprintf("Getting control plane: %s", controlPlaneName))
controlPlane := &ekscontrolplanev1.AWSManagedControlPlane{}
err := mgmtClient.Get(ctx, crclient.ObjectKey{Namespace: input.Namespace.Name, Name: controlPlaneName}, controlPlane)
Expect(err).ToNot(HaveOccurred())
Eventually(func() error {
return mgmtClient.Get(ctx, crclient.ObjectKey{Namespace: input.Namespace.Name, Name: controlPlaneName}, controlPlane)
}, 2*time.Minute, 5*time.Second).Should(Succeed(), "eventually failed trying to get the AWSManagedControlPlane")

By(fmt.Sprintf("Checking EKS addon %s is installed on cluster %s and is active", input.AddonName, input.ClusterName))
waitForEKSAddonToHaveStatus(waitForEKSAddonToHaveStatusInput{
Expand Down
33 changes: 21 additions & 12 deletions test/e2e/suites/managed/aws_node_env.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ package managed

import (
"context"
"errors"
"fmt"
"time"

"github.com/aws/aws-sdk-go/aws/client"
. "github.com/onsi/ginkgo/v2"
Expand Down Expand Up @@ -57,27 +59,34 @@ func CheckAwsNodeEnvVarsSet(ctx context.Context, inputGetter func() UpdateAwsNod

By(fmt.Sprintf("Getting control plane: %s", controlPlaneName))
controlPlane := &ekscontrolplanev1.AWSManagedControlPlane{}
err := mgmtClient.Get(ctx, crclient.ObjectKey{Namespace: input.Namespace.Name, Name: controlPlaneName}, controlPlane)
Expect(err).ToNot(HaveOccurred())
Eventually(func() error {
return mgmtClient.Get(ctx, crclient.ObjectKey{Namespace: input.Namespace.Name, Name: controlPlaneName}, controlPlane)
}, 2*time.Minute, 5*time.Second).Should(Succeed(), "eventually failed trying to get the AWSManagedControlPlane")

By(fmt.Sprintf("Checking environment variables are set on AWSManagedControlPlane: %s", controlPlaneName))
Expect(controlPlane.Spec.VpcCni.Env).NotTo(BeNil())
Expect(len(controlPlane.Spec.VpcCni.Env)).Should(BeNumerically(">", 1))

By("Checking if aws-node has been updated with the defined environment variables on the workload cluster")
daemonSet := &appsv1.DaemonSet{}

clusterClient := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, input.Namespace.Name, input.ClusterName).GetClient()
err = clusterClient.Get(ctx, crclient.ObjectKey{Namespace: "kube-system", Name: "aws-node"}, daemonSet)
Expect(err).ToNot(HaveOccurred())

for _, container := range daemonSet.Spec.Template.Spec.Containers {
if container.Name == "aws-node" {
Expect(matchEnvVar(container.Env, corev1.EnvVar{Name: "FOO", Value: "BAR"})).Should(BeTrue())
Expect(matchEnvVar(container.Env, corev1.EnvVar{Name: "ENABLE_PREFIX_DELEGATION", Value: "true"})).Should(BeTrue())
break

Eventually(func() error {
if err := clusterClient.Get(ctx, crclient.ObjectKey{Namespace: "kube-system", Name: "aws-node"}, daemonSet); err != nil {
return fmt.Errorf("unable to get aws-node: %w", err)
}
}

for _, container := range daemonSet.Spec.Template.Spec.Containers {
if container.Name == "aws-node" {
if matchEnvVar(container.Env, corev1.EnvVar{Name: "FOO", Value: "BAR"}) &&
matchEnvVar(container.Env, corev1.EnvVar{Name: "ENABLE_PREFIX_DELEGATION", Value: "true"}) {
return nil
}
}
}

return errors.New("unable to find the expected environment variables on the aws-node DaemonSet's container")
}, 2*time.Minute, 5*time.Second).Should(Succeed(), "should have been able to find the expected aws-node DaemonSet")
}

func matchEnvVar(s []corev1.EnvVar, ev corev1.EnvVar) bool {
Expand Down
26 changes: 9 additions & 17 deletions test/e2e/suites/managed/eks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ package managed
import (
"context"
"fmt"
"time"

"github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
Expand Down Expand Up @@ -77,22 +76,15 @@ var _ = ginkgo.Describe("[managed] [general] EKS cluster tests", func() {
})

ginkgo.By("should set environment variables on the aws-node daemonset")
Eventually(func() error {
defer ginkgo.GinkgoRecover()
CheckAwsNodeEnvVarsSet(ctx, func() UpdateAwsNodeVersionSpecInput {
return UpdateAwsNodeVersionSpecInput{
E2EConfig: e2eCtx.E2EConfig,
BootstrapClusterProxy: e2eCtx.Environment.BootstrapClusterProxy,
AWSSession: e2eCtx.BootstrapUserAWSSession,
Namespace: namespace,
ClusterName: clusterName,
}
})
return nil
}).WithTimeout(5*time.Minute).WithPolling(10*time.Second).WithContext(ctx).Should(
Succeed(),
"Failed to verify AWS Node environment variables after 5 minutes of retries",
)
CheckAwsNodeEnvVarsSet(ctx, func() UpdateAwsNodeVersionSpecInput {
return UpdateAwsNodeVersionSpecInput{
E2EConfig: e2eCtx.E2EConfig,
BootstrapClusterProxy: e2eCtx.Environment.BootstrapClusterProxy,
AWSSession: e2eCtx.BootstrapUserAWSSession,
Namespace: namespace,
ClusterName: clusterName,
}
})

ginkgo.By("should have the VPC CNI installed")
CheckAddonExistsSpec(ctx, func() CheckAddonExistsSpecInput {
Expand Down
65 changes: 51 additions & 14 deletions test/e2e/suites/managed/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
. "github.com/onsi/gomega"
corev1 "k8s.io/api/core/v1"
apimachinerytypes "k8s.io/apimachinery/pkg/types"
"k8s.io/utils/ptr"
crclient "sigs.k8s.io/controller-runtime/pkg/client"

infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2"
Expand Down Expand Up @@ -74,14 +75,19 @@ func getASGName(clusterName string) string {
}

func verifyClusterActiveAndOwned(eksClusterName string, sess client.ConfigProvider) {
cluster, err := getEKSCluster(eksClusterName, sess)
Expect(err).NotTo(HaveOccurred())
var (
cluster *eks.Cluster
err error
)
Eventually(func() error {
cluster, err = getEKSCluster(eksClusterName, sess)
return err
}, 2*time.Minute, 5*time.Second).Should(Succeed(), fmt.Sprintf("eventually failed trying to get EKS Cluster %q", eksClusterName))

tagName := infrav1.ClusterTagKey(eksClusterName)
tagValue, ok := cluster.Tags[tagName]
Expect(ok).To(BeTrue(), "expecting the cluster owned tag to exist")
Expect(*tagValue).To(BeEquivalentTo(string(infrav1.ResourceLifecycleOwned)))

Expect(*cluster.Status).To(BeEquivalentTo(eks.ClusterStatusActive))
}

Expand All @@ -102,6 +108,7 @@ func getEKSClusterAddon(eksClusterName, addonName string, sess client.ConfigProv
AddonName: &addonName,
ClusterName: &eksClusterName,
}

describeOutput, err := eksClient.DescribeAddon(describeInput)
if err != nil {
return nil, fmt.Errorf("describing eks addon %s: %w", addonName, err)
Expand All @@ -112,16 +119,16 @@ func getEKSClusterAddon(eksClusterName, addonName string, sess client.ConfigProv

func verifySecretExists(ctx context.Context, secretName, namespace string, k8sclient crclient.Client) {
secret := &corev1.Secret{}
err := k8sclient.Get(ctx, apimachinerytypes.NamespacedName{Name: secretName, Namespace: namespace}, secret)

Expect(err).ShouldNot(HaveOccurred())
Eventually(func() error {
return k8sclient.Get(ctx, apimachinerytypes.NamespacedName{Name: secretName, Namespace: namespace}, secret)
}, 2*time.Minute, 5*time.Second).Should(Succeed(), fmt.Sprintf("eventually failed trying to verify Secret %q exists", secretName))
}

func verifyConfigMapExists(ctx context.Context, name, namespace string, k8sclient crclient.Client) {
cm := &corev1.ConfigMap{}
Eventually(func() error {
return k8sclient.Get(ctx, apimachinerytypes.NamespacedName{Name: name, Namespace: namespace}, cm)
}, 2*time.Minute, 5*time.Second).Should(Succeed())
}, 2*time.Minute, 5*time.Second).Should(Succeed(), fmt.Sprintf("eventually failed trying to verify ConfigMap %q exists", name))
}

func VerifyRoleExistsAndOwned(roleName string, eksClusterName string, checkOwned bool, sess client.ConfigProvider) {
Expand All @@ -130,8 +137,15 @@ func VerifyRoleExistsAndOwned(roleName string, eksClusterName string, checkOwned
RoleName: aws.String(roleName),
}

output, err := iamClient.GetRole(input)
Expect(err).ShouldNot(HaveOccurred())
var (
output *iam.GetRoleOutput
err error
)

Eventually(func() error {
output, err = iamClient.GetRole(input)
return err
}, 2*time.Minute, 5*time.Second).Should(Succeed(), fmt.Sprintf("eventually failed trying to get IAM Role %q", roleName))

if checkOwned {
found := false
Expand All @@ -152,9 +166,24 @@ func verifyManagedNodeGroup(eksClusterName, nodeGroupName string, checkOwned boo
ClusterName: aws.String(eksClusterName),
NodegroupName: aws.String(nodeGroupName),
}
result, err := eksClient.DescribeNodegroup(input)
Expect(err).NotTo(HaveOccurred())
Expect(*result.Nodegroup.Status).To(BeEquivalentTo(eks.NodegroupStatusActive))
var (
result *eks.DescribeNodegroupOutput
err error
)

Eventually(func() error {
result, err = eksClient.DescribeNodegroup(input)
if err != nil {
return fmt.Errorf("error describing nodegroup: %w", err)
}

nodeGroupStatus := ptr.Deref(result.Nodegroup.Status, "")
if nodeGroupStatus != eks.NodegroupStatusActive {
return fmt.Errorf("expected nodegroup.Status to be %q, was %q instead", eks.NodegroupStatusActive, nodeGroupStatus)
}

return nil
}, 2*time.Minute, 5*time.Second).Should(Succeed(), "eventually failed trying to describe EKS Node group")

if checkOwned {
tagName := infrav1.ClusterAWSCloudProviderTagKey(eksClusterName)
Expand All @@ -172,8 +201,16 @@ func verifyASG(eksClusterName, asgName string, checkOwned bool, sess client.Conf
},
}

result, err := asgClient.DescribeAutoScalingGroups(input)
Expect(err).NotTo(HaveOccurred())
var (
result *autoscaling.DescribeAutoScalingGroupsOutput
err error
)

Eventually(func() error {
result, err = asgClient.DescribeAutoScalingGroups(input)
return err
}, 2*time.Minute, 5*time.Second).Should(Succeed())

for _, instance := range result.AutoScalingGroups[0].Instances {
Expect(*instance.LifecycleState).To(Equal("InService"), "expecting the instance in service")
}
Expand Down

0 comments on commit acd3ed3

Please sign in to comment.