diff --git a/cmd/controller/main.go b/cmd/controller/main.go index 9ee93ec0e13e..301b496d8c90 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -47,7 +47,6 @@ func main() { WithControllers(ctx, corecontrollers.NewControllers( op.Clock, op.GetClient(), - op.KubernetesInterface, state.NewCluster(op.Clock, op.GetClient(), cloudProvider), op.EventRecorder, cloudProvider, diff --git a/go.mod b/go.mod index cd50c78acbed..e939e18c9737 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( github.com/PuerkitoBio/goquery v1.8.1 github.com/avast/retry-go v3.0.0+incompatible github.com/aws/aws-sdk-go v1.48.0 - github.com/aws/karpenter-core v0.32.5 + github.com/aws/karpenter-core v0.32.9 github.com/aws/karpenter/tools/kompat v0.0.0-20231010173459-62c25a3ea85c github.com/go-logr/zapr v1.3.0 github.com/imdario/mergo v0.3.16 @@ -42,7 +42,6 @@ require ( github.com/census-instrumentation/opencensus-proto v0.4.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect - github.com/deckarep/golang-set v1.8.0 // indirect github.com/emicklei/go-restful/v3 v3.11.0 // indirect github.com/evanphx/json-patch v5.7.0+incompatible // indirect github.com/evanphx/json-patch/v5 v5.7.0 // indirect diff --git a/go.sum b/go.sum index f2c8d09fe606..a0d01ab2b5d0 100644 --- a/go.sum +++ b/go.sum @@ -57,8 +57,8 @@ github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHS github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= github.com/aws/aws-sdk-go v1.48.0 h1:1SeJ8agckRDQvnSCt1dGZYAwUaoD2Ixj6IaXB4LCv8Q= github.com/aws/aws-sdk-go v1.48.0/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk= -github.com/aws/karpenter-core v0.32.5 h1:8XEMiPX80TwtMJpGVMXkxkZ7YW4CzgD6X1OGfSft2Es= -github.com/aws/karpenter-core v0.32.5/go.mod h1:RNih2g6qCiah8rFaZ7HkmClIK66Hjj38z3DbWnWGM2w= +github.com/aws/karpenter-core v0.32.9 h1:KICh1aJfzhTbri3b0kH/KdVuupcdeKTDcSebDbnPv40= +github.com/aws/karpenter-core v0.32.9/go.mod h1:DY6qKd/bLlTrWFDtEdYQtapSUtCArP72DpkseN5EB8U= github.com/aws/karpenter/tools/kompat v0.0.0-20231010173459-62c25a3ea85c h1:oXWwIttmjYLbBKhLazG21aQvpJ3NOOr8IXhCJ/p6e/M= github.com/aws/karpenter/tools/kompat v0.0.0-20231010173459-62c25a3ea85c/go.mod h1:l/TIBsaCx/IrOr0Xvlj/cHLOf05QzuQKEZ1hx2XWmfU= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= @@ -86,8 +86,6 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/deckarep/golang-set v1.8.0 h1:sk9/l/KqpunDwP7pSjUg0keiOOLEnOBHzykLrsPppp4= -github.com/deckarep/golang-set v1.8.0/go.mod h1:5nI87KwE7wgsBU1F4GKAw2Qod7p5kyS383rP6+o6qqo= github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= diff --git a/hack/toolchain.sh b/hack/toolchain.sh index 97bb2c32b240..61ab33df3961 100755 --- a/hack/toolchain.sh +++ b/hack/toolchain.sh @@ -16,7 +16,7 @@ tools() { go install github.com/mikefarah/yq/v4@latest go install github.com/norwoodj/helm-docs/cmd/helm-docs@latest go install sigs.k8s.io/controller-runtime/tools/setup-envtest@latest - go install sigs.k8s.io/controller-tools/cmd/controller-gen@latest + go install sigs.k8s.io/controller-tools/cmd/controller-gen@v0.13.0 go install github.com/sigstore/cosign/cmd/cosign@latest go install -tags extended github.com/gohugoio/hugo@v0.110.0 go install golang.org/x/vuln/cmd/govulncheck@latest diff --git a/pkg/apis/v1beta1/ec2nodeclass.go b/pkg/apis/v1beta1/ec2nodeclass.go index 2f9fca67ccc2..728862badde6 100644 --- a/pkg/apis/v1beta1/ec2nodeclass.go +++ b/pkg/apis/v1beta1/ec2nodeclass.go @@ -336,6 +336,12 @@ type EC2NodeClass struct { IsNodeTemplate bool `json:"-" hash:"ignore"` } +// We need to bump the EC2NodeClassHashVersion when we make an update to the EC2NodeClass CRD under these conditions: +// 1. A field changes its default value for an existing field that is already hashed +// 2. A field is added to the hash calculation with an already-set value +// 3. A field is removed from the hash calculations +const EC2NodeClassHashVersion = "v1" + func (in *EC2NodeClass) Hash() string { return fmt.Sprint(lo.Must(hashstructure.Hash(in.Spec, hashstructure.FormatV2, &hashstructure.HashOptions{ SlicesAsSets: true, diff --git a/pkg/apis/v1beta1/labels.go b/pkg/apis/v1beta1/labels.go index 21559932fa3c..53fcac481bf1 100644 --- a/pkg/apis/v1beta1/labels.go +++ b/pkg/apis/v1beta1/labels.go @@ -111,6 +111,7 @@ var ( LabelInstanceAcceleratorName = Group + "/instance-accelerator-name" LabelInstanceAcceleratorManufacturer = Group + "/instance-accelerator-manufacturer" LabelInstanceAcceleratorCount = Group + "/instance-accelerator-count" - AnnotationNodeClassHash = Group + "/ec2nodeclass-hash" + AnnotationEC2NodeClassHash = Group + "/ec2nodeclass-hash" + AnnotationEC2NodeClassHashVersion = Group + "/ec2nodeclass-hash-version" AnnotationInstanceTagged = Group + "/tagged" ) diff --git a/pkg/cloudprovider/drift.go b/pkg/cloudprovider/drift.go index f407ff94c150..dd1cf7b5367e 100644 --- a/pkg/cloudprovider/drift.go +++ b/pkg/cloudprovider/drift.go @@ -135,13 +135,26 @@ func (c *CloudProvider) areStaticFieldsDrifted(nodeClaim *corev1beta1.NodeClaim, if nodeClaim.IsMachine { ownerHashKey = v1alpha1.AnnotationNodeTemplateHash } else { - ownerHashKey = v1beta1.AnnotationNodeClassHash + ownerHashKey = v1beta1.AnnotationEC2NodeClassHash } nodeClassHash, foundHashNodeClass := nodeClass.Annotations[ownerHashKey] nodeClaimHash, foundHashNodeClaim := nodeClaim.Annotations[ownerHashKey] if !foundHashNodeClass || !foundHashNodeClaim { return "" } + + if !nodeClass.IsNodeTemplate { + nodeClassHashVersion, foundNodeClassHashVersion := nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHashVersion] + nodeClaimHashVersion, foundNodeClaimHashVersion := nodeClaim.Annotations[v1beta1.AnnotationEC2NodeClassHashVersion] + if !foundNodeClassHashVersion || !foundNodeClaimHashVersion { + return "" + } + // validate that the hash version for the EC2NodeClass is the same as the NodeClaim before evaluating for static drift + if nodeClassHashVersion != nodeClaimHashVersion { + return "" + } + } + if nodeClassHash != nodeClaimHash { return lo.Ternary(nodeClaim.IsMachine, NodeTemplateDrift, NodeClassDrift) } diff --git a/pkg/cloudprovider/nodeclaim_test.go b/pkg/cloudprovider/nodeclaim_test.go index a22146fe2801..6a865f77d760 100644 --- a/pkg/cloudprovider/nodeclaim_test.go +++ b/pkg/cloudprovider/nodeclaim_test.go @@ -99,7 +99,7 @@ var _ = Describe("NodeClaim/CloudProvider", func() { cloudProviderNodeClaim, err := cloudProvider.Create(ctx, nodeClaim) Expect(err).To(BeNil()) Expect(cloudProviderNodeClaim).ToNot(BeNil()) - _, ok := cloudProviderNodeClaim.ObjectMeta.Annotations[v1beta1.AnnotationNodeClassHash] + _, ok := cloudProviderNodeClaim.ObjectMeta.Annotations[v1beta1.AnnotationEC2NodeClassHash] Expect(ok).To(BeTrue()) }) Context("EC2 Context", func() { @@ -194,10 +194,14 @@ var _ = Describe("NodeClaim/CloudProvider", func() { Reservations: []*ec2.Reservation{{Instances: []*ec2.Instance{instance}}}, }) nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{ - v1beta1.AnnotationNodeClassHash: nodeClass.Hash(), + v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(), + v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion, }) nodeClaim.Status.ProviderID = fake.ProviderID(lo.FromPtr(instance.InstanceId)) - nodeClaim.Annotations = lo.Assign(nodeClaim.Annotations, map[string]string{v1beta1.AnnotationNodeClassHash: nodeClass.Hash()}) + nodeClaim.Annotations = lo.Assign(nodeClaim.Annotations, map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(), + v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion, + }) nodeClaim.Labels = lo.Assign(nodeClaim.Labels, map[string]string{v1.LabelInstanceTypeStable: selectedInstanceType.Name}) }) It("should not fail if NodeClass does not exist", func() { @@ -226,7 +230,7 @@ var _ = Describe("NodeClaim/CloudProvider", func() { instance.SecurityGroups = []*ec2.GroupIdentifier{{GroupId: aws.String(fake.SecurityGroupID())}} // Assign a fake hash nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{ - v1beta1.AnnotationNodeClassHash: "abcdefghijkl", + v1beta1.AnnotationEC2NodeClassHash: "abcdefghijkl", }) ExpectApplied(ctx, env.Client, nodeClass) isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim) @@ -327,7 +331,7 @@ var _ = Describe("NodeClaim/CloudProvider", func() { Expect(isDrifted).To(BeEmpty()) Expect(mergo.Merge(nodeClass, changes, mergo.WithOverride)) - nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{v1beta1.AnnotationNodeClassHash: nodeClass.Hash()}) + nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()}) ExpectApplied(ctx, env.Client, nodeClass) isDrifted, err = cloudProvider.IsDrifted(ctx, nodeClaim) @@ -350,7 +354,7 @@ var _ = Describe("NodeClaim/CloudProvider", func() { Expect(isDrifted).To(BeEmpty()) Expect(mergo.Merge(nodeClass, changes, mergo.WithOverride)) - nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{v1beta1.AnnotationNodeClassHash: nodeClass.Hash()}) + nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()}) ExpectApplied(ctx, env.Client, nodeClass) isDrifted, err = cloudProvider.IsDrifted(ctx, nodeClaim) @@ -371,6 +375,54 @@ var _ = Describe("NodeClaim/CloudProvider", func() { Expect(err).NotTo(HaveOccurred()) Expect(isDrifted).To(BeEmpty()) }) + It("should not return drifted if the NodeClaim's karpenter.k8s.aws/ec2nodeclass-hash-version annotation does not match the EC2NodeClass's", func() { + nodeClass.ObjectMeta.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "test-hash-111111", + v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-1", + } + nodeClaim.ObjectMeta.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "test-hash-222222", + v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-2", + } + ExpectApplied(ctx, env.Client, nodePool, nodeClass) + isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim) + Expect(err).NotTo(HaveOccurred()) + Expect(isDrifted).To(BeEmpty()) + }) + It("should not return drifted if karpenter.k8s.aws/ec2nodeclass-hash-version annotation is not present on the NodeClass", func() { + nodeClass.ObjectMeta.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "test-hash-111111", + } + nodeClaim.ObjectMeta.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "test-hash-222222", + v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-2", + } + // should trigger drift + nodeClass.Spec.Tags = map[string]string{ + "Test Key": "Test Value", + } + ExpectApplied(ctx, env.Client, nodePool, nodeClass) + isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim) + Expect(err).NotTo(HaveOccurred()) + Expect(isDrifted).To(BeEmpty()) + }) + It("should not return drifted if karpenter.k8s.aws/ec2nodeclass-hash-version annotation is not present on the NodeClaim", func() { + nodeClass.ObjectMeta.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "test-hash-111111", + v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-1", + } + nodeClaim.ObjectMeta.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "test-hash-222222", + } + // should trigger drift + nodeClass.Spec.Tags = map[string]string{ + "Test Key": "Test Value", + } + ExpectApplied(ctx, env.Client, nodePool, nodeClass) + isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim) + Expect(err).NotTo(HaveOccurred()) + Expect(isDrifted).To(BeEmpty()) + }) }) }) Context("Subnet Compatibility", func() { diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index 28c9121dfb09..6d926c3ab175 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -72,7 +72,7 @@ var _ = BeforeSuite(func() { cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, recorder, env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.SubnetProvider) cluster = state.NewCluster(fakeClock, env.Client, cloudProvider) - prov = provisioning.NewProvisioner(env.Client, env.KubernetesInterface.CoreV1(), recorder, cloudProvider, cluster) + prov = provisioning.NewProvisioner(env.Client, recorder, cloudProvider, cluster) }) var _ = AfterSuite(func() { diff --git a/pkg/controllers/nodeclass/controller.go b/pkg/controllers/nodeclass/controller.go index 2bacfc2f6e2c..5fa356a3053c 100644 --- a/pkg/controllers/nodeclass/controller.go +++ b/pkg/controllers/nodeclass/controller.go @@ -76,7 +76,13 @@ func (c *Controller) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2NodeCl if !nodeClass.IsNodeTemplate { controllerutil.AddFinalizer(nodeClass, v1beta1.TerminationFinalizer) } + if nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHashVersion] != v1beta1.EC2NodeClassHashVersion { + if err := c.updateNodeClaimHash(ctx, nodeClass); err != nil { + return reconcile.Result{}, err + } + } nodeClass.Annotations = lo.Assign(nodeClass.Annotations, nodeclassutil.HashAnnotation(nodeClass)) + err := multierr.Combine( c.resolveSubnets(ctx, nodeClass), c.resolveSecurityGroups(ctx, nodeClass), @@ -216,6 +222,49 @@ func (c *Controller) resolveInstanceProfile(ctx context.Context, nodeClass *v1be return nil } +// Updating `ec2nodeclass-hash-version` annotation inside the karpenter controller means a breaking change has been made to the hash calculation. +// `ec2nodeclass-hash` annotation on the EC2NodeClass will be updated, due to the breaking change, making the `ec2nodeclass-hash` on the NodeClaim different from +// EC2NodeClass. Since, we cannot rely on the `ec2nodeclass-hash` on the NodeClaims, due to the breaking change, we will need to re-calculate the hash and update the annotation. +// For more information on the Drift Hash Versioning: https://github.com/kubernetes-sigs/karpenter/blob/main/designs/drift-hash-versioning.md +func (c *Controller) updateNodeClaimHash(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) error { + if nodeClass.IsNodeTemplate { + return nil + } + + ncList := &corev1beta1.NodeClaimList{} + if err := c.kubeClient.List(ctx, ncList, client.MatchingFields{"spec.nodeClassRef.name": nodeClass.Name}); err != nil { + return err + } + + errs := make([]error, len(ncList.Items)) + for i := range ncList.Items { + nc := ncList.Items[i] + stored := nc.DeepCopy() + + if nc.Annotations[v1beta1.AnnotationEC2NodeClassHashVersion] != v1beta1.EC2NodeClassHashVersion { + nc.Annotations = lo.Assign(nc.Annotations, map[string]string{ + v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion, + }) + + // Any NodeClaim that is already drifted will remain drifted if the karpenter.k8s.aws/nodepool-hash-version doesn't match + // Since the hashing mechanism has changed we will not be able to determine if the drifted status of the NodeClaim has changed + if nc.StatusConditions().GetCondition(corev1beta1.Drifted) == nil { + nc.Annotations = lo.Assign(nc.Annotations, map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(), + }) + } + + if !equality.Semantic.DeepEqual(stored, nc) { + if err := c.kubeClient.Patch(ctx, &nc, client.MergeFrom(stored)); err != nil { + errs[i] = client.IgnoreNotFound(err) + } + } + } + } + + return multierr.Combine(errs...) +} + var _ corecontroller.FinalizingTypedController[*v1beta1.EC2NodeClass] = (*NodeClassController)(nil) //nolint:revive @@ -230,7 +279,7 @@ func NewNodeClassController(kubeClient client.Client, recorder events.Recorder, }) } -func (c *NodeClassController) Name() string { +func (c *Controller) Name() string { return "nodeclass" } diff --git a/pkg/controllers/nodeclass/nodeclass_test.go b/pkg/controllers/nodeclass/nodeclass_test.go index bea47b33fe51..d42095ae806d 100644 --- a/pkg/controllers/nodeclass/nodeclass_test.go +++ b/pkg/controllers/nodeclass/nodeclass_test.go @@ -29,6 +29,8 @@ import ( _ "knative.dev/pkg/system/testing" "sigs.k8s.io/controller-runtime/pkg/client" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + corev1beta1 "github.com/aws/karpenter-core/pkg/apis/v1beta1" coretest "github.com/aws/karpenter-core/pkg/test" . "github.com/aws/karpenter-core/pkg/test/expectations" @@ -702,7 +704,7 @@ var _ = Describe("NodeClassController", func() { nodeClass = ExpectExists(ctx, env.Client, nodeClass) expectedHash := nodeClass.Hash() - Expect(nodeClass.ObjectMeta.Annotations[v1beta1.AnnotationNodeClassHash]).To(Equal(expectedHash)) + Expect(nodeClass.ObjectMeta.Annotations[v1beta1.AnnotationEC2NodeClassHash]).To(Equal(expectedHash)) Expect(mergo.Merge(nodeClass, changes, mergo.WithOverride)).To(Succeed()) @@ -711,7 +713,7 @@ var _ = Describe("NodeClassController", func() { nodeClass = ExpectExists(ctx, env.Client, nodeClass) expectedHashTwo := nodeClass.Hash() - Expect(nodeClass.Annotations[v1beta1.AnnotationNodeClassHash]).To(Equal(expectedHashTwo)) + Expect(nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHash]).To(Equal(expectedHashTwo)) Expect(expectedHash).ToNot(Equal(expectedHashTwo)) }, @@ -729,7 +731,7 @@ var _ = Describe("NodeClassController", func() { nodeClass = ExpectExists(ctx, env.Client, nodeClass) expectedHash := nodeClass.Hash() - Expect(nodeClass.Annotations[v1beta1.AnnotationNodeClassHash]).To(Equal(expectedHash)) + Expect(nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHash]).To(Equal(expectedHash)) nodeClass.Spec.SubnetSelectorTerms = []v1beta1.SubnetSelectorTerm{ { @@ -750,7 +752,129 @@ var _ = Describe("NodeClassController", func() { ExpectApplied(ctx, env.Client, nodeClass) ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) nodeClass = ExpectExists(ctx, env.Client, nodeClass) - Expect(nodeClass.Annotations[v1beta1.AnnotationNodeClassHash]).To(Equal(expectedHash)) + Expect(nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHash]).To(Equal(expectedHash)) + }) + It("should update ec2nodeclass-hash-version annotation when the ec2nodeclass-hash-version on the NodeClass does not match with the controller hash version", func() { + nodeClass.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "abceduefed", + v1beta1.AnnotationEC2NodeClassHashVersion: "test", + } + ExpectApplied(ctx, env.Client, nodeClass) + + ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + + expectedHash := nodeClass.Hash() + // Expect ec2nodeclass-hash on the NodeClass to be updated + Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + }) + It("should update ec2nodeclass-hash-versions on all NodeClaims when the ec2nodeclass-hash-version does not match with the controller hash version", func() { + nodeClass.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "abceduefed", + v1beta1.AnnotationEC2NodeClassHashVersion: "test", + } + nodeClaimOne := coretest.NodeClaim(corev1beta1.NodeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "123456", + v1beta1.AnnotationEC2NodeClassHashVersion: "test", + }, + }, + Spec: corev1beta1.NodeClaimSpec{ + NodeClassRef: &corev1beta1.NodeClassReference{ + Name: nodeClass.Name, + }, + }, + }) + nodeClaimTwo := coretest.NodeClaim(corev1beta1.NodeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "123456", + v1beta1.AnnotationEC2NodeClassHashVersion: "test", + }, + }, + Spec: corev1beta1.NodeClaimSpec{ + NodeClassRef: &corev1beta1.NodeClassReference{ + Name: nodeClass.Name, + }, + }, + }) + + ExpectApplied(ctx, env.Client, nodeClass, nodeClaimOne, nodeClaimTwo) + + ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + nodeClaimOne = ExpectExists(ctx, env.Client, nodeClaimOne) + nodeClaimTwo = ExpectExists(ctx, env.Client, nodeClaimTwo) + + expectedHash := nodeClass.Hash() + // Expect ec2nodeclass-hash on the NodeClaims to be updated + Expect(nodeClaimOne.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + Expect(nodeClaimOne.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + Expect(nodeClaimTwo.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + Expect(nodeClaimTwo.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + }) + It("should not update ec2nodeclass-hash on all NodeClaims when the ec2nodeclass-hash-version matches the controller hash version", func() { + nodeClass.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "abceduefed", + v1beta1.AnnotationEC2NodeClassHashVersion: "test-version", + } + nodeClaim := coretest.NodeClaim(corev1beta1.NodeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "1234564654", + v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion, + }, + }, + Spec: corev1beta1.NodeClaimSpec{ + NodeClassRef: &corev1beta1.NodeClassReference{ + Name: nodeClass.Name, + }, + }, + }) + ExpectApplied(ctx, env.Client, nodeClass, nodeClaim) + + ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) + + expectedHash := nodeClass.Hash() + + // Expect ec2nodeclass-hash on the NodeClass to be updated + Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + // Expect ec2nodeclass-hash on the NodeClaims to stay the same + Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, "1234564654")) + Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + }) + It("should not update ec2nodeclass-hash on the NodeClaim if it's drifted and the ec2nodeclass-hash-version does not match the controller hash version", func() { + nodeClass.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "abceduefed", + v1beta1.AnnotationEC2NodeClassHashVersion: "test", + } + nodeClaim := coretest.NodeClaim(corev1beta1.NodeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "123456", + v1beta1.AnnotationEC2NodeClassHashVersion: "test", + }, + }, + Spec: corev1beta1.NodeClaimSpec{ + NodeClassRef: &corev1beta1.NodeClassReference{ + Name: nodeClass.Name, + }, + }, + }) + nodeClaim.StatusConditions().MarkTrue(corev1beta1.Drifted) + ExpectApplied(ctx, env.Client, nodeClass, nodeClaim) + + ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) + nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) + + // Expect ec2nodeclass-hash on the NodeClaims to stay the same + Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, "123456")) + Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) }) }) Context("NodeClass Termination", func() { diff --git a/pkg/providers/amifamily/bootstrap/bootstrap.go b/pkg/providers/amifamily/bootstrap/bootstrap.go index b3d9642e817c..e08533f2bdfa 100644 --- a/pkg/providers/amifamily/bootstrap/bootstrap.go +++ b/pkg/providers/amifamily/bootstrap/bootstrap.go @@ -19,6 +19,7 @@ import ( "sort" "strings" + "github.com/mitchellh/hashstructure/v2" "github.com/samber/lo" core "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -99,6 +100,17 @@ func (o Options) nodeLabelArg() string { return fmt.Sprintf("--node-labels=%q", strings.Join(labelStrings, ",")) } +// TODO: jmdeal@ remove once KubeletConfiguration can be properly hashed +// For more information on the resource.Quantity hash issue: https://github.com/aws/karpenter-provider-aws/issues/5447 +func (o Options) HashReservedResources() string { + kubeReservedHash, systemReservedHash := uint64(0), uint64(0) + if kc := o.KubeletConfig; kc != nil { + kubeReservedHash, _ = hashstructure.Hash(resources.StringMap(kc.KubeReserved), hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) + systemReservedHash, _ = hashstructure.Hash(resources.StringMap(kc.SystemReserved), hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) + } + return fmt.Sprintf("%d-%d", kubeReservedHash, systemReservedHash) +} + // joinParameterArgs joins a map of keys and values by their separator. The separator will sit between the // arguments in a comma-separated list i.e. arg1val1,arg2val2 func joinParameterArgs[K comparable, V any](name string, m map[K]V, separator string) string { @@ -119,4 +131,7 @@ func joinParameterArgs[K comparable, V any](name string, m map[K]V, separator st // Examples are the Bottlerocket config and the eks-bootstrap script type Bootstrapper interface { Script() (string, error) + + // TODO: jmdeal@ remove once KubeletConfiguration can be properly hashed + HashReservedResources() string } diff --git a/pkg/providers/instancetype/instancetype.go b/pkg/providers/instancetype/instancetype.go index 2246c33e09cf..5766a5b7f84a 100644 --- a/pkg/providers/instancetype/instancetype.go +++ b/pkg/providers/instancetype/instancetype.go @@ -42,6 +42,7 @@ import ( "github.com/aws/karpenter-core/pkg/cloudprovider" "github.com/aws/karpenter-core/pkg/utils/pretty" + "github.com/aws/karpenter-core/pkg/utils/resources" ) const ( @@ -111,9 +112,32 @@ func (p *Provider) List(ctx context.Context, kc *corev1beta1.KubeletConfiguratio // Compute fully initialized instance types hash key subnetHash, _ := hashstructure.Hash(subnets, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) kcHash, _ := hashstructure.Hash(kc, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) + // TODO: remove kubeReservedHash and systemReservedHash once v1.ResourceList objects are hashed as strings in KubeletConfiguration + // For more information on the v1.ResourceList hash issue: https://github.com/kubernetes-sigs/karpenter/issues/1080 + kubeReservedHash, systemReservedHash := uint64(0), uint64(0) + if kc != nil { + kubeReservedHash, _ = hashstructure.Hash(resources.StringMap(kc.KubeReserved), hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) + systemReservedHash, _ = hashstructure.Hash(resources.StringMap(kc.SystemReserved), hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) + } blockDeviceMappingsHash, _ := hashstructure.Hash(nodeClass.Spec.BlockDeviceMappings, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) - key := fmt.Sprintf("%d-%d-%d-%016x-%016x-%016x-%s", p.instanceTypesSeqNum, p.instanceTypeOfferingsSeqNum, p.unavailableOfferings.SeqNum, subnetHash, kcHash, blockDeviceMappingsHash, aws.StringValue(nodeClass.Spec.AMIFamily)) - + // TODO: remove volumeSizeHash once resource.Quantity objects get hashed as a string in BlockDeviceMappings + // For more information on the resource.Quantity hash issue: https://github.com/aws/karpenter-provider-aws/issues/5447 + volumeSizeHash, _ := hashstructure.Hash(lo.Reduce(nodeClass.Spec.BlockDeviceMappings, func(agg string, block *v1beta1.BlockDeviceMapping, _ int) string { + return fmt.Sprintf("%s/%s", agg, block.EBS.VolumeSize) + }, ""), hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) + key := fmt.Sprintf("%d-%d-%d-%016x-%016x-%016x-%s-%t-%016x-%016x-%016x", + p.instanceTypesSeqNum, + p.instanceTypeOfferingsSeqNum, + p.unavailableOfferings.SeqNum, + subnetHash, + kcHash, + blockDeviceMappingsHash, + aws.StringValue(nodeClass.Spec.AMIFamily), + nodeClass.IsNodeTemplate, + volumeSizeHash, + kubeReservedHash, + systemReservedHash, + ) if item, ok := p.cache.Get(key); ok { return item.([]*cloudprovider.InstanceType), nil } diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index 4a56a7bcbc62..f96f3b391e6e 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -71,7 +71,7 @@ var _ = BeforeSuite(func() { cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.SubnetProvider) cluster = state.NewCluster(fakeClock, env.Client, cloudProvider) - prov = provisioning.NewProvisioner(env.Client, env.KubernetesInterface.CoreV1(), events.NewRecorder(&record.FakeRecorder{}), cloudProvider, cluster) + prov = provisioning.NewProvisioner(env.Client, events.NewRecorder(&record.FakeRecorder{}), cloudProvider, cluster) }) var _ = AfterSuite(func() { diff --git a/pkg/providers/launchtemplate/launchtemplate.go b/pkg/providers/launchtemplate/launchtemplate.go index 7c3a81bf6568..f2b8c62b24bb 100644 --- a/pkg/providers/launchtemplate/launchtemplate.go +++ b/pkg/providers/launchtemplate/launchtemplate.go @@ -145,7 +145,20 @@ func (p *Provider) Invalidate(ctx context.Context, ltName string, ltID string) { } func launchTemplateName(options *amifamily.LaunchTemplate) string { - hash, err := hashstructure.Hash(options, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) + // TODO: jmdeal@ remove custom hash struct once BlockDeviceMapping and KubeletConfiguration hashing is fixed, only hash Options + volumeSizeHash, _ := hashstructure.Hash(lo.Reduce(options.BlockDeviceMappings, func(agg string, block *v1beta1.BlockDeviceMapping, _ int) string { + return fmt.Sprintf("%s/%s", agg, block.EBS.VolumeSize) + }, ""), hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) + hashStruct := struct { + Options *amifamily.LaunchTemplate + VolumeSizeHash string + ReservedResourcesHash string + }{ + Options: options, + VolumeSizeHash: fmt.Sprint(volumeSizeHash), + ReservedResourcesHash: options.UserData.HashReservedResources(), + } + hash, err := hashstructure.Hash(hashStruct, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) if err != nil { panic(fmt.Sprintf("hashing launch template, %s", err)) } diff --git a/pkg/providers/launchtemplate/suite_test.go b/pkg/providers/launchtemplate/suite_test.go index 046648db9f71..a8b1cd9cc024 100644 --- a/pkg/providers/launchtemplate/suite_test.go +++ b/pkg/providers/launchtemplate/suite_test.go @@ -71,7 +71,7 @@ var _ = BeforeSuite(func() { cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.SubnetProvider) cluster = state.NewCluster(fakeClock, env.Client, cloudProvider) - prov = provisioning.NewProvisioner(env.Client, env.KubernetesInterface.CoreV1(), events.NewRecorder(&record.FakeRecorder{}), cloudProvider, cluster) + prov = provisioning.NewProvisioner(env.Client, events.NewRecorder(&record.FakeRecorder{}), cloudProvider, cluster) }) var _ = AfterSuite(func() { diff --git a/pkg/utils/nodeclass/nodeclass.go b/pkg/utils/nodeclass/nodeclass.go index c43e00c92340..c53f8ad4292c 100644 --- a/pkg/utils/nodeclass/nodeclass.go +++ b/pkg/utils/nodeclass/nodeclass.go @@ -284,7 +284,10 @@ func HashAnnotation(nodeClass *v1beta1.EC2NodeClass) map[string]string { nodeTemplate := nodetemplateutil.New(nodeClass) return map[string]string{v1alpha1.AnnotationNodeTemplateHash: nodeTemplate.Hash()} } - return map[string]string{v1beta1.AnnotationNodeClassHash: nodeClass.Hash()} + return map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(), + v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion, + } } func createSelectorTags(k string, v string, tagSet []map[string]string) (res []map[string]string) { diff --git a/test/pkg/environment/common/expectations.go b/test/pkg/environment/common/expectations.go index d36944830030..ae61059c8604 100644 --- a/test/pkg/environment/common/expectations.go +++ b/test/pkg/environment/common/expectations.go @@ -270,11 +270,12 @@ func (env *Environment) ExpectPrefixDelegationDisabled() { "ENABLE_PREFIX_DELEGATION", "false", "aws-node") } -func (env *Environment) ExpectExists(obj client.Object) { +func (env *Environment) ExpectExists(obj client.Object) client.Object { GinkgoHelper() Eventually(func(g Gomega) { g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(obj), obj)).To(Succeed()) }).WithTimeout(time.Second * 5).Should(Succeed()) + return obj } func (env *Environment) EventuallyExpectHealthy(pods ...*v1.Pod) { @@ -667,6 +668,18 @@ func (env *Environment) EventuallyExpectNodeClaimsReady(nodeClaims ...*corev1bet }).Should(Succeed()) } +func (env *Environment) ConsistentlyExpectNodeClaimsNotDrifted(duration time.Duration, nodeClaims ...*corev1beta1.NodeClaim) { + GinkgoHelper() + nodeClaimNames := lo.Map(nodeClaims, func(nc *corev1beta1.NodeClaim, _ int) string { return nc.Name }) + By(fmt.Sprintf("consistently expect nodeclaims %s not to be drifted for %s", nodeClaimNames, duration)) + Consistently(func(g Gomega) { + for _, nc := range nodeClaims { + g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nc), nc)).To(Succeed()) + g.Expect(nc.StatusConditions().GetCondition(corev1beta1.Drifted)).To(BeNil()) + } + }, duration).Should(Succeed()) +} + func (env *Environment) GetNode(nodeName string) v1.Node { GinkgoHelper() var node v1.Node diff --git a/test/suites/alpha/drift/suite_test.go b/test/suites/alpha/drift/suite_test.go index 98c73de39946..c80d6613d613 100644 --- a/test/suites/alpha/drift/suite_test.go +++ b/test/suites/alpha/drift/suite_test.go @@ -388,9 +388,9 @@ var _ = Describe("Drift", Label("AWS"), func() { startingMachineState := env.EventuallyExpectCreatedMachineCount("==", int(numPods)) env.EventuallyExpectCreatedNodeCount("==", int(numPods)) - // Drift the machine with bad configuration + // Drift the nodeClaim with bad configuration that will not register a NodeClaim parameter, err := env.SSMAPI.GetParameter(&ssm.GetParameterInput{ - Name: awssdk.String("/aws/service/ami-amazon-linux-latest/amzn-ami-hvm-x86_64-ebs"), + Name: awssdk.String("/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-ebs"), }) Expect(err).ToNot(HaveOccurred()) nodeTemplate.Spec.AMISelector = map[string]string{"aws::ids": *parameter.Parameter.Value} diff --git a/test/suites/alpha/expiration/suite_test.go b/test/suites/alpha/expiration/suite_test.go index e0caac5efd7f..8d0e15a2c5da 100644 --- a/test/suites/alpha/expiration/suite_test.go +++ b/test/suites/alpha/expiration/suite_test.go @@ -223,7 +223,7 @@ var _ = Describe("Expiration", func() { // Set a configuration that will not register a machine parameter, err := env.SSMAPI.GetParameter(&ssm.GetParameterInput{ - Name: lo.ToPtr("/aws/service/ami-amazon-linux-latest/amzn-ami-hvm-x86_64-ebs"), + Name: lo.ToPtr("/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-ebs"), }) Expect(err).ToNot(HaveOccurred()) nodeTemplate.Spec.AMISelector = map[string]string{"aws::ids": *parameter.Parameter.Value} diff --git a/test/suites/alpha/integration/scheduling_test.go b/test/suites/alpha/integration/scheduling_test.go index f07fbabb3cf4..c67bf7896be5 100644 --- a/test/suites/alpha/integration/scheduling_test.go +++ b/test/suites/alpha/integration/scheduling_test.go @@ -18,8 +18,6 @@ import ( "fmt" "time" - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" "github.com/samber/lo" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -27,6 +25,9 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "knative.dev/pkg/ptr" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/aws/karpenter-core/pkg/apis/v1alpha5" "github.com/aws/karpenter-core/pkg/test" "github.com/aws/karpenter/pkg/apis/v1alpha1" @@ -356,6 +357,7 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { TopologyKey: v1.LabelTopologyZone, WhenUnsatisfiable: v1.DoNotSchedule, LabelSelector: &metav1.LabelSelector{MatchLabels: podLabels}, + MinDomains: lo.ToPtr(int32(3)), }, }, }, @@ -363,7 +365,10 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { env.ExpectCreated(provisioner, provider, deployment) env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(podLabels), 3) - env.ExpectCreatedNodeCount("==", 3) + // Karpenter will launch three nodes, however if all three nodes don't get register with the cluster at the same time, two pods will be placed on one node. + // This can result in a case where all 3 pods are healthy, while there are only two created nodes. + // In that case, we still expect to eventually have three nodes. + env.EventuallyExpectNodeCount("==", 3) }) It("should provision a node using a provisioner with higher priority", func() { provisionerLowPri := test.Provisioner(test.ProvisionerOptions{ diff --git a/test/suites/beta/drift/suite_test.go b/test/suites/beta/drift/suite_test.go index 207420a8cd0b..a0b3826c6f29 100644 --- a/test/suites/beta/drift/suite_test.go +++ b/test/suites/beta/drift/suite_test.go @@ -446,6 +446,104 @@ var _ = Describe("Drift", Label("AWS"), func() { env.EventuallyExpectNotFound(pod, node) env.EventuallyExpectHealthyPodCount(selector, numPods) }) + It("should update the ec2nodeclass-hash annotation on the ec2nodeclass and nodeclaim when the ec2nodeclass's ec2nodeclass-hash-version annotation does not match the controller hash version", func() { + env.ExpectCreated(dep, nodeClass, nodePool) + env.EventuallyExpectHealthyPodCount(selector, numPods) + nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] + nodeClass = env.ExpectExists(nodeClass).(*v1beta1.EC2NodeClass) + expectedHash := nodeClass.Hash() + + By(fmt.Sprintf("expect nodeclass %s and nodeclaim %s to contain %s and %s annotations", nodeClass.Name, nodeClaim.Name, v1beta1.AnnotationEC2NodeClassHash, v1beta1.AnnotationEC2NodeClassHashVersion)) + Eventually(func(g Gomega) { + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed()) + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) + + g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + }).WithTimeout(30 * time.Second).Should(Succeed()) + + nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "test-hash-1", + v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-1", + }) + // Updating `nodeClass.Spec.Tags` would normally trigger drift on all nodeclaims using the + // nodeclass. However, the ec2nodeclass-hash-version does not match the controller hash version, so we will see that + // none of the nodeclaims will be drifted and all nodeclaims will have an updated `ec2nodeclass-hash` and `ec2nodeclass-hash-version` annotation + nodeClass.Spec.Tags = lo.Assign(nodeClass.Spec.Tags, map[string]string{ + "test-key": "test-value", + }) + nodeClaim.Annotations = lo.Assign(nodePool.Annotations, map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "test-hash-2", + v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-2", + }) + + // The nodeclaim will need to be updated first, as the hash controller will only be triggered on changes to the nodeclass + env.ExpectUpdated(nodeClaim, nodeClass) + expectedHash = nodeClass.Hash() + + // Expect all nodeclaims not to be drifted and contain an updated `nodepool-hash` and `nodepool-hash-version` annotation + Eventually(func(g Gomega) { + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed()) + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) + + g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + }).WithTimeout(30 * time.Second).Should(Succeed()) + env.ConsistentlyExpectNodeClaimsNotDrifted(time.Minute, nodeClaim) + }) + It("should update the nodepool-hash annotation on the nodepool and nodeclaim when the nodepool's nodepool-hash-version annotation does not match the controller hash version", func() { + env.ExpectCreated(dep, nodeClass, nodePool) + env.EventuallyExpectHealthyPodCount(selector, numPods) + nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] + nodePool = env.ExpectExists(nodePool).(*corev1beta1.NodePool) + expectedHash := nodePool.Hash() + + By(fmt.Sprintf("expect nodepool %s and nodeclaim %s to contain %s and %s annotations", nodePool.Name, nodeClaim.Name, corev1beta1.NodePoolHashAnnotationKey, corev1beta1.NodePoolHashVersionAnnotationKey)) + Eventually(func(g Gomega) { + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodePool), nodePool)).To(Succeed()) + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) + + g.Expect(nodePool.Annotations).To(HaveKeyWithValue(corev1beta1.NodePoolHashAnnotationKey, expectedHash)) + g.Expect(nodePool.Annotations).To(HaveKeyWithValue(corev1beta1.NodePoolHashVersionAnnotationKey, corev1beta1.NodePoolHashVersion)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(corev1beta1.NodePoolHashAnnotationKey, expectedHash)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(corev1beta1.NodePoolHashVersionAnnotationKey, corev1beta1.NodePoolHashVersion)) + }).WithTimeout(30 * time.Second).Should(Succeed()) + + nodePool.Annotations = lo.Assign(nodePool.Annotations, map[string]string{ + corev1beta1.NodePoolHashAnnotationKey: "test-hash-1", + corev1beta1.NodePoolHashVersionAnnotationKey: "test-hash-version-1", + }) + // Updating `nodePool.Spec.Template.Annotations` would normally trigger drift on all nodeclaims owned by the + // nodepool. However, the nodepool-hash-version does not match the controller hash version, so we will see that + // none of the nodeclaims will be drifted and all nodeclaims will have an updated `nodepool-hash` and `nodepool-hash-version` annotation + nodePool.Spec.Template.Annotations = lo.Assign(nodePool.Spec.Template.Annotations, map[string]string{ + "test-key": "test-value", + }) + nodeClaim.Annotations = lo.Assign(nodePool.Annotations, map[string]string{ + corev1beta1.NodePoolHashAnnotationKey: "test-hash-2", + corev1beta1.NodePoolHashVersionAnnotationKey: "test-hash-version-2", + }) + + // The nodeclaim will need to be updated first, as the hash controller will only be triggered on changes to the nodepool + env.ExpectUpdated(nodeClaim, nodePool) + expectedHash = nodePool.Hash() + + // Expect all nodeclaims not to be drifted and contain an updated `nodepool-hash` and `nodepool-hash-version` annotation + Eventually(func(g Gomega) { + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodePool), nodePool)).To(Succeed()) + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) + + g.Expect(nodePool.Annotations).To(HaveKeyWithValue(corev1beta1.NodePoolHashAnnotationKey, expectedHash)) + g.Expect(nodePool.Annotations).To(HaveKeyWithValue(corev1beta1.NodePoolHashVersionAnnotationKey, corev1beta1.NodePoolHashVersion)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(corev1beta1.NodePoolHashAnnotationKey, expectedHash)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(corev1beta1.NodePoolHashVersionAnnotationKey, corev1beta1.NodePoolHashVersion)) + }).WithTimeout(30 * time.Second).Should(Succeed()) + env.ConsistentlyExpectNodeClaimsNotDrifted(time.Minute, nodeClaim) + }) Context("Failure", func() { It("should not continue to drift if a node never registers", func() { // launch a new nodeClaim @@ -467,9 +565,9 @@ var _ = Describe("Drift", Label("AWS"), func() { startingNodeClaimState := env.EventuallyExpectCreatedNodeClaimCount("==", int(numPods)) env.EventuallyExpectCreatedNodeCount("==", int(numPods)) - // Drift the nodeClaim with bad configuration + // Drift the nodeClaim with bad configuration that will not register a NodeClaim parameter, err := env.SSMAPI.GetParameter(&ssm.GetParameterInput{ - Name: awssdk.String("/aws/service/ami-amazon-linux-latest/amzn-ami-hvm-x86_64-ebs"), + Name: awssdk.String("/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-ebs"), }) Expect(err).ToNot(HaveOccurred()) nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{ID: *parameter.Parameter.Value}} diff --git a/test/suites/beta/expiration/suite_test.go b/test/suites/beta/expiration/suite_test.go index 0b21e210d7fb..daff04e616dd 100644 --- a/test/suites/beta/expiration/suite_test.go +++ b/test/suites/beta/expiration/suite_test.go @@ -211,7 +211,7 @@ var _ = Describe("Expiration", func() { // Set a configuration that will not register a NodeClaim parameter, err := env.SSMAPI.GetParameter(&ssm.GetParameterInput{ - Name: lo.ToPtr("/aws/service/ami-amazon-linux-latest/amzn-ami-hvm-x86_64-ebs"), + Name: lo.ToPtr("/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-ebs"), }) Expect(err).ToNot(HaveOccurred()) nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{ diff --git a/test/suites/beta/integration/hash_test.go b/test/suites/beta/integration/hash_test.go index 552abb3ffffe..98c59c49d704 100644 --- a/test/suites/beta/integration/hash_test.go +++ b/test/suites/beta/integration/hash_test.go @@ -45,7 +45,7 @@ var _ = Describe("CRD Hash", func() { err := env.Client.Get(env, client.ObjectKeyFromObject(nodeClass), nc) g.Expect(err).ToNot(HaveOccurred()) - hash, found := nc.Annotations[v1beta1.AnnotationNodeClassHash] + hash, found := nc.Annotations[v1beta1.AnnotationEC2NodeClassHash] g.Expect(found).To(BeTrue()) g.Expect(hash).To(Equal(nc.Hash())) }) diff --git a/test/suites/beta/integration/scheduling_test.go b/test/suites/beta/integration/scheduling_test.go index 365936087fef..5869b6a8be36 100644 --- a/test/suites/beta/integration/scheduling_test.go +++ b/test/suites/beta/integration/scheduling_test.go @@ -346,6 +346,7 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { TopologyKey: v1.LabelTopologyZone, WhenUnsatisfiable: v1.DoNotSchedule, LabelSelector: &metav1.LabelSelector{MatchLabels: podLabels}, + MinDomains: lo.ToPtr(int32(3)), }, }, }, @@ -353,7 +354,10 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { env.ExpectCreated(nodeClass, nodePool, deployment) env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(podLabels), 3) - env.ExpectCreatedNodeCount("==", 3) + // Karpenter will launch three nodes, however if all three nodes don't get register with the cluster at the same time, two pods will be placed on one node. + // This can result in a case where all 3 pods are healthy, while there are only two created nodes. + // In that case, we still expect to eventually have three nodes. + env.EventuallyExpectNodeCount("==", 3) }) It("should provision a node using a NodePool with higher priority", func() { nodePoolLowPri := test.NodePool(corev1beta1.NodePool{