From 275b3d6b145d61045bbd55491f47657e322510fe Mon Sep 17 00:00:00 2001 From: Andreas Sommer Date: Mon, 25 Nov 2024 19:06:43 +0100 Subject: [PATCH] Create lifecycle hooks together with ASG --- .../awsmachinepool_controller_test.go | 34 +++++++- .../services/autoscaling/autoscalinggroup.go | 81 +++++++----------- .../services/autoscaling/lifecyclehook.go | 83 +++++++++---------- 3 files changed, 100 insertions(+), 98 deletions(-) diff --git a/exp/controllers/awsmachinepool_controller_test.go b/exp/controllers/awsmachinepool_controller_test.go index cf7f063e92..8de7277ee8 100644 --- a/exp/controllers/awsmachinepool_controller_test.go +++ b/exp/controllers/awsmachinepool_controller_test.go @@ -304,7 +304,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { asgSvc.EXPECT().CreateASG(gomock.Any()).Return(&expinfrav1.AutoScalingGroup{ Name: "name", }, nil) - asgSvc.EXPECT().SuspendProcesses("name", []string{"Launch", "Terminate"}).Return(nil).AnyTimes().Times(0) + asgSvc.EXPECT().SuspendProcesses("name", []string{"Launch", "Terminate"}).Return(nil).Times(0) err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) g.Expect(err).To(Succeed()) @@ -341,7 +341,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { "InstanceRefresh", "HealthCheck", "ReplaceUnhealthy", - })).Return(nil).AnyTimes().Times(1) + })).Return(nil).Times(1) err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) g.Expect(err).To(Succeed()) @@ -373,8 +373,8 @@ func TestAWSMachinePoolReconciler(t *testing.T) { }, nil) asgSvc.EXPECT().SubnetIDs(gomock.Any()).Return([]string{}, nil).Times(1) asgSvc.EXPECT().UpdateASG(gomock.Any()).Return(nil).AnyTimes() - asgSvc.EXPECT().SuspendProcesses("name", []string{"Terminate"}).Return(nil).AnyTimes().Times(1) - asgSvc.EXPECT().ResumeProcesses("name", []string{"process3"}).Return(nil).AnyTimes().Times(1) + asgSvc.EXPECT().SuspendProcesses("name", []string{"Terminate"}).Return(nil).Times(1) + asgSvc.EXPECT().ResumeProcesses("name", []string{"process3"}).Return(nil).Times(1) err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) g.Expect(err).To(Succeed()) @@ -813,6 +813,32 @@ func TestAWSMachinePoolReconciler(t *testing.T) { }) }) t.Run("Lifecycle Hooks", func(t *testing.T) { + t.Run("ASG created with lifecycle hooks", func(t *testing.T) { + g := NewWithT(t) + setup(t, g) + defer teardown(t, g) + + newLifecycleHook := expinfrav1.AWSLifecycleHook{ + Name: "new-hook", + LifecycleTransition: "autoscaling:EC2_INSTANCE_LAUNCHING", + } + ms.AWSMachinePool.Spec.AWSLifecycleHooks = append(ms.AWSMachinePool.Spec.AWSLifecycleHooks, newLifecycleHook) + + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + + // New ASG must be created with lifecycle hooks (single AWS SDK call is enough) + // + // TODO: Since GetASGByName and CreateASG are both in the same interface, we can't inspect the actual + // `CreateAutoScalingGroupWithContext` requests parameters here. Make this better testable down to + // AWS SDK level and check `CreateAutoScalingGroupInput.LifecycleHookSpecificationList`. + asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, nil) + asgSvc.EXPECT().CreateASG(gomock.Any()).Return(&expinfrav1.AutoScalingGroup{ + Name: "name", + }, nil) + + err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + g.Expect(err).To(Succeed()) + }) t.Run("New lifecycle hook is added", func(t *testing.T) { g := NewWithT(t) setup(t, g) diff --git a/pkg/cloud/services/autoscaling/autoscalinggroup.go b/pkg/cloud/services/autoscaling/autoscalinggroup.go index e710fa768e..1c2c8c96d6 100644 --- a/pkg/cloud/services/autoscaling/autoscalinggroup.go +++ b/pkg/cloud/services/autoscaling/autoscalinggroup.go @@ -163,24 +163,17 @@ func (s *Service) CreateASG(machinePoolScope *scope.MachinePoolScope) (*expinfra return nil, fmt.Errorf("getting subnets for ASG: %w", err) } - input := &expinfrav1.AutoScalingGroup{ - Name: machinePoolScope.Name(), - MaxSize: machinePoolScope.AWSMachinePool.Spec.MaxSize, - MinSize: machinePoolScope.AWSMachinePool.Spec.MinSize, - Subnets: subnets, - DefaultCoolDown: machinePoolScope.AWSMachinePool.Spec.DefaultCoolDown, - DefaultInstanceWarmup: machinePoolScope.AWSMachinePool.Spec.DefaultInstanceWarmup, - CapacityRebalance: machinePoolScope.AWSMachinePool.Spec.CapacityRebalance, - MixedInstancesPolicy: machinePoolScope.AWSMachinePool.Spec.MixedInstancesPolicy, - } + name := machinePoolScope.Name() + s.scope.Info("Creating ASG", "name", name) // Default value of MachinePool replicas set by CAPI is 1. mpReplicas := *machinePoolScope.MachinePool.Spec.Replicas + var desiredCapacity *int32 // Check that MachinePool replicas number is between the minimum and maximum size of the AWSMachinePool. // Ignore the problem for externally managed clusters because MachinePool replicas will be updated to the right value automatically. if mpReplicas >= machinePoolScope.AWSMachinePool.Spec.MinSize && mpReplicas <= machinePoolScope.AWSMachinePool.Spec.MaxSize { - input.DesiredCapacity = &mpReplicas + desiredCapacity = &mpReplicas } else if !annotations.ReplicasManagedByExternalAutoscaler(machinePoolScope.MachinePool) { return nil, fmt.Errorf("incorrect number of replicas %d in MachinePool %v", mpReplicas, machinePoolScope.MachinePool.Name) } @@ -194,62 +187,46 @@ func (s *Service) CreateASG(machinePoolScope *scope.MachinePoolScope) (*expinfra // Set the cloud provider tag additionalTags[infrav1.ClusterAWSCloudProviderTagKey(s.scope.KubernetesClusterName())] = string(infrav1.ResourceLifecycleOwned) - input.Tags = infrav1.Build(infrav1.BuildParams{ - ClusterName: s.scope.KubernetesClusterName(), - Lifecycle: infrav1.ResourceLifecycleOwned, - Name: aws.String(machinePoolScope.Name()), - Role: aws.String("node"), - Additional: additionalTags, - }) - - s.scope.Info("Running instance") - if err := s.runPool(input, machinePoolScope.AWSMachinePool.Status.LaunchTemplateID); err != nil { - // Only record the failure event if the error is not related to failed dependencies. - // This is to avoid spamming failure events since the machine will be requeued by the actuator. - // if !awserrors.IsFailedDependency(errors.Cause(err)) { - // record.Warnf(scope.AWSMachinePool, "FailedCreate", "Failed to create instance: %v", err) - // } - s.scope.Error(err, "unable to create AutoScalingGroup") - return nil, err - } - record.Eventf(machinePoolScope.AWSMachinePool, "SuccessfulCreate", "Created new ASG: %s", machinePoolScope.Name()) - - return nil, nil -} - -func (s *Service) runPool(i *expinfrav1.AutoScalingGroup, launchTemplateID string) error { input := &autoscaling.CreateAutoScalingGroupInput{ - AutoScalingGroupName: aws.String(i.Name), - MaxSize: aws.Int64(int64(i.MaxSize)), - MinSize: aws.Int64(int64(i.MinSize)), - VPCZoneIdentifier: aws.String(strings.Join(i.Subnets, ", ")), - DefaultCooldown: aws.Int64(int64(i.DefaultCoolDown.Duration.Seconds())), - DefaultInstanceWarmup: aws.Int64(int64(i.DefaultInstanceWarmup.Duration.Seconds())), - CapacityRebalance: aws.Bool(i.CapacityRebalance), + AutoScalingGroupName: aws.String(name), + MaxSize: aws.Int64(int64(machinePoolScope.AWSMachinePool.Spec.MaxSize)), + MinSize: aws.Int64(int64(machinePoolScope.AWSMachinePool.Spec.MinSize)), + VPCZoneIdentifier: aws.String(strings.Join(subnets, ", ")), + DefaultCooldown: aws.Int64(int64(machinePoolScope.AWSMachinePool.Spec.DefaultCoolDown.Duration.Seconds())), + DefaultInstanceWarmup: aws.Int64(int64(machinePoolScope.AWSMachinePool.Spec.DefaultInstanceWarmup.Duration.Seconds())), + CapacityRebalance: aws.Bool(machinePoolScope.AWSMachinePool.Spec.CapacityRebalance), + LifecycleHookSpecificationList: getLifecycleHookSpecificationList(machinePoolScope.GetLifecycleHooks()), } - if i.DesiredCapacity != nil { - input.DesiredCapacity = aws.Int64(int64(aws.Int32Value(i.DesiredCapacity))) + if desiredCapacity != nil { + input.DesiredCapacity = aws.Int64(int64(aws.Int32Value(desiredCapacity))) } - if i.MixedInstancesPolicy != nil { - input.MixedInstancesPolicy = createSDKMixedInstancesPolicy(i.Name, i.MixedInstancesPolicy) + if machinePoolScope.AWSMachinePool.Spec.MixedInstancesPolicy != nil { + input.MixedInstancesPolicy = createSDKMixedInstancesPolicy(name, machinePoolScope.AWSMachinePool.Spec.MixedInstancesPolicy) } else { input.LaunchTemplate = &autoscaling.LaunchTemplateSpecification{ - LaunchTemplateId: aws.String(launchTemplateID), + LaunchTemplateId: aws.String(machinePoolScope.AWSMachinePool.Status.LaunchTemplateID), Version: aws.String(expinfrav1.LaunchTemplateLatestVersion), } } - if i.Tags != nil { - input.Tags = BuildTagsFromMap(i.Name, i.Tags) - } + input.Tags = BuildTagsFromMap(name, infrav1.Build(infrav1.BuildParams{ + ClusterName: s.scope.KubernetesClusterName(), + Lifecycle: infrav1.ResourceLifecycleOwned, + Name: aws.String(name), + Role: aws.String("node"), + Additional: additionalTags, + })) if _, err := s.ASGClient.CreateAutoScalingGroupWithContext(context.TODO(), input); err != nil { - return errors.Wrap(err, "failed to create autoscaling group") + s.scope.Error(err, "unable to create AutoScalingGroup") + return nil, errors.Wrap(err, "failed to create autoscaling group") } - return nil + record.Eventf(machinePoolScope.AWSMachinePool, "SuccessfulCreate", "Created new ASG: %s", machinePoolScope.Name()) + + return nil, nil } // DeleteASGAndWait will delete an ASG and wait until it is deleted. diff --git a/pkg/cloud/services/autoscaling/lifecyclehook.go b/pkg/cloud/services/autoscaling/lifecyclehook.go index b0fa055d96..d869761e77 100644 --- a/pkg/cloud/services/autoscaling/lifecyclehook.go +++ b/pkg/cloud/services/autoscaling/lifecyclehook.go @@ -51,35 +51,34 @@ func (s *Service) DescribeLifecycleHooks(asgName string) ([]*expinfrav1.AWSLifec return hooks, nil } -// CreateLifecycleHook creates a lifecycle hook for the given AutoScalingGroup. -func (s *Service) CreateLifecycleHook(asgName string, hook *expinfrav1.AWSLifecycleHook) error { - input := &autoscaling.PutLifecycleHookInput{ +func getPutLifecycleHookInput(asgName string, hook *expinfrav1.AWSLifecycleHook) (ret *autoscaling.PutLifecycleHookInput) { + ret = &autoscaling.PutLifecycleHookInput{ AutoScalingGroupName: aws.String(asgName), LifecycleHookName: aws.String(hook.Name), LifecycleTransition: aws.String(hook.LifecycleTransition.String()), + + // Optional + RoleARN: hook.RoleARN, + NotificationTargetARN: hook.NotificationTargetARN, + NotificationMetadata: hook.NotificationMetadata, } // Optional parameters if hook.DefaultResult != nil { - input.DefaultResult = aws.String(hook.DefaultResult.String()) + ret.DefaultResult = aws.String(hook.DefaultResult.String()) } if hook.HeartbeatTimeout != nil { timeoutSeconds := hook.HeartbeatTimeout.Duration.Seconds() - input.HeartbeatTimeout = aws.Int64(int64(timeoutSeconds)) + ret.HeartbeatTimeout = aws.Int64(int64(timeoutSeconds)) } - if hook.NotificationTargetARN != nil { - input.NotificationTargetARN = hook.NotificationTargetARN - } - - if hook.RoleARN != nil { - input.RoleARN = hook.RoleARN - } + return +} - if hook.NotificationMetadata != nil { - input.NotificationMetadata = hook.NotificationMetadata - } +// CreateLifecycleHook creates a lifecycle hook for the given AutoScalingGroup. +func (s *Service) CreateLifecycleHook(asgName string, hook *expinfrav1.AWSLifecycleHook) error { + input := getPutLifecycleHookInput(asgName, hook) if _, err := s.ASGClient.PutLifecycleHookWithContext(context.TODO(), input); err != nil { return errors.Wrapf(err, "failed to create lifecycle hook %q for AutoScalingGroup: %q", hook.Name, asgName) @@ -90,33 +89,7 @@ func (s *Service) CreateLifecycleHook(asgName string, hook *expinfrav1.AWSLifecy // UpdateLifecycleHook updates a lifecycle hook for the given AutoScalingGroup. func (s *Service) UpdateLifecycleHook(asgName string, hook *expinfrav1.AWSLifecycleHook) error { - input := &autoscaling.PutLifecycleHookInput{ - AutoScalingGroupName: aws.String(asgName), - LifecycleHookName: aws.String(hook.Name), - LifecycleTransition: aws.String(hook.LifecycleTransition.String()), - } - - // Optional parameters - if hook.DefaultResult != nil { - input.DefaultResult = aws.String(hook.DefaultResult.String()) - } - - if hook.HeartbeatTimeout != nil { - timeoutSeconds := hook.HeartbeatTimeout.Duration.Seconds() - input.HeartbeatTimeout = aws.Int64(int64(timeoutSeconds)) - } - - if hook.NotificationTargetARN != nil { - input.NotificationTargetARN = hook.NotificationTargetARN - } - - if hook.RoleARN != nil { - input.RoleARN = hook.RoleARN - } - - if hook.NotificationMetadata != nil { - input.NotificationMetadata = hook.NotificationMetadata - } + input := getPutLifecycleHookInput(asgName, hook) if _, err := s.ASGClient.PutLifecycleHookWithContext(context.TODO(), input); err != nil { return errors.Wrapf(err, "failed to update lifecycle hook %q for AutoScalingGroup: %q", hook.Name, asgName) @@ -160,6 +133,32 @@ func (s *Service) SDKToLifecycleHook(hook *autoscaling.LifecycleHook) *expinfrav } } +func getLifecycleHookSpecificationList(lifecycleHooks []expinfrav1.AWSLifecycleHook) (ret []*autoscaling.LifecycleHookSpecification) { + for _, hook := range lifecycleHooks { + spec := &autoscaling.LifecycleHookSpecification{ + LifecycleHookName: aws.String(hook.Name), + LifecycleTransition: aws.String(hook.LifecycleTransition.String()), + + // Optional + RoleARN: hook.RoleARN, + NotificationTargetARN: hook.NotificationTargetARN, + NotificationMetadata: hook.NotificationMetadata, + } + + // Optional parameters + if hook.DefaultResult != nil { + spec.DefaultResult = aws.String(hook.DefaultResult.String()) + } + + if hook.HeartbeatTimeout != nil { + timeoutSeconds := hook.HeartbeatTimeout.Duration.Seconds() + spec.HeartbeatTimeout = aws.Int64(int64(timeoutSeconds)) + } + } + + return +} + // ReconcileLifecycleHooks reconciles lifecycle hooks for an ASG // by creating missing hooks, updating mismatching hooks and // deleting extraneous hooks (except those specified in