From 5d047021c8a81b836ed589a372a25baf978c5da7 Mon Sep 17 00:00:00 2001 From: Andreas Sommer Date: Thu, 10 Oct 2024 16:53:08 +0200 Subject: [PATCH] Support storing Ignition user data in S3 bucket for `AWSMachinePool` --- api/v1beta2/awsmachine_types.go | 3 + api/v1beta2/awsmachine_webhook.go | 7 +- api/v1beta2/tags.go | 6 + .../bootstrap/cluster_api_controller.go | 6 +- .../bootstrap/fixtures/with_s3_bucket.yaml | 6 +- ...ture.cluster.x-k8s.io_awsmachinepools.yaml | 100 +++++++ controllers/awsmachine_controller.go | 6 +- exp/api/v1beta1/conversion.go | 3 + exp/api/v1beta1/zz_generated.conversion.go | 1 + exp/api/v1beta2/awsmachinepool_types.go | 4 + exp/api/v1beta2/awsmachinepool_webhook.go | 35 ++- exp/api/v1beta2/zz_generated.deepcopy.go | 5 + exp/controllers/awsmachinepool_controller.go | 42 ++- .../awsmachinepool_controller_test.go | 246 +++++++++++++++--- .../awsmanagedmachinepool_controller.go | 9 +- pkg/cloud/scope/ignition.go | 26 ++ pkg/cloud/scope/launchtemplate.go | 2 +- pkg/cloud/scope/machinepool.go | 13 +- pkg/cloud/scope/managednodegroup.go | 15 +- pkg/cloud/services/ec2/launchtemplate.go | 207 ++++++++++++--- pkg/cloud/services/ec2/launchtemplate_test.go | 79 +++--- pkg/cloud/services/interfaces.go | 13 +- .../mock_services/ec2_interface_mock.go | 31 ++- .../objectstore_machine_interface_mock.go | 29 +++ .../mock_services/reconcile_interface_mock.go | 8 +- pkg/cloud/services/s3/s3.go | 201 +++++++++++++- pkg/cloud/services/s3/s3_test.go | 38 ++- 27 files changed, 945 insertions(+), 196 deletions(-) create mode 100644 pkg/cloud/scope/ignition.go diff --git a/api/v1beta2/awsmachine_types.go b/api/v1beta2/awsmachine_types.go index 39a649a0e5..b14a3587e9 100644 --- a/api/v1beta2/awsmachine_types.go +++ b/api/v1beta2/awsmachine_types.go @@ -30,6 +30,9 @@ const ( // DefaultIgnitionVersion represents default Ignition version generated for machine userdata. 
DefaultIgnitionVersion = "2.3" + + // DefaultIgnitionStorageType represents the default storage type of Ignition userdata. + DefaultIgnitionStorageType = IgnitionStorageTypeOptionClusterObjectStore ) // SecretBackend defines variants for backend secret storage. diff --git a/api/v1beta2/awsmachine_webhook.go b/api/v1beta2/awsmachine_webhook.go index 50af4f2211..b99c8015a5 100644 --- a/api/v1beta2/awsmachine_webhook.go +++ b/api/v1beta2/awsmachine_webhook.go @@ -399,12 +399,11 @@ func (r *AWSMachine) Default() { } if r.ignitionEnabled() && r.Spec.Ignition.Version == "" { - if r.Spec.Ignition == nil { - r.Spec.Ignition = &Ignition{} - } - r.Spec.Ignition.Version = DefaultIgnitionVersion } + if r.ignitionEnabled() && r.Spec.Ignition.StorageType == "" { + r.Spec.Ignition.StorageType = DefaultIgnitionStorageType + } } func (r *AWSMachine) validateAdditionalSecurityGroups() field.ErrorList { diff --git a/api/v1beta2/tags.go b/api/v1beta2/tags.go index e6e0ea7e73..45bc371a49 100644 --- a/api/v1beta2/tags.go +++ b/api/v1beta2/tags.go @@ -195,6 +195,12 @@ const ( // of the bootstrap secret that was used to create the user data for the latest launch // template version. LaunchTemplateBootstrapDataSecret = NameAWSProviderPrefix + "bootstrap-data-secret" + + // LaunchTemplateBootstrapDataHash is the tag we use to store the hash of the raw bootstrap data. + // If bootstrap data is stored in S3, this hash relates to that data, not to the EC2 instance + // user data which only references the S3 object. We store this tag on launch template versions + // so that S3 bootstrap data objects can be deleted when they get outdated. + LaunchTemplateBootstrapDataHash = NameAWSProviderPrefix + "bootstrap-data-hash" ) // ClusterTagKey generates the key for resources associated with a cluster. 
diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/cluster_api_controller.go b/cmd/clusterawsadm/cloudformation/bootstrap/cluster_api_controller.go index 049de10431..080f3f8226 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/cluster_api_controller.go +++ b/cmd/clusterawsadm/cloudformation/bootstrap/cluster_api_controller.go @@ -291,11 +291,13 @@ func (t Template) ControllersPolicy() *iamv1.PolicyDocument { Action: iamv1.Actions{ "s3:CreateBucket", "s3:DeleteBucket", - "s3:GetObject", - "s3:PutObject", "s3:DeleteObject", + "s3:GetObject", + "s3:ListBucket", "s3:PutBucketPolicy", "s3:PutBucketTagging", + "s3:PutLifecycleConfiguration", + "s3:PutObject", }, }) } diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_s3_bucket.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_s3_bucket.yaml index b376d7cab8..ff9b37fe5e 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_s3_bucket.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_s3_bucket.yaml @@ -297,11 +297,13 @@ Resources: - Action: - s3:CreateBucket - s3:DeleteBucket - - s3:GetObject - - s3:PutObject - s3:DeleteObject + - s3:GetObject + - s3:ListBucket - s3:PutBucketPolicy - s3:PutBucketTagging + - s3:PutLifecycleConfiguration + - s3:PutObject Effect: Allow Resource: - arn:*:s3:::cluster-api-provider-aws-* diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml index e70f544535..778030c456 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml @@ -883,6 +883,106 @@ spec: after it enters the InService state. If no value is supplied by user a default value of 300 seconds is set type: string + ignition: + description: Ignition defined options related to the bootstrapping + systems where Ignition is used. 
+ properties: + proxy: + description: |- + Proxy defines proxy settings for Ignition. + Only valid for Ignition versions 3.1 and above. + properties: + httpProxy: + description: |- + HTTPProxy is the HTTP proxy to use for Ignition. + A single URL that specifies the proxy server to use for HTTP and HTTPS requests, + unless overridden by the HTTPSProxy or NoProxy options. + type: string + httpsProxy: + description: |- + HTTPSProxy is the HTTPS proxy to use for Ignition. + A single URL that specifies the proxy server to use for HTTPS requests, + unless overridden by the NoProxy option. + type: string + noProxy: + description: |- + NoProxy is the list of domains to not proxy for Ignition. + Specifies a list of strings to hosts that should be excluded from proxying. + + + Each value is represented by: + - An IP address prefix (1.2.3.4) + - An IP address prefix in CIDR notation (1.2.3.4/8) + - A domain name + - A domain name matches that name and all subdomains + - A domain name with a leading . matches subdomains only + - A special DNS label (*), indicates that no proxying should be done + + + An IP address prefix and domain name can also include a literal port number (1.2.3.4:80). + items: + description: IgnitionNoProxy defines the list of domains + to not proxy for Ignition. + maxLength: 2048 + type: string + maxItems: 64 + type: array + type: object + storageType: + default: ClusterObjectStore + description: |- + StorageType defines how to store the boostrap user data for Ignition. + This can be used to instruct Ignition from where to fetch the user data to bootstrap an instance. + + + When omitted, the storage option will default to ClusterObjectStore. + + + When set to "ClusterObjectStore", if the capability is available and a Cluster ObjectStore configuration + is correctly provided in the Cluster object (under .spec.s3Bucket), + an object store will be used to store bootstrap user data. 
+ + + When set to "UnencryptedUserData", EC2 Instance User Data will be used to store the machine bootstrap user data, unencrypted. + This option is considered less secure than others as user data may contain sensitive informations (keys, certificates, etc.) + and users with ec2:DescribeInstances permission or users running pods + that can access the ec2 metadata service have access to this sensitive information. + So this is only to be used at ones own risk, and only when other more secure options are not viable. + enum: + - ClusterObjectStore + - UnencryptedUserData + type: string + tls: + description: |- + TLS defines TLS settings for Ignition. + Only valid for Ignition versions 3.1 and above. + properties: + certificateAuthorities: + description: |- + CASources defines the list of certificate authorities to use for Ignition. + The value is the certificate bundle (in PEM format). The bundle can contain multiple concatenated certificates. + Supported schemes are http, https, tftp, s3, arn, gs, and `data` (RFC 2397) URL scheme. + items: + description: IgnitionCASource defines the source of the + certificate authority to use for Ignition. + maxLength: 65536 + type: string + maxItems: 64 + type: array + type: object + version: + default: "2.3" + description: Version defines which version of Ignition will be + used to generate bootstrap data. + enum: + - "2.3" + - "3.0" + - "3.1" + - "3.2" + - "3.3" + - "3.4" + type: string + type: object maxSize: default: 1 description: MaxSize defines the maximum size of the group. 
diff --git a/controllers/awsmachine_controller.go b/controllers/awsmachine_controller.go index 7b2f94fd87..91fc0e3607 100644 --- a/controllers/awsmachine_controller.go +++ b/controllers/awsmachine_controller.go @@ -739,7 +739,7 @@ func (r *AWSMachineReconciler) resolveUserData(machineScope *scope.MachineScope, if machineScope.UseIgnition(userDataFormat) { var ignitionStorageType infrav1.IgnitionStorageTypeOption if machineScope.AWSMachine.Spec.Ignition == nil { - ignitionStorageType = infrav1.IgnitionStorageTypeOptionClusterObjectStore + ignitionStorageType = infrav1.DefaultIgnitionStorageType } else { ignitionStorageType = machineScope.AWSMachine.Spec.Ignition.StorageType } @@ -795,8 +795,8 @@ func (r *AWSMachineReconciler) cloudInitUserData(machineScope *scope.MachineScop // then returns the config to instruct ignition on how to pull the user data from the bucket. func (r *AWSMachineReconciler) generateIgnitionWithRemoteStorage(scope *scope.MachineScope, objectStoreSvc services.ObjectStoreInterface, userData []byte) ([]byte, error) { if objectStoreSvc == nil { - return nil, errors.New("using Ignition by default requires a cluster wide object storage configured at `AWSCluster.Spec.Ignition.S3Bucket`. " + - "You must configure one or instruct Ignition to use EC2 user data instead, by setting `AWSMachine.Spec.Ignition.StorageType` to `UnencryptedUserData`") + return nil, errors.New("using Ignition by default requires a cluster wide object storage configured at `AWSCluster.spec.s3Bucket`. 
" + + "You must configure one or instruct Ignition to use EC2 user data instead, by setting `AWSMachine.spec.ignition.storageType` to `UnencryptedUserData`") } objectURL, err := objectStoreSvc.Create(scope, userData) diff --git a/exp/api/v1beta1/conversion.go b/exp/api/v1beta1/conversion.go index 7c39f1fcbd..fa16ace4ab 100644 --- a/exp/api/v1beta1/conversion.go +++ b/exp/api/v1beta1/conversion.go @@ -52,6 +52,9 @@ func (src *AWSMachinePool) ConvertTo(dstRaw conversion.Hub) error { if restored.Spec.AvailabilityZoneSubnetType != nil { dst.Spec.AvailabilityZoneSubnetType = restored.Spec.AvailabilityZoneSubnetType } + if restored.Spec.Ignition != nil { + dst.Spec.Ignition = restored.Spec.Ignition + } if restored.Spec.AWSLaunchTemplate.PrivateDNSName != nil { dst.Spec.AWSLaunchTemplate.PrivateDNSName = restored.Spec.AWSLaunchTemplate.PrivateDNSName diff --git a/exp/api/v1beta1/zz_generated.conversion.go b/exp/api/v1beta1/zz_generated.conversion.go index 585cbd1504..c09131cc71 100644 --- a/exp/api/v1beta1/zz_generated.conversion.go +++ b/exp/api/v1beta1/zz_generated.conversion.go @@ -565,6 +565,7 @@ func autoConvert_v1beta2_AWSMachinePoolSpec_To_v1beta1_AWSMachinePoolSpec(in *v1 } out.CapacityRebalance = in.CapacityRebalance // WARNING: in.SuspendProcesses requires manual conversion: does not exist in peer-type + // WARNING: in.Ignition requires manual conversion: does not exist in peer-type return nil } diff --git a/exp/api/v1beta2/awsmachinepool_types.go b/exp/api/v1beta2/awsmachinepool_types.go index 526876bcfd..62e81e7c7a 100644 --- a/exp/api/v1beta2/awsmachinepool_types.go +++ b/exp/api/v1beta2/awsmachinepool_types.go @@ -101,6 +101,10 @@ type AWSMachinePoolSpec struct { // SuspendProcesses defines a list of processes to suspend for the given ASG. This is constantly reconciled. // If a process is removed from this list it will automatically be resumed. 
SuspendProcesses *SuspendProcessesTypes `json:"suspendProcesses,omitempty"` + + // Ignition defined options related to the bootstrapping systems where Ignition is used. + // +optional + Ignition *infrav1.Ignition `json:"ignition,omitempty"` } // SuspendProcessesTypes contains user friendly auto-completable values for suspended process names. diff --git a/exp/api/v1beta2/awsmachinepool_webhook.go b/exp/api/v1beta2/awsmachinepool_webhook.go index a4f6a44d41..0395f1d0e3 100644 --- a/exp/api/v1beta2/awsmachinepool_webhook.go +++ b/exp/api/v1beta2/awsmachinepool_webhook.go @@ -27,7 +27,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/webhook" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" - "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" + infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" + "sigs.k8s.io/cluster-api-provider-aws/v2/feature" ) var log = ctrl.Log.WithName("awsmachinepool-resource") @@ -62,12 +63,12 @@ func (r *AWSMachinePool) validateRootVolume() field.ErrorList { return allErrs } - if v1beta2.VolumeTypesProvisioned.Has(string(r.Spec.AWSLaunchTemplate.RootVolume.Type)) && r.Spec.AWSLaunchTemplate.RootVolume.IOPS == 0 { + if infrav1.VolumeTypesProvisioned.Has(string(r.Spec.AWSLaunchTemplate.RootVolume.Type)) && r.Spec.AWSLaunchTemplate.RootVolume.IOPS == 0 { allErrs = append(allErrs, field.Required(field.NewPath("spec.awsLaunchTemplate.rootVolume.iops"), "iops required if type is 'io1' or 'io2'")) } if r.Spec.AWSLaunchTemplate.RootVolume.Throughput != nil { - if r.Spec.AWSLaunchTemplate.RootVolume.Type != v1beta2.VolumeTypeGP3 { + if r.Spec.AWSLaunchTemplate.RootVolume.Type != infrav1.VolumeTypeGP3 { allErrs = append(allErrs, field.Required(field.NewPath("spec.awsLaunchTemplate.rootVolume.throughput"), "throughput is valid only for type 'gp3'")) } if *r.Spec.AWSLaunchTemplate.RootVolume.Throughput < 0 { @@ -86,12 +87,12 @@ func (r *AWSMachinePool) validateNonRootVolumes() field.ErrorList { var allErrs field.ErrorList for _, volume 
:= range r.Spec.AWSLaunchTemplate.NonRootVolumes { - if v1beta2.VolumeTypesProvisioned.Has(string(volume.Type)) && volume.IOPS == 0 { + if infrav1.VolumeTypesProvisioned.Has(string(volume.Type)) && volume.IOPS == 0 { allErrs = append(allErrs, field.Required(field.NewPath("spec.template.spec.nonRootVolumes.iops"), "iops required if type is 'io1' or 'io2'")) } if volume.Throughput != nil { - if volume.Type != v1beta2.VolumeTypeGP3 { + if volume.Type != infrav1.VolumeTypeGP3 { allErrs = append(allErrs, field.Required(field.NewPath("spec.template.spec.nonRootVolumes.throughput"), "throughput is valid only for type 'gp3'")) } if *volume.Throughput < 0 { @@ -162,6 +163,22 @@ func (r *AWSMachinePool) validateRefreshPreferences() field.ErrorList { return allErrs } +func (r *AWSMachinePool) ignitionEnabled() bool { + return r.Spec.Ignition != nil +} + +func (r *AWSMachinePool) validateIgnition() field.ErrorList { + var allErrs field.ErrorList + + // Feature gate is not enabled but ignition is enabled then send a forbidden error. + if !feature.Gates.Enabled(feature.BootstrapFormatIgnition) && r.ignitionEnabled() { + allErrs = append(allErrs, field.Forbidden(field.NewPath("spec", "ignition"), + "can be set only if the BootstrapFormatIgnition feature gate is enabled")) + } + + return allErrs +} + // ValidateCreate will do any extra validation when creating a AWSMachinePool. func (r *AWSMachinePool) ValidateCreate() (admission.Warnings, error) { log.Info("AWSMachinePool validate create", "machine-pool", klog.KObj(r)) @@ -176,6 +193,7 @@ func (r *AWSMachinePool) ValidateCreate() (admission.Warnings, error) { allErrs = append(allErrs, r.validateAdditionalSecurityGroups()...) allErrs = append(allErrs, r.validateSpotInstances()...) allErrs = append(allErrs, r.validateRefreshPreferences()...) + allErrs = append(allErrs, r.validateIgnition()...) 
if len(allErrs) == 0 { return nil, nil @@ -226,4 +244,11 @@ func (r *AWSMachinePool) Default() { log.Info("DefaultInstanceWarmup is zero, setting 300 seconds as default") r.Spec.DefaultInstanceWarmup.Duration = 300 * time.Second } + + if r.ignitionEnabled() && r.Spec.Ignition.Version == "" { + r.Spec.Ignition.Version = infrav1.DefaultIgnitionVersion + } + if r.ignitionEnabled() && r.Spec.Ignition.StorageType == "" { + r.Spec.Ignition.StorageType = infrav1.DefaultIgnitionStorageType + } } diff --git a/exp/api/v1beta2/zz_generated.deepcopy.go b/exp/api/v1beta2/zz_generated.deepcopy.go index f34e8f9d9b..6c0f077766 100644 --- a/exp/api/v1beta2/zz_generated.deepcopy.go +++ b/exp/api/v1beta2/zz_generated.deepcopy.go @@ -277,6 +277,11 @@ func (in *AWSMachinePoolSpec) DeepCopyInto(out *AWSMachinePoolSpec) { *out = new(SuspendProcessesTypes) (*in).DeepCopyInto(*out) } + if in.Ignition != nil { + in, out := &in.Ignition, &out.Ignition + *out = new(apiv1beta2.Ignition) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AWSMachinePoolSpec. 
diff --git a/exp/controllers/awsmachinepool_controller.go b/exp/controllers/awsmachinepool_controller.go index 741cdcdb10..daab66a835 100644 --- a/exp/controllers/awsmachinepool_controller.go +++ b/exp/controllers/awsmachinepool_controller.go @@ -46,6 +46,7 @@ import ( "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services" asg "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/autoscaling" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/ec2" + "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/s3" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/logger" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" @@ -63,6 +64,7 @@ type AWSMachinePoolReconciler struct { asgServiceFactory func(cloud.ClusterScoper) services.ASGInterface ec2ServiceFactory func(scope.EC2Scope) services.EC2Interface reconcileServiceFactory func(scope.EC2Scope) services.MachinePoolReconcileInterface + objectStoreServiceFactory func(scope.S3Scope) services.ObjectStoreInterface TagUnmanagedNetworkResources bool } @@ -89,6 +91,19 @@ func (r *AWSMachinePoolReconciler) getReconcileService(scope scope.EC2Scope) ser return ec2.NewService(scope) } +func (r *AWSMachinePoolReconciler) getObjectStoreService(scope scope.S3Scope) services.ObjectStoreInterface { + if scope.Bucket() == nil { + // S3 bucket usage not enabled, so object store service not needed + return nil + } + + if r.objectStoreServiceFactory != nil { + return r.objectStoreServiceFactory(scope) + } + + return s3.NewService(scope) +} + // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachinepools,verbs=get;list;watch;update;patch;delete // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachinepools/status,verbs=get;update;patch // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get;list;watch;patch @@ -130,7 +145,7 @@ func (r *AWSMachinePoolReconciler) Reconcile(ctx 
context.Context, req ctrl.Reque log = log.WithValues("cluster", klog.KObj(cluster)) - infraCluster, err := r.getInfraCluster(ctx, log, cluster, awsMachinePool) + infraCluster, s3Scope, err := r.getInfraCluster(ctx, log, cluster, awsMachinePool) if err != nil { return ctrl.Result{}, fmt.Errorf("getting infra provider cluster or control plane object: %w", err) } @@ -178,13 +193,13 @@ func (r *AWSMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Reque return ctrl.Result{}, r.reconcileDelete(machinePoolScope, infraScope, infraScope) } - return ctrl.Result{}, r.reconcileNormal(ctx, machinePoolScope, infraScope, infraScope) + return ctrl.Result{}, r.reconcileNormal(ctx, machinePoolScope, infraScope, infraScope, s3Scope) case *scope.ClusterScope: if !awsMachinePool.ObjectMeta.DeletionTimestamp.IsZero() { return ctrl.Result{}, r.reconcileDelete(machinePoolScope, infraScope, infraScope) } - return ctrl.Result{}, r.reconcileNormal(ctx, machinePoolScope, infraScope, infraScope) + return ctrl.Result{}, r.reconcileNormal(ctx, machinePoolScope, infraScope, infraScope, s3Scope) default: return ctrl.Result{}, errors.New("infraCluster has unknown type") } @@ -202,7 +217,7 @@ func (r *AWSMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctr Complete(r) } -func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope) error { +func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope, s3Scope scope.S3Scope) error { clusterScope.Info("Reconciling AWSMachinePool") // If the AWSMachine is in an error state, return early. 
@@ -238,6 +253,7 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP ec2Svc := r.getEC2Service(ec2Scope) asgsvc := r.getASGService(clusterScope) reconSvc := r.getReconcileService(ec2Scope) + objectStoreSvc := r.getObjectStoreService(s3Scope) // Find existing ASG asg, err := r.findASG(machinePoolScope, asgsvc) @@ -280,7 +296,7 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP machinePoolScope.Info("starting instance refresh", "number of instances", machinePoolScope.MachinePool.Spec.Replicas) return asgsvc.StartASGInstanceRefresh(machinePoolScope) } - if err := reconSvc.ReconcileLaunchTemplate(machinePoolScope, ec2Svc, canUpdateLaunchTemplate, runPostLaunchTemplateUpdateOperation); err != nil { + if err := reconSvc.ReconcileLaunchTemplate(machinePoolScope, machinePoolScope, s3Scope, ec2Svc, objectStoreSvc, canUpdateLaunchTemplate, runPostLaunchTemplateUpdateOperation); err != nil { r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeWarning, "FailedLaunchTemplateReconcile", "Failed to reconcile launch template: %v", err) machinePoolScope.Error(err, "failed to reconcile launch template") return err @@ -389,7 +405,7 @@ func (r *AWSMachinePoolReconciler) reconcileDelete(machinePoolScope *scope.Machi } launchTemplateID := machinePoolScope.AWSMachinePool.Status.LaunchTemplateID - launchTemplate, _, _, err := ec2Svc.GetLaunchTemplate(machinePoolScope.LaunchTemplateName()) + launchTemplate, _, _, _, err := ec2Svc.GetLaunchTemplate(machinePoolScope.LaunchTemplateName()) //nolint:dogsled if err != nil { return err } @@ -607,7 +623,7 @@ func machinePoolToInfrastructureMapFunc(gvk schema.GroupVersionKind) handler.Map } } -func (r *AWSMachinePoolReconciler) getInfraCluster(ctx context.Context, log *logger.Logger, cluster *clusterv1.Cluster, awsMachinePool *expinfrav1.AWSMachinePool) (scope.EC2Scope, error) { +func (r *AWSMachinePoolReconciler) getInfraCluster(ctx context.Context, log *logger.Logger, 
cluster *clusterv1.Cluster, awsMachinePool *expinfrav1.AWSMachinePool) (scope.EC2Scope, scope.S3Scope, error) { var clusterScope *scope.ClusterScope var managedControlPlaneScope *scope.ManagedControlPlaneScope var err error @@ -621,7 +637,7 @@ func (r *AWSMachinePoolReconciler) getInfraCluster(ctx context.Context, log *log if err := r.Get(ctx, controlPlaneName, controlPlane); err != nil { // AWSManagedControlPlane is not ready - return nil, nil //nolint:nilerr + return nil, nil, nil //nolint:nilerr } managedControlPlaneScope, err = scope.NewManagedControlPlaneScope(scope.ManagedControlPlaneScopeParams{ @@ -633,10 +649,10 @@ func (r *AWSMachinePoolReconciler) getInfraCluster(ctx context.Context, log *log TagUnmanagedNetworkResources: r.TagUnmanagedNetworkResources, }) if err != nil { - return nil, err + return nil, nil, err } - return managedControlPlaneScope, nil + return managedControlPlaneScope, managedControlPlaneScope, nil } awsCluster := &infrav1.AWSCluster{} @@ -648,7 +664,7 @@ func (r *AWSMachinePoolReconciler) getInfraCluster(ctx context.Context, log *log if err := r.Client.Get(ctx, infraClusterName, awsCluster); err != nil { // AWSCluster is not ready - return nil, nil //nolint:nilerr + return nil, nil, nil //nolint:nilerr } // Create the cluster scope @@ -661,8 +677,8 @@ func (r *AWSMachinePoolReconciler) getInfraCluster(ctx context.Context, log *log TagUnmanagedNetworkResources: r.TagUnmanagedNetworkResources, }) if err != nil { - return nil, err + return nil, nil, err } - return clusterScope, nil + return clusterScope, clusterScope, nil } diff --git a/exp/controllers/awsmachinepool_controller_test.go b/exp/controllers/awsmachinepool_controller_test.go index 4902dbb7e7..1e6e8aea0e 100644 --- a/exp/controllers/awsmachinepool_controller_test.go +++ b/exp/controllers/awsmachinepool_controller_test.go @@ -19,11 +19,14 @@ package controllers import ( "bytes" "context" + "encoding/base64" "flag" "fmt" "testing" "github.com/aws/aws-sdk-go/aws" + 
"github.com/aws/aws-sdk-go/service/ec2" + "github.com/aws/aws-sdk-go/service/s3" "github.com/go-logr/logr" "github.com/golang/mock/gomock" . "github.com/onsi/gomega" @@ -33,16 +36,21 @@ import ( "k8s.io/apimachinery/pkg/runtime" apimachinerytypes "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" + utilfeature "k8s.io/component-base/featuregate/testing" "k8s.io/klog/v2" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client/fake" infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2" + "sigs.k8s.io/cluster-api-provider-aws/v2/feature" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/mock_services" + s3svc "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/s3" + "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/s3/mock_s3iface" + "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/s3/mock_stsiface" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/userdata" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/logger" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" @@ -61,6 +69,8 @@ func TestAWSMachinePoolReconciler(t *testing.T) { ec2Svc *mock_services.MockEC2Interface asgSvc *mock_services.MockASGInterface reconSvc *mock_services.MockMachinePoolReconcileInterface + s3Mock *mock_s3iface.MockS3API + stsMock *mock_stsiface.MockSTSAPI recorder *record.FakeRecorder awsMachinePool *expinfrav1.AWSMachinePool secret *corev1.Secret @@ -158,6 +168,8 @@ func TestAWSMachinePoolReconciler(t *testing.T) { ec2Svc = mock_services.NewMockEC2Interface(mockCtrl) asgSvc = mock_services.NewMockASGInterface(mockCtrl) reconSvc = mock_services.NewMockMachinePoolReconcileInterface(mockCtrl) + s3Mock = mock_s3iface.NewMockS3API(mockCtrl) + stsMock = mock_stsiface.NewMockSTSAPI(mockCtrl) // 
If the test hangs for 9 minutes, increase the value here to the number of events during a reconciliation loop recorder = record.NewFakeRecorder(2) @@ -172,6 +184,12 @@ func TestAWSMachinePoolReconciler(t *testing.T) { reconcileServiceFactory: func(scope.EC2Scope) services.MachinePoolReconcileInterface { return reconSvc }, + objectStoreServiceFactory: func(scope scope.S3Scope) services.ObjectStoreInterface { + svc := s3svc.NewService(scope) + svc.S3Client = s3Mock + svc.STSClient = stsMock + return svc + }, Recorder: recorder, } } @@ -195,7 +213,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { getASG := func(t *testing.T, g *WithT) { t.Helper() - ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, expectedErr).AnyTimes() + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, nil, expectedErr).AnyTimes() asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, expectedErr).AnyTimes() } t.Run("should exit immediately on an error state", func(t *testing.T) { @@ -211,7 +229,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { buf := new(bytes.Buffer) klog.SetOutput(buf) - _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(buf).To(ContainSubstring("Error state detected, skipping reconciliation")) }) t.Run("should add our finalizer to the machinepool", func(t *testing.T) { @@ -220,7 +238,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { defer teardown(t, g) getASG(t, g) - _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(ms.AWSMachinePool.Finalizers).To(ContainElement(expinfrav1.MachinePoolFinalizer)) }) @@ -235,7 +253,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { buf := new(bytes.Buffer) klog.SetOutput(buf) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + err := reconciler.reconcileNormal(context.Background(), 
ms, cs, cs, cs) g.Expect(err).To(BeNil()) g.Expect(buf.String()).To(ContainSubstring("Cluster infrastructure is not ready yet")) expectConditions(g, ms.AWSMachinePool, []conditionAssertion{{expinfrav1.ASGReadyCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForClusterInfrastructureReason}}) @@ -250,7 +268,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { buf := new(bytes.Buffer) klog.SetOutput(buf) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(BeNil()) g.Expect(buf.String()).To(ContainSubstring("Bootstrap data secret reference is not yet available")) @@ -266,7 +284,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { getASG := func(t *testing.T, g *WithT) { t.Helper() - ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, nil).AnyTimes() + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, nil, nil).AnyTimes() asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, nil).AnyTimes() } t.Run("should look up by provider ID when one exists", func(t *testing.T) { @@ -277,8 +295,8 @@ func TestAWSMachinePoolReconciler(t *testing.T) { getASG(t, g) expectedErr := errors.New("no connection available ") - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(expectedErr) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(expectedErr) + err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(errors.Cause(err)).To(MatchError(expectedErr)) }) }) @@ -298,14 +316,14 @@ func TestAWSMachinePoolReconciler(t *testing.T) { defer teardown(t, g) setSuspendedProcesses(t, g) - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), 
gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, nil) asgSvc.EXPECT().CreateASG(gomock.Any()).Return(&expinfrav1.AutoScalingGroup{ Name: "name", }, nil) asgSvc.EXPECT().SuspendProcesses("name", []string{"Launch", "Terminate"}).Return(nil).AnyTimes().Times(0) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) }) @@ -322,7 +340,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { defer teardown(t, g) setSuspendedProcesses(t, g) ms.AWSMachinePool.Spec.SuspendProcesses.All = true - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) reconSvc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&expinfrav1.AutoScalingGroup{ Name: "name", @@ -341,7 +359,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { "ReplaceUnhealthy", })).Return(nil).AnyTimes().Times(1) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) }) @@ -362,7 +380,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { defer teardown(t, g) setSuspendedProcesses(t, g) - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) reconSvc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil) 
asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&expinfrav1.AutoScalingGroup{ Name: "name", @@ -373,7 +391,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { asgSvc.EXPECT().SuspendProcesses("name", []string{"Terminate"}).Return(nil).AnyTimes().Times(1) asgSvc.EXPECT().ResumeProcesses("name", []string{"process3"}).Return(nil).AnyTimes().Times(1) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) }) @@ -387,7 +405,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { Name: "an-asg", DesiredCapacity: ptr.To[int32](1), } - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&asg, nil) asgSvc.EXPECT().SubnetIDs(gomock.Any()).Return([]string{}, nil) asgSvc.EXPECT().UpdateASG(gomock.Any()).Return(nil) @@ -400,7 +418,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { g.Expect(testEnv.Create(ctx, ms.MachinePool)).To(Succeed()) - _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(*ms.MachinePool.Spec.Replicas).To(Equal(int32(1))) }) t.Run("No need to update Asg because asgNeedsUpdates is false and no subnets change", func(t *testing.T) { @@ -425,13 +443,13 @@ func TestAWSMachinePoolReconciler(t *testing.T) { }, }, Subnets: []string{"subnet1", "subnet2"}} - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) reconSvc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil) 
asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&asg, nil).AnyTimes() asgSvc.EXPECT().SubnetIDs(gomock.Any()).Return([]string{"subnet2", "subnet1"}, nil).Times(1) asgSvc.EXPECT().UpdateASG(gomock.Any()).Return(nil).Times(0) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) t.Run("update Asg due to subnet changes", func(t *testing.T) { @@ -443,13 +461,13 @@ func TestAWSMachinePoolReconciler(t *testing.T) { MinSize: int32(0), MaxSize: int32(100), Subnets: []string{"subnet1", "subnet2"}} - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) reconSvc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&asg, nil).AnyTimes() asgSvc.EXPECT().SubnetIDs(gomock.Any()).Return([]string{"subnet1"}, nil).Times(1) asgSvc.EXPECT().UpdateASG(gomock.Any()).Return(nil).Times(1) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) t.Run("update Asg due to asgNeedsUpdates returns true", func(t *testing.T) { @@ -461,13 +479,13 @@ func TestAWSMachinePoolReconciler(t *testing.T) { MinSize: int32(0), MaxSize: int32(2), Subnets: []string{}} - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) reconSvc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&asg, nil).AnyTimes() 
asgSvc.EXPECT().SubnetIDs(gomock.Any()).Return([]string{}, nil).Times(1) asgSvc.EXPECT().UpdateASG(gomock.Any()).Return(nil).Times(1) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) @@ -481,9 +499,9 @@ func TestAWSMachinePoolReconciler(t *testing.T) { reconSvc = nil // not used defer teardown(t, g) - ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return(nil, "", nil, nil) + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return(nil, "", nil, nil, nil) ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-abcdef123"), nil) - ec2Svc.EXPECT().CreateLaunchTemplate(gomock.Any(), gomock.Eq(ptr.To[string]("ami-abcdef123")), gomock.Eq(userDataSecretKey), gomock.Eq([]byte("shell-script"))).Return("lt-ghijkl456", nil) + ec2Svc.EXPECT().CreateLaunchTemplate(gomock.Any(), gomock.Eq(ptr.To[string]("ami-abcdef123")), gomock.Eq(userDataSecretKey), gomock.Eq([]byte("shell-script")), gomock.Eq(userdata.ComputeHash([]byte("shell-script")))).Return("lt-ghijkl456", nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, nil) asgSvc.EXPECT().CreateASG(gomock.Any()).DoAndReturn(func(scope *scope.MachinePoolScope) (*expinfrav1.AutoScalingGroup, error) { g.Expect(scope.Name()).To(Equal("test")) @@ -492,7 +510,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { }, nil }) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) @@ -517,6 +535,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No change to user data userdata.ComputeHash([]byte("shell-script")), &userDataSecretKey, + nil, nil) ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-existing"), nil) // no change ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), 
gomock.Any()).Return(false, nil) @@ -539,7 +558,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No changes, so there must not be an ASG update! asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) @@ -564,12 +583,13 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No change to user data userdata.ComputeHash([]byte("shell-script")), &userDataSecretKey, + nil, nil) ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-different"), nil) ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil) asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(true, nil) - ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil) - ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(ptr.To[string]("ami-different")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data"}), gomock.Any()).Return(nil) + ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil, nil) + ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(ptr.To[string]("ami-different")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data"}), gomock.Any(), gomock.Any()).Return(nil) ec2Svc.EXPECT().GetLaunchTemplateLatestVersion(gomock.Any()).Return("2", nil) // AMI change should trigger rolling out new nodes asgSvc.EXPECT().StartASGInstanceRefresh(gomock.Any()) @@ -592,7 +612,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No changes, so there must not be an ASG update! 
asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) @@ -618,12 +638,13 @@ func TestAWSMachinePoolReconciler(t *testing.T) { userdata.ComputeHash([]byte("shell-script")), // But the name of the secret changes from `previous-secret-name` to `bootstrap-data` &apimachinerytypes.NamespacedName{Namespace: "default", Name: "previous-secret-name"}, + nil, nil) ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-existing"), nil) ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil) asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(true, nil) - ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil) - ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(ptr.To[string]("ami-existing")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data"}), gomock.Any()).Return(nil) + ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil, nil) + ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(ptr.To[string]("ami-existing")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data"}), gomock.Any(), gomock.Any()).Return(nil) ec2Svc.EXPECT().GetLaunchTemplateLatestVersion(gomock.Any()).Return("2", nil) // Changing the bootstrap data secret name should trigger rolling out new nodes, no matter what the // content (user data) is. This way, users can enforce a rollout by changing the bootstrap config @@ -648,7 +669,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No changes, so there must not be an ASG update! 
asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) @@ -659,9 +680,9 @@ func TestAWSMachinePoolReconciler(t *testing.T) { reconSvc = nil // not used defer teardown(t, g) - ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return(nil, "", nil, nil) + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return(nil, "", nil, nil, nil) ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-abcdef123"), nil) - ec2Svc.EXPECT().CreateLaunchTemplate(gomock.Any(), gomock.Eq(ptr.To[string]("ami-abcdef123")), gomock.Eq(userDataSecretKey), gomock.Eq([]byte("shell-script"))).Return("lt-ghijkl456", nil) + ec2Svc.EXPECT().CreateLaunchTemplate(gomock.Any(), gomock.Eq(ptr.To[string]("ami-abcdef123")), gomock.Eq(userDataSecretKey), gomock.Eq([]byte("shell-script")), gomock.Eq(userdata.ComputeHash([]byte("shell-script")))).Return("lt-ghijkl456", nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, nil) asgSvc.EXPECT().CreateASG(gomock.Any()).DoAndReturn(func(scope *scope.MachinePoolScope) (*expinfrav1.AutoScalingGroup, error) { g.Expect(scope.Name()).To(Equal("test")) @@ -670,7 +691,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { }, nil }) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) g.Expect(ms.AWSMachinePool.Status.LaunchTemplateID).ToNot(BeEmpty()) @@ -702,12 +723,13 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No change to user data content userdata.ComputeHash([]byte("shell-script")), &apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data"}, + nil, nil) ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-existing"), nil) ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), 
gomock.Any(), gomock.Any()).Return(false, nil) asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(true, nil) - ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil) - ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(ptr.To[string]("ami-existing")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data-new"}), gomock.Any()).Return(nil) + ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil, nil) + ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(ptr.To[string]("ami-existing")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data-new"}), gomock.Any(), gomock.Any()).Return(nil) ec2Svc.EXPECT().GetLaunchTemplateLatestVersion(gomock.Any()).Return("2", nil) // Changing the bootstrap data secret name should trigger rolling out new nodes, no matter what the // content (user data) is. This way, users can enforce a rollout by changing the bootstrap config @@ -732,7 +754,151 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No changes, so there must not be an ASG update! 
asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0) - err = reconciler.reconcileNormal(context.Background(), ms, cs, cs) + err = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + g.Expect(err).To(Succeed()) + }) + + t.Run("launch template and ASG exist, bootstrap data secret name changed, Ignition bootstrap data stored in S3", func(t *testing.T) { + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() + + g := NewWithT(t) + setup(t, g) + reconciler.reconcileServiceFactory = nil // use real implementation, but keep EC2 calls mocked (`ec2ServiceFactory`) + reconSvc = nil // not used + defer teardown(t, g) + + secret.Data["format"] = []byte("ignition") + g.Expect(testEnv.Update(ctx, secret)).To(Succeed()) + + // Latest ID and version already stored, no need to retrieve it + ms.AWSMachinePool.Status.LaunchTemplateID = launchTemplateIDExisting + ms.AWSMachinePool.Status.LaunchTemplateVersion = ptr.To[string]("1") + + // Enable Ignition S3 storage + cs.AWSCluster.Spec.S3Bucket = &infrav1.S3Bucket{} + ms.AWSMachinePool.Spec.Ignition = &infrav1.Ignition{} + ms.AWSMachinePool.Default() // simulate webhook that sets default ignition version + + asgSvc.EXPECT().GetASGByName(gomock.Any()).DoAndReturn(func(scope *scope.MachinePoolScope) (*expinfrav1.AutoScalingGroup, error) { + g.Expect(scope.Name()).To(Equal("test")) + + // No difference to `AWSMachinePool.spec` + return &expinfrav1.AutoScalingGroup{ + Name: scope.Name(), + Subnets: []string{ + "subnet-1", + }, + MinSize: awsMachinePool.Spec.MinSize, + MaxSize: awsMachinePool.Spec.MaxSize, + MixedInstancesPolicy: awsMachinePool.Spec.MixedInstancesPolicy.DeepCopy(), + }, nil + }) + + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return( + &expinfrav1.AWSLaunchTemplate{ + Name: "test", + AMI: infrav1.AMIReference{ + ID: ptr.To[string]("ami-existing"), + }, + }, + // No change to user data + userdata.ComputeHash([]byte("shell-script")), + // But the name of the secret 
changes from `previous-secret-name` to `bootstrap-data` + &apimachinerytypes.NamespacedName{Namespace: "default", Name: "previous-secret-name"}, + nil, + nil) + ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-existing"), nil) + ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil) + + s3Mock.EXPECT().PutObject(gomock.Any()).DoAndReturn(func(input *s3.PutObjectInput) (*s3.PutObjectOutput, error) { + g.Expect(*input.Key).To(Equal(fmt.Sprintf("machine-pool/test/%s", userdata.ComputeHash([]byte("shell-script"))))) + return &s3.PutObjectOutput{}, nil + }) + + // Simulate a pending instance refresh + asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(false, nil) + + err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + g.Expect(err).To(HaveOccurred()) + expectConditions(g, ms.AWSMachinePool, []conditionAssertion{{expinfrav1.PreLaunchTemplateUpdateCheckCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityWarning, expinfrav1.PreLaunchTemplateUpdateCheckFailedReason}}) + + // Now simulate that no pending instance refresh exists + asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(true, nil) + + asgSvc.EXPECT().GetASGByName(gomock.Any()).DoAndReturn(func(scope *scope.MachinePoolScope) (*expinfrav1.AutoScalingGroup, error) { + g.Expect(scope.Name()).To(Equal("test")) + + // No difference to `AWSMachinePool.spec` + return &expinfrav1.AutoScalingGroup{ + Name: scope.Name(), + Subnets: []string{ + "subnet-1", + }, + MinSize: awsMachinePool.Spec.MinSize, + MaxSize: awsMachinePool.Spec.MaxSize, + MixedInstancesPolicy: awsMachinePool.Spec.MixedInstancesPolicy.DeepCopy(), + }, nil + }) + + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return( + &expinfrav1.AWSLaunchTemplate{ + Name: "test", + AMI: infrav1.AMIReference{ + ID: ptr.To[string]("ami-existing"), + }, + }, + // No change to user data + userdata.ComputeHash([]byte("shell-script")), 
+ // But the name of the secret changes from `previous-secret-name` to `bootstrap-data` + &apimachinerytypes.NamespacedName{Namespace: "default", Name: "previous-secret-name"}, + nil, + nil) + ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-existing"), nil) + ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil) + + s3Mock.EXPECT().PutObject(gomock.Any()).DoAndReturn(func(input *s3.PutObjectInput) (*s3.PutObjectOutput, error) { + g.Expect(*input.Key).To(Equal(fmt.Sprintf("machine-pool/test/%s", userdata.ComputeHash([]byte("shell-script"))))) + return &s3.PutObjectOutput{}, nil + }) + + var simulatedDeletedVersionNumber int64 = 777 + bootstrapDataHash := "some-simulated-hash" + ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(&ec2.LaunchTemplateVersion{ + VersionNumber: &simulatedDeletedVersionNumber, + LaunchTemplateData: &ec2.ResponseLaunchTemplateData{ + TagSpecifications: []*ec2.LaunchTemplateTagSpecification{ + { + ResourceType: aws.String(ec2.ResourceTypeInstance), + Tags: []*ec2.Tag{ + // Only this tag is relevant for the test. If this is stored in the + // launch template version, and the version gets deleted, the S3 object + // with the bootstrap data should be deleted as well. 
+ { + Key: aws.String("sigs.k8s.io/cluster-api-provider-aws/bootstrap-data-hash"), + Value: aws.String(bootstrapDataHash), + }, + }, + }, + }, + UserData: aws.String(base64.StdEncoding.EncodeToString([]byte("old-user-data"))), + }, + }, nil) + s3Mock.EXPECT().DeleteObject(gomock.Any()).DoAndReturn(func(input *s3.DeleteObjectInput) (*s3.DeleteObjectOutput, error) { + g.Expect(*input.Key).To(Equal(fmt.Sprintf("machine-pool/test/%s", bootstrapDataHash))) + return &s3.DeleteObjectOutput{}, nil + }) + ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(ptr.To[string]("ami-existing")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data"}), gomock.Any(), gomock.Any()).Return(nil) + ec2Svc.EXPECT().GetLaunchTemplateLatestVersion(gomock.Any()).Return("2", nil) + // Changing the bootstrap data secret name should trigger rolling out new nodes, no matter what the + // content (user data) is. This way, users can enforce a rollout by changing the bootstrap config + // reference (`MachinePool.spec.template.spec.bootstrap`). + asgSvc.EXPECT().StartASGInstanceRefresh(gomock.Any()) + + asgSvc.EXPECT().SubnetIDs(gomock.Any()).Return([]string{"subnet-1"}, nil) // no change + // No changes, so there must not be an ASG update! 
+ asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0) + + err = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) }) @@ -766,7 +932,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { finalizer(t, g) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, nil) - ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, nil).AnyTimes() + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, nil, nil).AnyTimes() buf := new(bytes.Buffer) klog.SetOutput(buf) @@ -788,7 +954,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { Status: expinfrav1.ASGStatusDeleteInProgress, } asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&inProgressASG, nil) - ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, nil).AnyTimes() + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, nil, nil).AnyTimes() buf := new(bytes.Buffer) klog.SetOutput(buf) diff --git a/exp/controllers/awsmanagedmachinepool_controller.go b/exp/controllers/awsmanagedmachinepool_controller.go index 8c0d75c2ec..3e4aa0924a 100644 --- a/exp/controllers/awsmanagedmachinepool_controller.go +++ b/exp/controllers/awsmanagedmachinepool_controller.go @@ -156,6 +156,7 @@ func (r *AWSManagedMachinePoolReconciler) Reconcile(ctx context.Context, req ctr } machinePoolScope, err := scope.NewManagedMachinePoolScope(scope.ManagedMachinePoolScopeParams{ + Logger: log, Client: r.Client, ControllerName: "awsmanagedmachinepool", Cluster: cluster, @@ -189,13 +190,14 @@ func (r *AWSManagedMachinePoolReconciler) Reconcile(ctx context.Context, req ctr return ctrl.Result{}, r.reconcileDelete(ctx, machinePoolScope, managedControlPlaneScope) } - return ctrl.Result{}, r.reconcileNormal(ctx, machinePoolScope, managedControlPlaneScope) + return ctrl.Result{}, r.reconcileNormal(ctx, machinePoolScope, managedControlPlaneScope, managedControlPlaneScope) } func (r *AWSManagedMachinePoolReconciler) reconcileNormal( ctx context.Context, 
machinePoolScope *scope.ManagedMachinePoolScope, ec2Scope scope.EC2Scope, + s3Scope scope.S3Scope, ) error { machinePoolScope.Info("Reconciling AWSManagedMachinePool") @@ -216,7 +218,8 @@ func (r *AWSManagedMachinePoolReconciler) reconcileNormal( runPostLaunchTemplateUpdateOperation := func() error { return nil } - if err := reconSvc.ReconcileLaunchTemplate(machinePoolScope, ec2svc, canUpdateLaunchTemplate, runPostLaunchTemplateUpdateOperation); err != nil { + var objectStoreSvc services.ObjectStoreInterface // nil because no S3 bucket support for `AWSManagedControlPlane` yet + if err := reconSvc.ReconcileLaunchTemplate(machinePoolScope, machinePoolScope, s3Scope, ec2svc, objectStoreSvc, canUpdateLaunchTemplate, runPostLaunchTemplateUpdateOperation); err != nil { r.Recorder.Eventf(machinePoolScope.ManagedMachinePool, corev1.EventTypeWarning, "FailedLaunchTemplateReconcile", "Failed to reconcile launch template: %v", err) machinePoolScope.Error(err, "failed to reconcile launch template") conditions.MarkFalse(machinePoolScope.ManagedMachinePool, expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateReconcileFailedReason, clusterv1.ConditionSeverityError, "") @@ -259,7 +262,7 @@ func (r *AWSManagedMachinePoolReconciler) reconcileDelete( if machinePoolScope.ManagedMachinePool.Spec.AWSLaunchTemplate != nil { launchTemplateID := machinePoolScope.ManagedMachinePool.Status.LaunchTemplateID - launchTemplate, _, _, err := ec2Svc.GetLaunchTemplate(machinePoolScope.LaunchTemplateName()) + launchTemplate, _, _, _, err := ec2Svc.GetLaunchTemplate(machinePoolScope.LaunchTemplateName()) if err != nil { return err } diff --git a/pkg/cloud/scope/ignition.go b/pkg/cloud/scope/ignition.go new file mode 100644 index 0000000000..d996612181 --- /dev/null +++ b/pkg/cloud/scope/ignition.go @@ -0,0 +1,26 @@ +/* +Copyright 2024 The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scope + +import ( + infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" +) + +// IgnitionScope gets the optional Ignition configuration. +type IgnitionScope interface { + Ignition() *infrav1.Ignition +} diff --git a/pkg/cloud/scope/launchtemplate.go b/pkg/cloud/scope/launchtemplate.go index fb2df8b59f..34e84e7ff7 100644 --- a/pkg/cloud/scope/launchtemplate.go +++ b/pkg/cloud/scope/launchtemplate.go @@ -37,7 +37,7 @@ type LaunchTemplateScope interface { SetLaunchTemplateIDStatus(id string) GetLaunchTemplateLatestVersionStatus() string SetLaunchTemplateLatestVersionStatus(version string) - GetRawBootstrapData() ([]byte, *types.NamespacedName, error) + GetRawBootstrapData() ([]byte, string, *types.NamespacedName, error) IsEKSManaged() bool AdditionalTags() infrav1.Tags diff --git a/pkg/cloud/scope/machinepool.go b/pkg/cloud/scope/machinepool.go index 00e8abeadc..d141aef06f 100644 --- a/pkg/cloud/scope/machinepool.go +++ b/pkg/cloud/scope/machinepool.go @@ -121,6 +121,11 @@ func NewMachinePoolScope(params MachinePoolScopeParams) (*MachinePoolScope, erro }, nil } +// Ignition gets the ignition config. +func (m *MachinePoolScope) Ignition() *infrav1.Ignition { + return m.AWSMachinePool.Spec.Ignition +} + // Name returns the AWSMachinePool name. 
func (m *MachinePoolScope) Name() string { return m.AWSMachinePool.Name @@ -133,13 +138,7 @@ func (m *MachinePoolScope) Namespace() string { // GetRawBootstrapData returns the bootstrap data from the secret in the Machine's bootstrap.dataSecretName, // including the secret's namespaced name. -func (m *MachinePoolScope) GetRawBootstrapData() ([]byte, *types.NamespacedName, error) { - data, _, bootstrapDataSecretKey, err := m.getBootstrapData() - - return data, bootstrapDataSecretKey, err -} - -func (m *MachinePoolScope) getBootstrapData() ([]byte, string, *types.NamespacedName, error) { +func (m *MachinePoolScope) GetRawBootstrapData() ([]byte, string, *types.NamespacedName, error) { if m.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil { return nil, "", nil, errors.New("error retrieving bootstrap data: linked Machine's bootstrap.dataSecretName is nil") } diff --git a/pkg/cloud/scope/managednodegroup.go b/pkg/cloud/scope/managednodegroup.go index e9421d7282..5d35fad215 100644 --- a/pkg/cloud/scope/managednodegroup.go +++ b/pkg/cloud/scope/managednodegroup.go @@ -305,6 +305,11 @@ func (s *ManagedMachinePoolScope) ControllerName() string { return s.controllerName } +// Ignition gets the ignition config. +func (s *ManagedMachinePoolScope) Ignition() *infrav1.Ignition { + return nil +} + // KubernetesClusterName is the name of the EKS cluster name. func (s *ManagedMachinePoolScope) KubernetesClusterName() string { return s.ControlPlane.Spec.EKSClusterName @@ -326,24 +331,24 @@ func (s *ManagedMachinePoolScope) Namespace() string { } // GetRawBootstrapData returns the raw bootstrap data from the linked Machine's bootstrap.dataSecretName. 
-func (s *ManagedMachinePoolScope) GetRawBootstrapData() ([]byte, *types.NamespacedName, error) { +func (s *ManagedMachinePoolScope) GetRawBootstrapData() ([]byte, string, *types.NamespacedName, error) { if s.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil { - return nil, nil, errors.New("error retrieving bootstrap data: linked Machine's bootstrap.dataSecretName is nil") + return nil, "", nil, errors.New("error retrieving bootstrap data: linked Machine's bootstrap.dataSecretName is nil") } secret := &corev1.Secret{} key := types.NamespacedName{Namespace: s.Namespace(), Name: *s.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName} if err := s.Client.Get(context.TODO(), key, secret); err != nil { - return nil, nil, errors.Wrapf(err, "failed to retrieve bootstrap data secret for AWSManagedMachinePool %s/%s", s.Namespace(), s.Name()) + return nil, "", nil, errors.Wrapf(err, "failed to retrieve bootstrap data secret for AWSManagedMachinePool %s/%s", s.Namespace(), s.Name()) } value, ok := secret.Data["value"] if !ok { - return nil, nil, errors.New("error retrieving bootstrap data: secret value key is missing") + return nil, "", nil, errors.New("error retrieving bootstrap data: secret value key is missing") } - return value, &key, nil + return value, string(secret.Data["format"]), &key, nil } // GetObjectMeta returns the ObjectMeta for the AWSManagedMachinePool. 
diff --git a/pkg/cloud/services/ec2/launchtemplate.go b/pkg/cloud/services/ec2/launchtemplate.go index 5da57f2521..26544ba720 100644 --- a/pkg/cloud/services/ec2/launchtemplate.go +++ b/pkg/cloud/services/ec2/launchtemplate.go @@ -26,6 +26,9 @@ import ( "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" + "github.com/blang/semver" + ignTypes "github.com/coreos/ignition/config/v2_3/types" + ignV3Types "github.com/coreos/ignition/v2/config/v3_4/types" "github.com/google/go-cmp/cmp" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" @@ -34,6 +37,7 @@ import ( infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2" + "sigs.k8s.io/cluster-api-provider-aws/v2/feature" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/awserrors" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services" @@ -56,22 +60,23 @@ const ( // ReconcileLaunchTemplate reconciles a launch template and triggers instance refresh conditionally, depending on // changes. 
// -//nolint:gocyclo +//nolint:gocyclo,maintidx func (s *Service) ReconcileLaunchTemplate( + ignitionScope scope.IgnitionScope, scope scope.LaunchTemplateScope, + s3Scope scope.S3Scope, ec2svc services.EC2Interface, + objectStoreSvc services.ObjectStoreInterface, canUpdateLaunchTemplate func() (bool, error), runPostLaunchTemplateUpdateOperation func() error, ) error { - bootstrapData, bootstrapDataSecretKey, err := scope.GetRawBootstrapData() + bootstrapData, bootstrapDataFormat, bootstrapDataSecretKey, err := scope.GetRawBootstrapData() if err != nil { record.Eventf(scope.GetMachinePool(), corev1.EventTypeWarning, "FailedGetBootstrapData", err.Error()) return err } - bootstrapDataHash := userdata.ComputeHash(bootstrapData) - scope.Info("checking for existing launch template") - launchTemplate, launchTemplateUserDataHash, launchTemplateUserDataSecretKey, err := ec2svc.GetLaunchTemplate(scope.LaunchTemplateName()) + launchTemplate, launchTemplateUserDataHash, launchTemplateUserDataSecretKey, _, err := ec2svc.GetLaunchTemplate(scope.LaunchTemplateName()) if err != nil { conditions.MarkUnknown(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateNotFoundReason, err.Error()) return err @@ -83,9 +88,99 @@ func (s *Service) ReconcileLaunchTemplate( return err } + var ignitionStorageType = infrav1.DefaultIgnitionStorageType + var ignitionVersion = infrav1.DefaultIgnitionVersion + if ignition := ignitionScope.Ignition(); ignition != nil { + ignitionStorageType = ignition.StorageType + ignitionVersion = ignition.Version + } + + var userDataForLaunchTemplate []byte + if bootstrapDataFormat == "ignition" && ignitionStorageType == infrav1.IgnitionStorageTypeOptionClusterObjectStore { + if s3Scope.Bucket() == nil { + return errors.New("using Ignition by default requires a cluster wide object storage configured at `AWSCluster.spec.s3Bucket`. 
" + + "You must configure one or instruct Ignition to use EC2 user data instead, by setting `AWSMachinePool.spec.ignition.storageType` to `UnencryptedUserData`") + } + + scope.Info("Using S3 bucket storage for Ignition format") + + // S3 bucket storage enabled and Ignition format is used. Ignition supports reading large user data from S3, + // not restricted by the EC2 user data size limit. The actual user data goes into the S3 object while the + // user data on the launch template points to the S3 bucket (or presigned URL). + // Previously, user data was always written into the launch template, so we check + // `AWSMachinePool.Spec.Ignition != nil` to toggle the S3 feature on for `AWSMachinePool` objects. + objectURL, err := objectStoreSvc.CreateForMachinePool(scope, bootstrapData) + + if err != nil { + conditions.MarkFalse(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateReconcileFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return err + } + + semver, err := semver.ParseTolerant(ignitionVersion) + if err != nil { + err = errors.Wrapf(err, "failed to parse ignition version %q", ignitionVersion) + conditions.MarkFalse(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateReconcileFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return err + } + + // EC2 user data points to S3 + switch semver.Major { + case 2: + ignData := &ignTypes.Config{ + Ignition: ignTypes.Ignition{ + Version: semver.String(), + Config: ignTypes.IgnitionConfig{ + Append: []ignTypes.ConfigReference{ + { + Source: objectURL, + }, + }, + }, + }, + } + + userDataForLaunchTemplate, err = json.Marshal(ignData) + if err != nil { + err = errors.Wrap(err, "failed to convert ignition config to JSON") + conditions.MarkFalse(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateReconcileFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return err + } + case 3: + 
ignData := &ignV3Types.Config{ + Ignition: ignV3Types.Ignition{ + Version: semver.String(), + Config: ignV3Types.IgnitionConfig{ + Merge: []ignV3Types.Resource{ + { + Source: aws.String(objectURL), + }, + }, + }, + }, + } + + userDataForLaunchTemplate, err = json.Marshal(ignData) + if err != nil { + err = errors.Wrap(err, "failed to convert ignition config to JSON") + conditions.MarkFalse(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateReconcileFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return err + } + default: + err = errors.Errorf("unsupported ignition version %q", ignitionVersion) + conditions.MarkFalse(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateReconcileFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return err + } + } else { + // S3 bucket not used, so the bootstrap data is stored directly in the launch template + // (EC2 user data) + userDataForLaunchTemplate = bootstrapData + } + + bootstrapDataForLaunchTemplateHash := userdata.ComputeHash(userDataForLaunchTemplate) + if launchTemplate == nil { scope.Info("no existing launch template found, creating") - launchTemplateID, err := ec2svc.CreateLaunchTemplate(scope, imageID, *bootstrapDataSecretKey, bootstrapData) + launchTemplateID, err := ec2svc.CreateLaunchTemplate(scope, imageID, *bootstrapDataSecretKey, userDataForLaunchTemplate, userdata.ComputeHash(bootstrapData)) if err != nil { conditions.MarkFalse(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateCreateFailedReason, clusterv1.ConditionSeverityError, err.Error()) return err @@ -153,18 +248,42 @@ func (s *Service) ReconcileLaunchTemplate( } } - userDataHashChanged := launchTemplateUserDataHash != bootstrapDataHash + userDataHashChanged := launchTemplateUserDataHash != bootstrapDataForLaunchTemplateHash // Create a new launch template version if there's a difference in configuration, tags, // userdata, OR 
we've discovered a new AMI ID. if needsUpdate || tagsChanged || amiChanged || userDataHashChanged || userDataSecretKeyChanged || launchTemplateNeedsUserDataSecretKeyTag { scope.Info("creating new version for launch template", "existing", launchTemplate, "incoming", scope.GetLaunchTemplate(), "needsUpdate", needsUpdate, "tagsChanged", tagsChanged, "amiChanged", amiChanged, "userDataHashChanged", userDataHashChanged, "userDataSecretKeyChanged", userDataSecretKeyChanged) + // There is a limit to the number of Launch Template Versions. // We ensure that the number of versions does not grow without bound by following a simple rule: Before we create a new version, we delete one old version, if there is at least one old version that is not in use. - if err := ec2svc.PruneLaunchTemplateVersions(scope.GetLaunchTemplateIDStatus()); err != nil { + deletedLaunchTemplateVersion, err := ec2svc.PruneLaunchTemplateVersions(scope.GetLaunchTemplateIDStatus()) + if err != nil { return err } - if err := ec2svc.CreateLaunchTemplateVersion(scope.GetLaunchTemplateIDStatus(), scope, imageID, *bootstrapDataSecretKey, bootstrapData); err != nil { + + // S3 objects should be deleted as soon as possible if they're not used + // anymore. If this fails, it would still be cleaned by the bucket lifecycle + // policy later. 
+ if feature.Gates.Enabled(feature.MachinePool) && deletedLaunchTemplateVersion != nil { + _, _, _, deletedLaunchTemplateVersionBootstrapDataHash, err := s.SDKToLaunchTemplate(deletedLaunchTemplateVersion) + if err != nil { + return err + } + + if deletedLaunchTemplateVersionBootstrapDataHash != nil && s3Scope.Bucket() != nil && bootstrapDataFormat == "ignition" && ignitionStorageType == infrav1.IgnitionStorageTypeOptionClusterObjectStore { + scope.Info("Deleting S3 object for deleted launch template version", "version", *deletedLaunchTemplateVersion.VersionNumber) + + err = objectStoreSvc.DeleteForMachinePool(scope, *deletedLaunchTemplateVersionBootstrapDataHash) + + // If any error happened above, log it and continue + if err != nil { + scope.Error(err, "Failed to delete S3 object for deleted launch template version, continuing because the bucket lifecycle policy will clean it later", "version", *deletedLaunchTemplateVersion.VersionNumber) + } + } + } + + if err := ec2svc.CreateLaunchTemplateVersion(scope.GetLaunchTemplateIDStatus(), scope, imageID, *bootstrapDataSecretKey, userDataForLaunchTemplate, userdata.ComputeHash(bootstrapData)); err != nil { return err } version, err := ec2svc.GetLaunchTemplateLatestVersion(scope.GetLaunchTemplateIDStatus()) @@ -345,9 +464,9 @@ func tagsChanged(annotation map[string]interface{}, src map[string]string) (bool // GetLaunchTemplate returns the existing LaunchTemplate or nothing if it doesn't exist. // For now by name until we need the input to be something different. 
-func (s *Service) GetLaunchTemplate(launchTemplateName string) (*expinfrav1.AWSLaunchTemplate, string, *apimachinerytypes.NamespacedName, error) { +func (s *Service) GetLaunchTemplate(launchTemplateName string) (*expinfrav1.AWSLaunchTemplate, string, *apimachinerytypes.NamespacedName, *string, error) { if launchTemplateName == "" { - return nil, "", nil, nil + return nil, "", nil, nil, nil } s.scope.Debug("Looking for existing LaunchTemplates") @@ -360,13 +479,13 @@ func (s *Service) GetLaunchTemplate(launchTemplateName string) (*expinfrav1.AWSL out, err := s.EC2Client.DescribeLaunchTemplateVersionsWithContext(context.TODO(), input) switch { case awserrors.IsNotFound(err): - return nil, "", nil, nil + return nil, "", nil, nil, nil case err != nil: - return nil, "", nil, err + return nil, "", nil, nil, err } if out == nil || out.LaunchTemplateVersions == nil || len(out.LaunchTemplateVersions) == 0 { - return nil, "", nil, nil + return nil, "", nil, nil, nil } return s.SDKToLaunchTemplate(out.LaunchTemplateVersions[0]) @@ -400,10 +519,10 @@ func (s *Service) GetLaunchTemplateID(launchTemplateName string) (string, error) } // CreateLaunchTemplate generates a launch template to be used with the autoscaling group. 
-func (s *Service) CreateLaunchTemplate(scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte) (string, error) { +func (s *Service) CreateLaunchTemplate(scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte, bootstrapDataHash string) (string, error) { s.scope.Info("Create a new launch template") - launchTemplateData, err := s.createLaunchTemplateData(scope, imageID, userDataSecretKey, userData) + launchTemplateData, err := s.createLaunchTemplateData(scope, imageID, userDataSecretKey, userData, bootstrapDataHash) if err != nil { return "", errors.Wrapf(err, "unable to form launch template data") } @@ -444,10 +563,14 @@ func (s *Service) CreateLaunchTemplate(scope scope.LaunchTemplateScope, imageID } // CreateLaunchTemplateVersion will create a launch template. -func (s *Service) CreateLaunchTemplateVersion(id string, scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte) error { +// While userDataForLaunchTemplate is the data for the EC2 launch +// template, bootstrapDataHash relates to the final bootstrap data +// (not necessarily stored in EC2 user data, but could be in an S3 +// object). 
+func (s *Service) CreateLaunchTemplateVersion(id string, scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userDataForLaunchTemplate []byte, bootstrapDataHash string) error { s.scope.Debug("creating new launch template version", "machine-pool", scope.LaunchTemplateName()) - launchTemplateData, err := s.createLaunchTemplateData(scope, imageID, userDataSecretKey, userData) + launchTemplateData, err := s.createLaunchTemplateData(scope, imageID, userDataSecretKey, userDataForLaunchTemplate, bootstrapDataHash) if err != nil { return errors.Wrapf(err, "unable to form launch template data") } @@ -465,7 +588,7 @@ func (s *Service) CreateLaunchTemplateVersion(id string, scope scope.LaunchTempl return nil } -func (s *Service) createLaunchTemplateData(scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte) (*ec2.RequestLaunchTemplateData, error) { +func (s *Service) createLaunchTemplateData(scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userDataForLaunchTemplate []byte, bootstrapDataHash string) (*ec2.RequestLaunchTemplateData, error) { lt := scope.GetLaunchTemplate() // An explicit empty string for SSHKeyName means do not specify a key in the ASG launch @@ -477,7 +600,7 @@ func (s *Service) createLaunchTemplateData(scope scope.LaunchTemplateScope, imag data := &ec2.RequestLaunchTemplateData{ InstanceType: aws.String(lt.InstanceType), KeyName: sshKeyNamePtr, - UserData: ptr.To[string](base64.StdEncoding.EncodeToString(userData)), + UserData: ptr.To[string](base64.StdEncoding.EncodeToString(userDataForLaunchTemplate)), } if lt.InstanceMetadataOptions != nil { @@ -548,7 +671,7 @@ func (s *Service) createLaunchTemplateData(scope scope.LaunchTemplateScope, imag data.BlockDeviceMappings = blockDeviceMappings } - data.TagSpecifications = s.buildLaunchTemplateTagSpecificationRequest(scope, userDataSecretKey) + 
data.TagSpecifications = s.buildLaunchTemplateTagSpecificationRequest(scope, userDataSecretKey, bootstrapDataHash) return data, nil } @@ -603,7 +726,8 @@ func (s *Service) DeleteLaunchTemplate(id string) error { // It does not delete the "latest" version, because that version may still be in use. // It does not delete the "default" version, because that version cannot be deleted. // It does not assume that versions are sequential. Versions may be deleted out of band. -func (s *Service) PruneLaunchTemplateVersions(id string) error { +// If there was an unused version which was successfully deleted, it is returned. +func (s *Service) PruneLaunchTemplateVersions(id string) (*ec2.LaunchTemplateVersion, error) { // When there is one version available, it is the default and the latest. // When there are two versions available, one the is the default, the other is the latest. // Therefore we only prune when there are at least 3 versions available. @@ -619,7 +743,7 @@ func (s *Service) PruneLaunchTemplateVersions(id string) error { out, err := s.EC2Client.DescribeLaunchTemplateVersionsWithContext(context.TODO(), input) if err != nil { s.scope.Info("", "aerr", err.Error()) - return err + return nil, err } // len(out.LaunchTemplateVersions) | items @@ -628,10 +752,14 @@ func (s *Service) PruneLaunchTemplateVersions(id string) error { // 2 | [default, latest] // 3 | [default, versionToPrune, latest] if len(out.LaunchTemplateVersions) < minCountToAllowPrune { - return nil + return nil, nil } - versionToPrune := out.LaunchTemplateVersions[1].VersionNumber - return s.deleteLaunchTemplateVersion(id, versionToPrune) + versionToPrune := out.LaunchTemplateVersions[1] + err = s.deleteLaunchTemplateVersion(id, versionToPrune.VersionNumber) + if err != nil { + return nil, err + } + return versionToPrune, nil } // GetLaunchTemplateLatestVersion returns the latest version of a launch template. 
@@ -655,11 +783,12 @@ func (s *Service) GetLaunchTemplateLatestVersion(id string) (string, error) { } func (s *Service) deleteLaunchTemplateVersion(id string, version *int64) error { - s.scope.Debug("Deleting launch template version", "id", id) - if version == nil { return errors.New("version is a nil pointer") } + + s.scope.Debug("Deleting launch template version", "id", id, "version", *version) + versions := []string{strconv.FormatInt(*version, 10)} input := &ec2.DeleteLaunchTemplateVersionsInput{ @@ -672,12 +801,12 @@ func (s *Service) deleteLaunchTemplateVersion(id string, version *int64) error { return err } - s.scope.Debug("Deleted launch template", "id", id, "version", *version) + s.scope.Debug("Deleted launch template version", "id", id, "version", *version) return nil } // SDKToLaunchTemplate converts an AWS EC2 SDK instance to the CAPA instance type. -func (s *Service) SDKToLaunchTemplate(d *ec2.LaunchTemplateVersion) (*expinfrav1.AWSLaunchTemplate, string, *apimachinerytypes.NamespacedName, error) { +func (s *Service) SDKToLaunchTemplate(d *ec2.LaunchTemplateVersion) (*expinfrav1.AWSLaunchTemplate, string, *apimachinerytypes.NamespacedName, *string, error) { v := d.LaunchTemplateData i := &expinfrav1.AWSLaunchTemplate{ Name: aws.StringValue(d.LaunchTemplateName), @@ -732,30 +861,35 @@ func (s *Service) SDKToLaunchTemplate(d *ec2.LaunchTemplateVersion) (*expinfrav1 } if v.UserData == nil { - return i, userdata.ComputeHash(nil), nil, nil + return i, userdata.ComputeHash(nil), nil, nil, nil } decodedUserData, err := base64.StdEncoding.DecodeString(*v.UserData) if err != nil { - return nil, "", nil, errors.Wrap(err, "unable to decode UserData") + return nil, "", nil, nil, errors.Wrap(err, "unable to decode UserData") } decodedUserDataHash := userdata.ComputeHash(decodedUserData) + var launchTemplateUserDataSecretKey *apimachinerytypes.NamespacedName + var bootstrapDataHash *string for _, tagSpecification := range v.TagSpecifications { if 
tagSpecification.ResourceType != nil && *tagSpecification.ResourceType == ec2.ResourceTypeInstance { for _, tag := range tagSpecification.Tags { if tag.Key != nil && *tag.Key == infrav1.LaunchTemplateBootstrapDataSecret && tag.Value != nil && strings.Contains(*tag.Value, "/") { parts := strings.SplitN(*tag.Value, "/", 2) - launchTemplateUserDataSecretKey := &apimachinerytypes.NamespacedName{ + launchTemplateUserDataSecretKey = &apimachinerytypes.NamespacedName{ Namespace: parts[0], Name: parts[1], } - return i, decodedUserDataHash, launchTemplateUserDataSecretKey, nil + } + + if tag.Key != nil && *tag.Key == infrav1.LaunchTemplateBootstrapDataHash && tag.Value != nil && *tag.Value != "" { + bootstrapDataHash = tag.Value } } } } - return i, decodedUserDataHash, nil, nil + return i, decodedUserDataHash, launchTemplateUserDataSecretKey, bootstrapDataHash, nil } // LaunchTemplateNeedsUpdate checks if a new launch template version is needed. @@ -891,7 +1025,7 @@ func (s *Service) GetAdditionalSecurityGroupsIDs(securityGroups []infrav1.AWSRes return additionalSecurityGroupsIDs, nil } -func (s *Service) buildLaunchTemplateTagSpecificationRequest(scope scope.LaunchTemplateScope, userDataSecretKey apimachinerytypes.NamespacedName) []*ec2.LaunchTemplateTagSpecificationRequest { +func (s *Service) buildLaunchTemplateTagSpecificationRequest(scope scope.LaunchTemplateScope, userDataSecretKey apimachinerytypes.NamespacedName, bootstrapDataHash string) []*ec2.LaunchTemplateTagSpecificationRequest { tagSpecifications := make([]*ec2.LaunchTemplateTagSpecificationRequest, 0) additionalTags := scope.AdditionalTags() // Set the cloud provider tag @@ -909,6 +1043,7 @@ func (s *Service) buildLaunchTemplateTagSpecificationRequest(scope scope.LaunchT { instanceTags := tags.DeepCopy() instanceTags[infrav1.LaunchTemplateBootstrapDataSecret] = userDataSecretKey.String() + instanceTags[infrav1.LaunchTemplateBootstrapDataHash] = bootstrapDataHash spec := 
&ec2.LaunchTemplateTagSpecificationRequest{ResourceType: aws.String(ec2.ResourceTypeInstance)} for key, value := range instanceTags { diff --git a/pkg/cloud/services/ec2/launchtemplate_test.go b/pkg/cloud/services/ec2/launchtemplate_test.go index 4553ad4546..bdd3ccadab 100644 --- a/pkg/cloud/services/ec2/launchtemplate_test.go +++ b/pkg/cloud/services/ec2/launchtemplate_test.go @@ -80,12 +80,21 @@ users: var testUserDataHash = userdata.ComputeHash([]byte(testUserData)) -func defaultEC2AndUserDataSecretKeyTags(name string, clusterName string, userDataSecretKey types.NamespacedName) []*ec2.Tag { +var testBootstrapData = []byte("different from testUserData since bootstrap data may be in S3 while EC2 user data points to that S3 object") +var testBootstrapDataHash = userdata.ComputeHash(testBootstrapData) + +func defaultEC2AndDataTags(name string, clusterName string, userDataSecretKey types.NamespacedName, bootstrapDataHash string) []*ec2.Tag { tags := defaultEC2Tags(name, clusterName) - tags = append(tags, &ec2.Tag{ - Key: aws.String(infrav1.LaunchTemplateBootstrapDataSecret), - Value: aws.String(userDataSecretKey.String()), - }) + tags = append( + tags, + &ec2.Tag{ + Key: aws.String(infrav1.LaunchTemplateBootstrapDataSecret), + Value: aws.String(userDataSecretKey.String()), + }, + &ec2.Tag{ + Key: aws.String(infrav1.LaunchTemplateBootstrapDataHash), + Value: aws.String(bootstrapDataHash), + }) sortTags(tags) return tags } @@ -295,7 +304,7 @@ func TestGetLaunchTemplate(t *testing.T) { tc.expect(mockEC2Client.EXPECT()) } - launchTemplate, userData, _, err := s.GetLaunchTemplate(tc.launchTemplateName) + launchTemplate, userData, _, _, err := s.GetLaunchTemplate(tc.launchTemplateName) tc.check(g, launchTemplate, userData, err) }) } @@ -303,12 +312,13 @@ func TestGetLaunchTemplate(t *testing.T) { func TestServiceSDKToLaunchTemplate(t *testing.T) { tests := []struct { - name string - input *ec2.LaunchTemplateVersion - wantLT *expinfrav1.AWSLaunchTemplate - wantHash string - 
wantDataSecretKey *types.NamespacedName - wantErr bool + name string + input *ec2.LaunchTemplateVersion + wantLT *expinfrav1.AWSLaunchTemplate + wantUserDataHash string + wantDataSecretKey *types.NamespacedName + wantBootstrapDataHash *string + wantErr bool }{ { name: "lots of input", @@ -350,8 +360,9 @@ func TestServiceSDKToLaunchTemplate(t *testing.T) { SSHKeyName: aws.String("foo-keyname"), VersionNumber: aws.Int64(1), }, - wantHash: testUserDataHash, - wantDataSecretKey: nil, // respective tag is not given + wantUserDataHash: testUserDataHash, + wantDataSecretKey: nil, // respective tag is not given + wantBootstrapDataHash: nil, // respective tag is not given }, { name: "tag of bootstrap secret", @@ -388,6 +399,10 @@ func TestServiceSDKToLaunchTemplate(t *testing.T) { Key: aws.String("sigs.k8s.io/cluster-api-provider-aws/bootstrap-data-secret"), Value: aws.String("bootstrap-secret-ns/bootstrap-secret"), }, + { + Key: aws.String("sigs.k8s.io/cluster-api-provider-aws/bootstrap-data-hash"), + Value: aws.String(testBootstrapDataHash), + }, }, }, }, @@ -404,26 +419,29 @@ func TestServiceSDKToLaunchTemplate(t *testing.T) { SSHKeyName: aws.String("foo-keyname"), VersionNumber: aws.Int64(1), }, - wantHash: testUserDataHash, - wantDataSecretKey: &types.NamespacedName{Namespace: "bootstrap-secret-ns", Name: "bootstrap-secret"}, + wantUserDataHash: testUserDataHash, + wantDataSecretKey: &types.NamespacedName{Namespace: "bootstrap-secret-ns", Name: "bootstrap-secret"}, + wantBootstrapDataHash: &testBootstrapDataHash, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) s := &Service{} - gotLT, gotHash, gotDataSecretKey, err := s.SDKToLaunchTemplate(tt.input) + gotLT, gotUserDataHash, gotDataSecretKey, gotBootstrapDataHash, err := s.SDKToLaunchTemplate(tt.input) if (err != nil) != tt.wantErr { t.Fatalf("error mismatch: got %v, wantErr %v", err, tt.wantErr) } if !cmp.Equal(gotLT, tt.wantLT) { t.Fatalf("launchTemplate mismatch: got %v, want 
%v", gotLT, tt.wantLT) } - if !cmp.Equal(gotHash, tt.wantHash) { - t.Fatalf("userDataHash mismatch: got %v, want %v", gotHash, tt.wantHash) + if !cmp.Equal(gotUserDataHash, tt.wantUserDataHash) { + t.Fatalf("userDataHash mismatch: got %v, want %v", gotUserDataHash, tt.wantUserDataHash) } if !cmp.Equal(gotDataSecretKey, tt.wantDataSecretKey) { t.Fatalf("userDataSecretKey mismatch: got %v, want %v", gotDataSecretKey, tt.wantDataSecretKey) } + g.Expect(gotBootstrapDataHash).To(Equal(tt.wantBootstrapDataHash)) }) } } @@ -845,7 +863,7 @@ func TestCreateLaunchTemplate(t *testing.T) { TagSpecifications: []*ec2.LaunchTemplateTagSpecificationRequest{ { ResourceType: aws.String(ec2.ResourceTypeInstance), - Tags: defaultEC2AndUserDataSecretKeyTags("aws-mp-name", "cluster-name", userDataSecretKey), + Tags: defaultEC2AndDataTags("aws-mp-name", "cluster-name", userDataSecretKey, testBootstrapDataHash), }, { ResourceType: aws.String(ec2.ResourceTypeVolume), @@ -905,7 +923,7 @@ func TestCreateLaunchTemplate(t *testing.T) { TagSpecifications: []*ec2.LaunchTemplateTagSpecificationRequest{ { ResourceType: aws.String(ec2.ResourceTypeInstance), - Tags: defaultEC2AndUserDataSecretKeyTags("aws-mp-name", "cluster-name", userDataSecretKey), + Tags: defaultEC2AndDataTags("aws-mp-name", "cluster-name", userDataSecretKey, testBootstrapDataHash), }, { ResourceType: aws.String(ec2.ResourceTypeVolume), @@ -967,7 +985,7 @@ func TestCreateLaunchTemplate(t *testing.T) { TagSpecifications: []*ec2.LaunchTemplateTagSpecificationRequest{ { ResourceType: aws.String(ec2.ResourceTypeInstance), - Tags: defaultEC2AndUserDataSecretKeyTags("aws-mp-name", "cluster-name", userDataSecretKey), + Tags: defaultEC2AndDataTags("aws-mp-name", "cluster-name", userDataSecretKey, testBootstrapDataHash), }, { ResourceType: aws.String(ec2.ResourceTypeVolume), @@ -1022,7 +1040,7 @@ func TestCreateLaunchTemplate(t *testing.T) { tc.expect(g, mockEC2Client.EXPECT()) } - launchTemplate, err := s.CreateLaunchTemplate(ms, 
aws.String("imageID"), userDataSecretKey, userData) + launchTemplate, err := s.CreateLaunchTemplate(ms, aws.String("imageID"), userDataSecretKey, userData, testBootstrapDataHash) tc.check(g, launchTemplate, err) }) } @@ -1050,7 +1068,7 @@ func TestLaunchTemplateDataCreation(t *testing.T) { Namespace: "bootstrap-secret-ns", Name: "bootstrap-secret", } - launchTemplate, err := s.CreateLaunchTemplate(ms, aws.String("imageID"), userDataSecretKey, nil) + launchTemplate, err := s.CreateLaunchTemplate(ms, aws.String("imageID"), userDataSecretKey, nil, "") g.Expect(err).To(HaveOccurred()) g.Expect(launchTemplate).Should(BeEmpty()) }) @@ -1104,7 +1122,7 @@ func TestCreateLaunchTemplateVersion(t *testing.T) { TagSpecifications: []*ec2.LaunchTemplateTagSpecificationRequest{ { ResourceType: aws.String(ec2.ResourceTypeInstance), - Tags: defaultEC2AndUserDataSecretKeyTags("aws-mp-name", "cluster-name", userDataSecretKey), + Tags: defaultEC2AndDataTags("aws-mp-name", "cluster-name", userDataSecretKey, testBootstrapDataHash), }, { ResourceType: aws.String(ec2.ResourceTypeVolume), @@ -1155,7 +1173,7 @@ func TestCreateLaunchTemplateVersion(t *testing.T) { TagSpecifications: []*ec2.LaunchTemplateTagSpecificationRequest{ { ResourceType: aws.String(ec2.ResourceTypeInstance), - Tags: defaultEC2AndUserDataSecretKeyTags("aws-mp-name", "cluster-name", userDataSecretKey), + Tags: defaultEC2AndDataTags("aws-mp-name", "cluster-name", userDataSecretKey, testBootstrapDataHash), }, { ResourceType: aws.String(ec2.ResourceTypeVolume), @@ -1202,10 +1220,10 @@ func TestCreateLaunchTemplateVersion(t *testing.T) { tc.expect(mockEC2Client.EXPECT()) } if tc.wantErr { - g.Expect(s.CreateLaunchTemplateVersion("launch-template-id", ms, aws.String("imageID"), userDataSecretKey, userData)).To(HaveOccurred()) + g.Expect(s.CreateLaunchTemplateVersion("launch-template-id", ms, aws.String("imageID"), userDataSecretKey, userData, testBootstrapDataHash)).To(HaveOccurred()) return } - 
g.Expect(s.CreateLaunchTemplateVersion("launch-template-id", ms, aws.String("imageID"), userDataSecretKey, userData)).NotTo(HaveOccurred()) + g.Expect(s.CreateLaunchTemplateVersion("launch-template-id", ms, aws.String("imageID"), userDataSecretKey, userData, testBootstrapDataHash)).NotTo(HaveOccurred()) }) } } @@ -1218,6 +1236,7 @@ func TestBuildLaunchTemplateTagSpecificationRequest(t *testing.T) { Namespace: "bootstrap-secret-ns", Name: "bootstrap-secret", } + bootstrapDataHash := userdata.ComputeHash([]byte("shell-script")) testCases := []struct { name string check func(g *WithT, m []*ec2.LaunchTemplateTagSpecificationRequest) @@ -1228,7 +1247,7 @@ func TestBuildLaunchTemplateTagSpecificationRequest(t *testing.T) { expected := []*ec2.LaunchTemplateTagSpecificationRequest{ { ResourceType: aws.String(ec2.ResourceTypeInstance), - Tags: defaultEC2AndUserDataSecretKeyTags("aws-mp-name", "cluster-name", userDataSecretKey), + Tags: defaultEC2AndDataTags("aws-mp-name", "cluster-name", userDataSecretKey, bootstrapDataHash), }, { ResourceType: aws.String(ec2.ResourceTypeVolume), @@ -1258,7 +1277,7 @@ func TestBuildLaunchTemplateTagSpecificationRequest(t *testing.T) { g.Expect(err).NotTo(HaveOccurred()) s := NewService(cs) - tc.check(g, s.buildLaunchTemplateTagSpecificationRequest(ms, userDataSecretKey)) + tc.check(g, s.buildLaunchTemplateTagSpecificationRequest(ms, userDataSecretKey, bootstrapDataHash)) }) } } diff --git a/pkg/cloud/services/interfaces.go b/pkg/cloud/services/interfaces.go index 5e0f3dc2e6..9b6bbd640a 100644 --- a/pkg/cloud/services/interfaces.go +++ b/pkg/cloud/services/interfaces.go @@ -20,6 +20,7 @@ package services import ( "context" + "github.com/aws/aws-sdk-go/service/ec2" apimachinerytypes "k8s.io/apimachinery/pkg/types" infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" @@ -71,12 +72,12 @@ type EC2Interface interface { DetachSecurityGroupsFromNetworkInterface(groups []string, interfaceID string) error DiscoverLaunchTemplateAMI(scope 
scope.LaunchTemplateScope) (*string, error) - GetLaunchTemplate(id string) (lt *expinfrav1.AWSLaunchTemplate, userDataHash string, userDataSecretKey *apimachinerytypes.NamespacedName, err error) + GetLaunchTemplate(id string) (lt *expinfrav1.AWSLaunchTemplate, userDataHash string, userDataSecretKey *apimachinerytypes.NamespacedName, bootstrapDataHash *string, err error) GetLaunchTemplateID(id string) (string, error) GetLaunchTemplateLatestVersion(id string) (string, error) - CreateLaunchTemplate(scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte) (string, error) - CreateLaunchTemplateVersion(id string, scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte) error - PruneLaunchTemplateVersions(id string) error + CreateLaunchTemplate(scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte, bootstrapDataHash string) (string, error) + CreateLaunchTemplateVersion(id string, scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte, bootstrapDataHash string) error + PruneLaunchTemplateVersions(id string) (*ec2.LaunchTemplateVersion, error) DeleteLaunchTemplate(id string) error LaunchTemplateNeedsUpdate(scope scope.LaunchTemplateScope, incoming *expinfrav1.AWSLaunchTemplate, existing *expinfrav1.AWSLaunchTemplate) (bool, error) DeleteBastion() error @@ -92,7 +93,7 @@ type EC2Interface interface { // separate from EC2Interface so that we can mock AWS requests separately. For example, by not mocking the // ReconcileLaunchTemplate function, but mocking EC2Interface, we can test which EC2 API operations would have been called. 
type MachinePoolReconcileInterface interface { - ReconcileLaunchTemplate(scope scope.LaunchTemplateScope, ec2svc EC2Interface, canUpdateLaunchTemplate func() (bool, error), runPostLaunchTemplateUpdateOperation func() error) error + ReconcileLaunchTemplate(ignitionScope scope.IgnitionScope, scope scope.LaunchTemplateScope, s3Scope scope.S3Scope, ec2svc EC2Interface, objectStoreSvc ObjectStoreInterface, canUpdateLaunchTemplate func() (bool, error), runPostLaunchTemplateUpdateOperation func() error) error ReconcileTags(scope scope.LaunchTemplateScope, resourceServicesToUpdate []scope.ResourceServiceToUpdate) error } @@ -137,6 +138,8 @@ type ObjectStoreInterface interface { ReconcileBucket() error Delete(m *scope.MachineScope) error Create(m *scope.MachineScope, data []byte) (objectURL string, err error) + CreateForMachinePool(scope scope.LaunchTemplateScope, data []byte) (objectURL string, err error) + DeleteForMachinePool(scope scope.LaunchTemplateScope, bootstrapDataHash string) error } // AWSNodeInterface installs the CNI for EKS clusters. diff --git a/pkg/cloud/services/mock_services/ec2_interface_mock.go b/pkg/cloud/services/mock_services/ec2_interface_mock.go index d46dc001e4..0de1443e2c 100644 --- a/pkg/cloud/services/mock_services/ec2_interface_mock.go +++ b/pkg/cloud/services/mock_services/ec2_interface_mock.go @@ -23,6 +23,7 @@ package mock_services import ( reflect "reflect" + ec2 "github.com/aws/aws-sdk-go/service/ec2" gomock "github.com/golang/mock/gomock" types "k8s.io/apimachinery/pkg/types" v1beta2 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" @@ -69,32 +70,32 @@ func (mr *MockEC2InterfaceMockRecorder) CreateInstance(arg0, arg1, arg2 interfac } // CreateLaunchTemplate mocks base method. 
-func (m *MockEC2Interface) CreateLaunchTemplate(arg0 scope.LaunchTemplateScope, arg1 *string, arg2 types.NamespacedName, arg3 []byte) (string, error) { +func (m *MockEC2Interface) CreateLaunchTemplate(arg0 scope.LaunchTemplateScope, arg1 *string, arg2 types.NamespacedName, arg3 []byte, arg4 string) (string, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "CreateLaunchTemplate", arg0, arg1, arg2, arg3) + ret := m.ctrl.Call(m, "CreateLaunchTemplate", arg0, arg1, arg2, arg3, arg4) ret0, _ := ret[0].(string) ret1, _ := ret[1].(error) return ret0, ret1 } // CreateLaunchTemplate indicates an expected call of CreateLaunchTemplate. -func (mr *MockEC2InterfaceMockRecorder) CreateLaunchTemplate(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { +func (mr *MockEC2InterfaceMockRecorder) CreateLaunchTemplate(arg0, arg1, arg2, arg3, arg4 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateLaunchTemplate", reflect.TypeOf((*MockEC2Interface)(nil).CreateLaunchTemplate), arg0, arg1, arg2, arg3) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateLaunchTemplate", reflect.TypeOf((*MockEC2Interface)(nil).CreateLaunchTemplate), arg0, arg1, arg2, arg3, arg4) } // CreateLaunchTemplateVersion mocks base method. -func (m *MockEC2Interface) CreateLaunchTemplateVersion(arg0 string, arg1 scope.LaunchTemplateScope, arg2 *string, arg3 types.NamespacedName, arg4 []byte) error { +func (m *MockEC2Interface) CreateLaunchTemplateVersion(arg0 string, arg1 scope.LaunchTemplateScope, arg2 *string, arg3 types.NamespacedName, arg4 []byte, arg5 string) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "CreateLaunchTemplateVersion", arg0, arg1, arg2, arg3, arg4) + ret := m.ctrl.Call(m, "CreateLaunchTemplateVersion", arg0, arg1, arg2, arg3, arg4, arg5) ret0, _ := ret[0].(error) return ret0 } // CreateLaunchTemplateVersion indicates an expected call of CreateLaunchTemplateVersion. 
-func (mr *MockEC2InterfaceMockRecorder) CreateLaunchTemplateVersion(arg0, arg1, arg2, arg3, arg4 interface{}) *gomock.Call { +func (mr *MockEC2InterfaceMockRecorder) CreateLaunchTemplateVersion(arg0, arg1, arg2, arg3, arg4, arg5 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateLaunchTemplateVersion", reflect.TypeOf((*MockEC2Interface)(nil).CreateLaunchTemplateVersion), arg0, arg1, arg2, arg3, arg4) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateLaunchTemplateVersion", reflect.TypeOf((*MockEC2Interface)(nil).CreateLaunchTemplateVersion), arg0, arg1, arg2, arg3, arg4, arg5) } // DeleteBastion mocks base method. @@ -200,14 +201,15 @@ func (mr *MockEC2InterfaceMockRecorder) GetInstanceSecurityGroups(arg0 interface } // GetLaunchTemplate mocks base method. -func (m *MockEC2Interface) GetLaunchTemplate(arg0 string) (*v1beta20.AWSLaunchTemplate, string, *types.NamespacedName, error) { +func (m *MockEC2Interface) GetLaunchTemplate(arg0 string) (*v1beta20.AWSLaunchTemplate, string, *types.NamespacedName, *string, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "GetLaunchTemplate", arg0) ret0, _ := ret[0].(*v1beta20.AWSLaunchTemplate) ret1, _ := ret[1].(string) ret2, _ := ret[2].(*types.NamespacedName) - ret3, _ := ret[3].(error) - return ret0, ret1, ret2, ret3 + ret3, _ := ret[3].(*string) + ret4, _ := ret[4].(error) + return ret0, ret1, ret2, ret3, ret4 } // GetLaunchTemplate indicates an expected call of GetLaunchTemplate. @@ -306,11 +308,12 @@ func (mr *MockEC2InterfaceMockRecorder) ModifyInstanceMetadataOptions(arg0, arg1 } // PruneLaunchTemplateVersions mocks base method. 
-func (m *MockEC2Interface) PruneLaunchTemplateVersions(arg0 string) error { +func (m *MockEC2Interface) PruneLaunchTemplateVersions(arg0 string) (*ec2.LaunchTemplateVersion, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "PruneLaunchTemplateVersions", arg0) - ret0, _ := ret[0].(error) - return ret0 + ret0, _ := ret[0].(*ec2.LaunchTemplateVersion) + ret1, _ := ret[1].(error) + return ret0, ret1 } // PruneLaunchTemplateVersions indicates an expected call of PruneLaunchTemplateVersions. diff --git a/pkg/cloud/services/mock_services/objectstore_machine_interface_mock.go b/pkg/cloud/services/mock_services/objectstore_machine_interface_mock.go index 559f356f3a..f38bf008dd 100644 --- a/pkg/cloud/services/mock_services/objectstore_machine_interface_mock.go +++ b/pkg/cloud/services/mock_services/objectstore_machine_interface_mock.go @@ -65,6 +65,21 @@ func (mr *MockObjectStoreInterfaceMockRecorder) Create(arg0, arg1 interface{}) * return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Create", reflect.TypeOf((*MockObjectStoreInterface)(nil).Create), arg0, arg1) } +// CreateForMachinePool mocks base method. +func (m *MockObjectStoreInterface) CreateForMachinePool(arg0 scope.LaunchTemplateScope, arg1 []byte) (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "CreateForMachinePool", arg0, arg1) + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// CreateForMachinePool indicates an expected call of CreateForMachinePool. +func (mr *MockObjectStoreInterfaceMockRecorder) CreateForMachinePool(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateForMachinePool", reflect.TypeOf((*MockObjectStoreInterface)(nil).CreateForMachinePool), arg0, arg1) +} + // Delete mocks base method. 
func (m *MockObjectStoreInterface) Delete(arg0 *scope.MachineScope) error { m.ctrl.T.Helper() @@ -93,6 +108,20 @@ func (mr *MockObjectStoreInterfaceMockRecorder) DeleteBucket() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteBucket", reflect.TypeOf((*MockObjectStoreInterface)(nil).DeleteBucket)) } +// DeleteForMachinePool mocks base method. +func (m *MockObjectStoreInterface) DeleteForMachinePool(arg0 scope.LaunchTemplateScope, arg1 string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeleteForMachinePool", arg0, arg1) + ret0, _ := ret[0].(error) + return ret0 +} + +// DeleteForMachinePool indicates an expected call of DeleteForMachinePool. +func (mr *MockObjectStoreInterfaceMockRecorder) DeleteForMachinePool(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteForMachinePool", reflect.TypeOf((*MockObjectStoreInterface)(nil).DeleteForMachinePool), arg0, arg1) +} + // ReconcileBucket mocks base method. func (m *MockObjectStoreInterface) ReconcileBucket() error { m.ctrl.T.Helper() diff --git a/pkg/cloud/services/mock_services/reconcile_interface_mock.go b/pkg/cloud/services/mock_services/reconcile_interface_mock.go index 3771e81e3a..a8992bde61 100644 --- a/pkg/cloud/services/mock_services/reconcile_interface_mock.go +++ b/pkg/cloud/services/mock_services/reconcile_interface_mock.go @@ -52,17 +52,17 @@ func (m *MockMachinePoolReconcileInterface) EXPECT() *MockMachinePoolReconcileIn } // ReconcileLaunchTemplate mocks base method. 
-func (m *MockMachinePoolReconcileInterface) ReconcileLaunchTemplate(arg0 scope.LaunchTemplateScope, arg1 services.EC2Interface, arg2 func() (bool, error), arg3 func() error) error { +func (m *MockMachinePoolReconcileInterface) ReconcileLaunchTemplate(arg0 scope.IgnitionScope, arg1 scope.LaunchTemplateScope, arg2 scope.S3Scope, arg3 services.EC2Interface, arg4 services.ObjectStoreInterface, arg5 func() (bool, error), arg6 func() error) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "ReconcileLaunchTemplate", arg0, arg1, arg2, arg3) + ret := m.ctrl.Call(m, "ReconcileLaunchTemplate", arg0, arg1, arg2, arg3, arg4, arg5, arg6) ret0, _ := ret[0].(error) return ret0 } // ReconcileLaunchTemplate indicates an expected call of ReconcileLaunchTemplate. -func (mr *MockMachinePoolReconcileInterfaceMockRecorder) ReconcileLaunchTemplate(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { +func (mr *MockMachinePoolReconcileInterfaceMockRecorder) ReconcileLaunchTemplate(arg0, arg1, arg2, arg3, arg4, arg5, arg6 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ReconcileLaunchTemplate", reflect.TypeOf((*MockMachinePoolReconcileInterface)(nil).ReconcileLaunchTemplate), arg0, arg1, arg2, arg3) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ReconcileLaunchTemplate", reflect.TypeOf((*MockMachinePoolReconcileInterface)(nil).ReconcileLaunchTemplate), arg0, arg1, arg2, arg3, arg4, arg5, arg6) } // ReconcileTags mocks base method. 
diff --git a/pkg/cloud/services/s3/s3.go b/pkg/cloud/services/s3/s3.go index 6eb8582585..3a6e949cdc 100644 --- a/pkg/cloud/services/s3/s3.go +++ b/pkg/cloud/services/s3/s3.go @@ -35,8 +35,10 @@ import ( "k8s.io/utils/ptr" infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" + "sigs.k8s.io/cluster-api-provider-aws/v2/feature" iam "sigs.k8s.io/cluster-api-provider-aws/v2/iam/api/v1beta1" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope" + "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/userdata" "sigs.k8s.io/cluster-api-provider-aws/v2/util/system" ) @@ -84,6 +86,10 @@ func (s *Service) ReconcileBucket() error { return errors.Wrap(err, "ensuring bucket policy") } + if err := s.ensureBucketLifecycleConfiguration(bucketName); err != nil { + return errors.Wrap(err, "ensuring bucket lifecycle configuration") + } + return nil } @@ -99,6 +105,50 @@ func (s *Service) DeleteBucket() error { log.Info("Deleting S3 Bucket") + if feature.Gates.Enabled(feature.MachinePool) { + // Delete machine pool user data files that did not get deleted + // yet by the lifecycle policy + for { + log.Info("Listing S3 objects of machine pools") + + // TODO Switch to aws-sdk-go-v2 which has NewListObjectsV2Paginator (as part of https://github.com/kubernetes-sigs/cluster-api-provider-aws/issues/2225) + out, err := s.S3Client.ListObjectsV2(&s3.ListObjectsV2Input{ + Bucket: aws.String(bucketName), + Prefix: aws.String("machine-pool/"), + }) + if err != nil { + aerr, ok := err.(awserr.Error) + if !ok { + return errors.Wrap(err, "listing S3 bucket") + } + + switch aerr.Code() { + case s3.ErrCodeNoSuchBucket: + log.Info("Bucket already removed") + return nil + default: + return errors.Wrap(aerr, "listing S3 bucket") + } + } + + // Stop on last page of results + if len(out.Contents) == 0 { + break + } + + log.Info("Deleting S3 objects of machine pools", "count", len(out.Contents)) + for _, obj := range out.Contents { + _, err := s.S3Client.DeleteObject(&s3.DeleteObjectInput{ 
+ Bucket: aws.String(bucketName), + Key: obj.Key, + }) + if err != nil { + return err + } + } + } + } + _, err := s.S3Client.DeleteBucket(&s3.DeleteBucketInput{ Bucket: aws.String(bucketName), }) @@ -169,6 +219,52 @@ func (s *Service) Create(m *scope.MachineScope, data []byte) (string, error) { return objectURL.String(), nil } +// CreateForMachinePool creates an object for machine pool related bootstrap data in the S3 bucket. +func (s *Service) CreateForMachinePool(scope scope.LaunchTemplateScope, data []byte) (string, error) { + if !s.bucketManagementEnabled() { + return "", errors.New("requested object creation but bucket management is not enabled") + } + + if scope.LaunchTemplateName() == "" { + return "", errors.New("launch template name can't be empty") + } + + if len(data) == 0 { + return "", errors.New("got empty data") + } + + bucket := s.bucketName() + key := s.bootstrapDataKeyForMachinePool(scope, userdata.ComputeHash(data)) + + s.scope.Info("Creating object for machine pool", "bucket_name", bucket, "key", key) + + if _, err := s.S3Client.PutObject(&s3.PutObjectInput{ + Body: aws.ReadSeekCloser(bytes.NewReader(data)), + Bucket: aws.String(bucket), + Key: aws.String(key), + ServerSideEncryption: aws.String("aws:kms"), + }); err != nil { + return "", errors.Wrap(err, "putting object for machine pool") + } + + if exp := s.scope.Bucket().PresignedURLDuration; exp != nil { + s.scope.Info("Generating presigned URL", "bucket_name", bucket, "key", key) + req, _ := s.S3Client.GetObjectRequest(&s3.GetObjectInput{ + Bucket: aws.String(bucket), + Key: aws.String(key), + }) + return req.Presign(exp.Duration) + } + + objectURL := &url.URL{ + Scheme: "s3", + Host: bucket, + Path: key, + } + + return objectURL.String(), nil +} + // Delete deletes the object from the S3 bucket. 
func (s *Service) Delete(m *scope.MachineScope) error { if !s.bucketManagementEnabled() { @@ -241,6 +337,43 @@ func (s *Service) deleteObject(bucket, key string) error { return nil } +// DeleteForMachinePool deletes the object for machine pool related bootstrap data from the S3 bucket. +func (s *Service) DeleteForMachinePool(scope scope.LaunchTemplateScope, bootstrapDataHash string) error { + if !s.bucketManagementEnabled() { + return errors.New("requested object deletion but bucket management is not enabled") + } + + if scope.LaunchTemplateName() == "" { + return errors.New("launch template name can't be empty") + } + + bucket := s.bucketName() + key := s.bootstrapDataKeyForMachinePool(scope, bootstrapDataHash) + + s.scope.Info("Deleting object for machine pool", "bucket_name", bucket, "key", key) + + _, err := s.S3Client.DeleteObject(&s3.DeleteObjectInput{ + Bucket: aws.String(bucket), + Key: aws.String(key), + }) + if err == nil { + return nil + } + + aerr, ok := err.(awserr.Error) + if !ok { + return errors.Wrap(err, "deleting S3 object for machine pool") + } + + switch aerr.Code() { + case s3.ErrCodeNoSuchBucket: + default: + return errors.Wrap(aerr, "deleting S3 object for machine pool") + } + + return nil +} + func (s *Service) createBucketIfNotExist(bucketName string) error { input := &s3.CreateBucketInput{Bucket: aws.String(bucketName)} @@ -294,6 +427,43 @@ func (s *Service) ensureBucketPolicy(bucketName string) error { return nil } +func (s *Service) ensureBucketLifecycleConfiguration(bucketName string) error { + if !feature.Gates.Enabled(feature.MachinePool) { + return nil + } + + input := &s3.PutBucketLifecycleConfigurationInput{ + Bucket: aws.String(bucketName), + LifecycleConfiguration: &s3.BucketLifecycleConfiguration{ + Rules: []*s3.LifecycleRule{ + { + ID: aws.String("machine-pool"), + Expiration: &s3.LifecycleExpiration{ + // The bootstrap token for new nodes to join the cluster is normally rotated regularly, + // such as in CAPI's `KubeadmConfig` 
reconciler. Therefore, the launch template user data + // stored in the S3 bucket only needs to live longer than the token TTL. + // This lifecycle policy is here as backup. Normally, CAPA should delete outdated S3 objects + // (see function `DeleteForMachinePool`). + Days: aws.Int64(1), + }, + Filter: &s3.LifecycleRuleFilter{ + Prefix: aws.String("machine-pool/"), + }, + Status: aws.String(s3.ExpirationStatusEnabled), + }, + }, + }, + } + + if _, err := s.S3Client.PutBucketLifecycleConfiguration(input); err != nil { + return errors.Wrap(err, "creating S3 bucket lifecycle configuration") + } + + s.scope.Trace("Updated bucket lifecycle configuration", "bucket_name", bucketName) + + return nil +} + func (s *Service) tagBucket(bucketName string) error { taggingInput := &s3.PutBucketTaggingInput{ Bucket: aws.String(bucketName), @@ -371,15 +541,26 @@ func (s *Service) bucketPolicy(bucketName string) (string, error) { } for _, iamInstanceProfile := range bucket.NodesIAMInstanceProfiles { - statements = append(statements, iam.StatementEntry{ - Sid: iamInstanceProfile, - Effect: iam.EffectAllow, - Principal: map[iam.PrincipalType]iam.PrincipalID{ - iam.PrincipalAWS: []string{fmt.Sprintf("arn:%s:iam::%s:role/%s", partition, *accountID.Account, iamInstanceProfile)}, + statements = append( + statements, + iam.StatementEntry{ + Sid: iamInstanceProfile, + Effect: iam.EffectAllow, + Principal: map[iam.PrincipalType]iam.PrincipalID{ + iam.PrincipalAWS: []string{fmt.Sprintf("arn:%s:iam::%s:role/%s", partition, *accountID.Account, iamInstanceProfile)}, + }, + Action: []string{"s3:GetObject"}, + Resource: []string{fmt.Sprintf("arn:%s:s3:::%s/node/*", partition, bucketName)}, }, - Action: []string{"s3:GetObject"}, - Resource: []string{fmt.Sprintf("arn:%s:s3:::%s/node/*", partition, bucketName)}, - }) + iam.StatementEntry{ + Sid: iamInstanceProfile, + Effect: iam.EffectAllow, + Principal: map[iam.PrincipalType]iam.PrincipalID{ + iam.PrincipalAWS: 
[]string{fmt.Sprintf("arn:%s:iam::%s:role/%s", partition, *accountID.Account, iamInstanceProfile)}, + }, + Action: []string{"s3:GetObject"}, + Resource: []string{fmt.Sprintf("arn:%s:s3:::%s/machine-pool/*", partition, bucketName)}, + }) } } @@ -408,3 +589,7 @@ func (s *Service) bootstrapDataKey(m *scope.MachineScope) string { // Use machine name as object key. return path.Join(m.Role(), m.Name()) } + +func (s *Service) bootstrapDataKeyForMachinePool(scope scope.LaunchTemplateScope, dataHash string) string { + return path.Join("machine-pool", scope.LaunchTemplateName(), dataHash) +} diff --git a/pkg/cloud/services/s3/s3_test.go b/pkg/cloud/services/s3/s3_test.go index 3db7abfca7..62995375c4 100644 --- a/pkg/cloud/services/s3/s3_test.go +++ b/pkg/cloud/services/s3/s3_test.go @@ -32,9 +32,11 @@ import ( "github.com/golang/mock/gomock" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + utilfeature "k8s.io/component-base/featuregate/testing" "sigs.k8s.io/controller-runtime/pkg/client/fake" infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" + "sigs.k8s.io/cluster-api-provider-aws/v2/feature" iamv1 "sigs.k8s.io/cluster-api-provider-aws/v2/iam/api/v1beta1" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/s3" @@ -49,8 +51,6 @@ const ( ) func TestReconcileBucket(t *testing.T) { - t.Parallel() - t.Run("does_nothing_when_bucket_management_is_disabled", func(t *testing.T) { t.Parallel() @@ -62,7 +62,7 @@ func TestReconcileBucket(t *testing.T) { }) t.Run("creates_bucket_with_configured_name", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() expectedBucketName := "baz" @@ -104,6 +104,7 @@ func TestReconcileBucket(t *testing.T) { s3Mock.EXPECT().PutBucketTagging(gomock.Eq(taggingInput)).Return(nil, nil).Times(1) s3Mock.EXPECT().PutBucketPolicy(gomock.Any()).Return(nil, nil).Times(1) + 
s3Mock.EXPECT().PutBucketLifecycleConfiguration(gomock.Any()).Return(nil, nil).Times(1) if err := svc.ReconcileBucket(); err != nil { t.Fatalf("Unexpected error: %v", err) @@ -111,7 +112,7 @@ func TestReconcileBucket(t *testing.T) { }) t.Run("hashes_default_bucket_name_if_name_exceeds_maximum_length", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() mockCtrl := gomock.NewController(t) s3Mock := mock_s3iface.NewMockS3API(mockCtrl) @@ -159,6 +160,7 @@ func TestReconcileBucket(t *testing.T) { s3Mock.EXPECT().PutBucketTagging(gomock.Any()).Return(nil, nil).Times(1) s3Mock.EXPECT().PutBucketPolicy(gomock.Any()).Return(nil, nil).Times(1) + s3Mock.EXPECT().PutBucketLifecycleConfiguration(gomock.Any()).Return(nil, nil).Times(1) if err := svc.ReconcileBucket(); err != nil { t.Fatalf("Unexpected error: %v", err) @@ -166,7 +168,7 @@ func TestReconcileBucket(t *testing.T) { }) t.Run("creates_bucket_with_policy_allowing_controlplane_and_worker_nodes_to_read_their_secrets", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() bucketName := "bar" @@ -213,6 +215,7 @@ func TestReconcileBucket(t *testing.T) { t.Errorf("Expected deny when not using SecureTransport; got: %v", policy) } }).Return(nil, nil).Times(1) + s3Mock.EXPECT().PutBucketLifecycleConfiguration(gomock.Any()).Return(nil, nil).Times(1) if err := svc.ReconcileBucket(); err != nil { t.Fatalf("Unexpected error: %v", err) @@ -220,13 +223,14 @@ func TestReconcileBucket(t *testing.T) { }) t.Run("is_idempotent", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, s3Mock := testService(t, &testServiceInput{Bucket: &infrav1.S3Bucket{}}) s3Mock.EXPECT().CreateBucket(gomock.Any()).Return(nil, nil).Times(2) s3Mock.EXPECT().PutBucketTagging(gomock.Any()).Return(nil, nil).Times(2) 
s3Mock.EXPECT().PutBucketPolicy(gomock.Any()).Return(nil, nil).Times(2) + s3Mock.EXPECT().PutBucketLifecycleConfiguration(gomock.Any()).Return(nil, nil).Times(2) if err := svc.ReconcileBucket(); err != nil { t.Fatalf("Unexpected error: %v", err) @@ -238,7 +242,7 @@ func TestReconcileBucket(t *testing.T) { }) t.Run("ignores_when_bucket_already_exists_but_its_owned_by_the_same_account", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, s3Mock := testService(t, &testServiceInput{Bucket: &infrav1.S3Bucket{}}) @@ -247,6 +251,7 @@ func TestReconcileBucket(t *testing.T) { s3Mock.EXPECT().CreateBucket(gomock.Any()).Return(nil, err).Times(1) s3Mock.EXPECT().PutBucketTagging(gomock.Any()).Return(nil, nil).Times(1) s3Mock.EXPECT().PutBucketPolicy(gomock.Any()).Return(nil, nil).Times(1) + s3Mock.EXPECT().PutBucketLifecycleConfiguration(gomock.Any()).Return(nil, nil).Times(1) if err := svc.ReconcileBucket(); err != nil { t.Fatalf("Unexpected error, got: %v", err) @@ -327,6 +332,7 @@ func TestReconcileBucket(t *testing.T) { s3Mock.EXPECT().CreateBucket(gomock.Eq(input)).Return(nil, nil).Times(1) s3Mock.EXPECT().PutBucketTagging(gomock.Any()).Return(nil, nil).Times(1) s3Mock.EXPECT().PutBucketPolicy(gomock.Any()).Return(nil, nil).Times(1) + s3Mock.EXPECT().PutBucketLifecycleConfiguration(gomock.Any()).Return(nil, nil).Times(1) if err := svc.ReconcileBucket(); err != nil { t.Fatalf("Unexpected error: %v", err) @@ -341,7 +347,7 @@ func TestDeleteBucket(t *testing.T) { const bucketName = "foo" t.Run("does_nothing_when_bucket_management_is_disabled", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, _ := testService(t, nil) @@ -351,7 +357,7 @@ func TestDeleteBucket(t *testing.T) { }) t.Run("deletes_bucket_with_configured_name", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, 
feature.Gates, feature.MachinePool, true)() svc, s3Mock := testService(t, &testServiceInput{ Bucket: &infrav1.S3Bucket{ @@ -363,6 +369,7 @@ func TestDeleteBucket(t *testing.T) { Bucket: aws.String(bucketName), } + s3Mock.EXPECT().ListObjectsV2(gomock.Any()).Return(&s3svc.ListObjectsV2Output{}, nil).Times(1) s3Mock.EXPECT().DeleteBucket(input).Return(nil, nil).Times(1) if err := svc.DeleteBucket(); err != nil { @@ -371,12 +378,12 @@ func TestDeleteBucket(t *testing.T) { }) t.Run("returns_error_when_bucket_removal_returns", func(t *testing.T) { - t.Parallel() t.Run("unexpected_error", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, s3Mock := testService(t, &testServiceInput{Bucket: &infrav1.S3Bucket{}}) + s3Mock.EXPECT().ListObjectsV2(gomock.Any()).Return(&s3svc.ListObjectsV2Output{}, nil).Times(1) s3Mock.EXPECT().DeleteBucket(gomock.Any()).Return(nil, errors.New("err")).Times(1) if err := svc.DeleteBucket(); err == nil { @@ -385,10 +392,11 @@ func TestDeleteBucket(t *testing.T) { }) t.Run("unexpected_AWS_error", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, s3Mock := testService(t, &testServiceInput{Bucket: &infrav1.S3Bucket{}}) + s3Mock.EXPECT().ListObjectsV2(gomock.Any()).Return(&s3svc.ListObjectsV2Output{}, nil).Times(1) s3Mock.EXPECT().DeleteBucket(gomock.Any()).Return(nil, awserr.New("foo", "", nil)).Times(1) if err := svc.DeleteBucket(); err == nil { @@ -398,10 +406,11 @@ func TestDeleteBucket(t *testing.T) { }) t.Run("ignores_when_bucket_has_already_been_removed", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, s3Mock := testService(t, &testServiceInput{Bucket: &infrav1.S3Bucket{}}) + s3Mock.EXPECT().ListObjectsV2(gomock.Any()).Return(&s3svc.ListObjectsV2Output{}, nil).Times(1) 
s3Mock.EXPECT().DeleteBucket(gomock.Any()).Return(nil, awserr.New(s3svc.ErrCodeNoSuchBucket, "", nil)).Times(1) if err := svc.DeleteBucket(); err != nil { @@ -410,10 +419,11 @@ func TestDeleteBucket(t *testing.T) { }) t.Run("skips_bucket_removal_when_bucket_is_not_empty", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, s3Mock := testService(t, &testServiceInput{Bucket: &infrav1.S3Bucket{}}) + s3Mock.EXPECT().ListObjectsV2(gomock.Any()).Return(&s3svc.ListObjectsV2Output{}, nil).Times(1) s3Mock.EXPECT().DeleteBucket(gomock.Any()).Return(nil, awserr.New("BucketNotEmpty", "", nil)).Times(1) if err := svc.DeleteBucket(); err != nil {