Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 Make VPC creation idempotent to avoid indefinite creation of new VPCs if storage of the ID fails #4723

Merged
merged 1 commit into from
Feb 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion controllers/awscluster_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,10 @@ func mockedCreateVPCCalls(m *mocks.MockEC2APIMockRecorder) {
}

// mockedCreateMaximumVPCCalls stubs the EC2 mock so that the name-based VPC lookup
// returns no results, and the VPC creation attempt that follows fails with the
// account's VPC quota error.
func mockedCreateMaximumVPCCalls(m *mocks.MockEC2APIMockRecorder) {
	// The reconciler first checks whether a managed VPC already exists by name;
	// answer with an empty result set so it proceeds to create one.
	emptyDescribeResult := &ec2.DescribeVpcsOutput{Vpcs: []*ec2.Vpc{}}
	describeCall := m.
		DescribeVpcsWithContext(context.TODO(), gomock.AssignableToTypeOf(&ec2.DescribeVpcsInput{})).
		Return(emptyDescribeResult, nil)

	// The subsequent creation call hits the VPC limit for the account.
	m.
		CreateVpcWithContext(context.TODO(), gomock.AssignableToTypeOf(&ec2.CreateVpcInput{})).
		After(describeCall).
		Return(nil, errors.New("The maximum number of VPCs has been reached"))
}

func mockedDeleteVPCCallsForNonExistentVPC(m *mocks.MockEC2APIMockRecorder) {
Expand Down
89 changes: 79 additions & 10 deletions pkg/cloud/services/network/vpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,24 +103,45 @@ func (s *Service) reconcileVPC() error {
return nil
}

// .spec.vpc.id is nil, Create a new managed vpc.
if !conditions.Has(s.scope.InfraCluster(), infrav1.VpcReadyCondition) {
conditions.MarkFalse(s.scope.InfraCluster(), infrav1.VpcReadyCondition, infrav1.VpcCreationStartedReason, clusterv1.ConditionSeverityInfo, "")
if err := s.scope.PatchObject(); err != nil {
return errors.Wrap(err, "failed to patch conditions")
// .spec.vpc.id is nil. This means no managed VPC exists or we failed to save its ID before. Check if a managed VPC
// with the desired name exists, or if not, create a new managed VPC.

vpc, err := s.describeVPCByName()
if err == nil {
		// A VPC already exists with the desired name

if !vpc.Tags.HasOwned(s.scope.Name()) {
return errors.Errorf(
"found VPC %q which cannot be managed by CAPA due to lack of tags (either tag the VPC manually with `%s=%s`, or provide the `vpc.id` field instead if you wish to bring your own VPC as shown in https://cluster-api-aws.sigs.k8s.io/topics/bring-your-own-aws-infrastructure)",
vpc.ID,
infrav1.ClusterTagKey(s.scope.Name()),
infrav1.ResourceLifecycleOwned)
}
} else {
if !awserrors.IsNotFound(err) {
return errors.Wrap(err, "failed to describe VPC resources by name")
}

// VPC with that name does not exist yet. Create it.
vpc, err = s.createVPC()
if err != nil {
return errors.Wrap(err, "failed to create new managed VPC")
}
s.scope.Info("Created VPC", "vpc-id", vpc.ID)
}
vpc, err := s.createVPC()
if err != nil {
return errors.Wrap(err, "failed to create new vpc")
}
s.scope.Info("Created VPC", "vpc-id", vpc.ID)

s.scope.VPC().CidrBlock = vpc.CidrBlock
s.scope.VPC().IPv6 = vpc.IPv6
s.scope.VPC().Tags = vpc.Tags
s.scope.VPC().ID = vpc.ID

if !conditions.Has(s.scope.InfraCluster(), infrav1.VpcReadyCondition) {
conditions.MarkFalse(s.scope.InfraCluster(), infrav1.VpcReadyCondition, infrav1.VpcCreationStartedReason, clusterv1.ConditionSeverityInfo, "")
if err := s.scope.PatchObject(); err != nil {
return errors.Wrap(err, "failed to patch conditions")
}
}

// Make sure attributes are configured
if err := wait.WaitForWithRetryable(wait.NewBackoff(), func() (bool, error) {
if err := s.ensureManagedVPCAttributes(vpc); err != nil {
Expand Down Expand Up @@ -573,6 +594,54 @@ func (s *Service) describeVPCByID() (*infrav1.VPCSpec, error) {
return vpc, nil
}

// describeVPCByName finds the VPC by its `Name` tag. Use this if the ID is not available yet,
// either because no VPC was created until now or if storing the ID could have failed.
//
// It returns a NotFound error when no VPC with the expected name exists (or none is in an
// available/pending state), and a Conflict error when more than one matches, since only one
// managed VPC per cluster name is supported.
func (s *Service) describeVPCByName() (*infrav1.VPCSpec, error) {
	// The Name tag is deterministic for a given cluster (see getVPCTagParams), so it can be
	// used to re-discover a previously created VPC even if its ID was never persisted.
	vpcName := *s.getVPCTagParams(services.TemporaryResourceID).Name

	input := &ec2.DescribeVpcsInput{
		Filters: []*ec2.Filter{
			{
				Name:   aws.String("tag:Name"),
				Values: aws.StringSlice([]string{vpcName}),
			},
		},
	}

	out, err := s.EC2Client.DescribeVpcsWithContext(context.TODO(), input)
	// Treat both an AWS-side NotFound error and an empty result list as "no VPC with this name".
	if (err != nil && awserrors.IsNotFound(err)) || (out != nil && len(out.Vpcs) == 0) {
		return nil, awserrors.NewNotFound(fmt.Sprintf("could not find VPC by name %q", vpcName))
	}
	if err != nil {
		return nil, errors.Wrapf(err, "failed to query ec2 for VPCs by name %q", vpcName)
	}
	if len(out.Vpcs) > 1 {
		return nil, awserrors.NewConflict(fmt.Sprintf("found %v VPCs with name %q. Only one VPC per cluster name is supported. Ensure duplicate VPCs are deleted for this AWS account and there are no conflicting instances of Cluster API Provider AWS. Filtered VPCs: %v", len(out.Vpcs), vpcName, out.GoString()))
	}

	// Only accept VPCs that are usable or about to become usable. Anything else (e.g. a VPC
	// being deleted) is reported as not found so the caller can create a fresh one.
	// aws.StringValue guards against nil pointers in the SDK response.
	switch aws.StringValue(out.Vpcs[0].State) {
	case ec2.VpcStateAvailable, ec2.VpcStatePending:
	default:
		return nil, awserrors.NewNotFound(fmt.Sprintf("could not find available or pending VPC by name %q", vpcName))
	}

	vpc := &infrav1.VPCSpec{
		ID:        aws.StringValue(out.Vpcs[0].VpcId),
		CidrBlock: aws.StringValue(out.Vpcs[0].CidrBlock),
		Tags:      converters.TagsToMap(out.Vpcs[0].Tags),
	}
	for _, set := range out.Vpcs[0].Ipv6CidrBlockAssociationSet {
		// Use the VPC-specific CIDR block state constant; its value ("associated") is
		// identical to the subnet constant, so behavior is unchanged.
		if aws.StringValue(set.Ipv6CidrBlockState.State) == ec2.VpcCidrBlockStateCodeAssociated {
			vpc.IPv6 = &infrav1.IPv6{
				CidrBlock: aws.StringValue(set.Ipv6CidrBlock),
				PoolID:    aws.StringValue(set.Ipv6Pool),
			}
			break
		}
	}
	return vpc, nil
}

func (s *Service) getVPCTagParams(id string) infrav1.BuildParams {
name := fmt.Sprintf("%s-vpc", s.scope.Name())

Expand Down
Loading