-
Notifications
You must be signed in to change notification settings - Fork 22
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix race-condition when IAM Role has been created previously #82
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -229,29 +229,28 @@ func (r *IamroleReconciler) HandleReconcile(ctx context.Context, req ctrl.Reques | |
fallthrough | ||
default: | ||
|
||
resp, err := r.IAMClient.CreateRole(ctx, *input) | ||
// Default behavior on new Iamrole resource state is to go off and create it | ||
resp, err := r.IAMClient.EnsureRole(ctx, *input) | ||
if err != nil { | ||
log.Error(err, "error in creating a role") | ||
state := iammanagerv1alpha1.Error | ||
|
||
if strings.Contains(err.Error(), awsapi.RoleAlreadyExistsError) { | ||
// This check verifies whether or not the IAM Role somehow already exists, but is allocated to another namespace based on the tag applied to it. | ||
if strings.Contains(err.Error(), awsapi.RoleExistsAlreadyForOtherNamespace) { | ||
state = iammanagerv1alpha1.RoleNameNotAvailable | ||
|
||
//Role itself is not created | ||
roleName = "" | ||
} | ||
r.Recorder.Event(iamRole, v1.EventTypeWarning, string(state), "Unable to create/update iam role due to error "+err.Error()) | ||
return r.UpdateStatus(ctx, iamRole, iammanagerv1alpha1.IamroleStatus{RetryCount: iamRole.Status.RetryCount + 1, RoleName: roleName, ErrorDescription: err.Error(), State: state, LastUpdatedTimestamp: metav1.Now()}, requeueTime) | ||
} | ||
|
||
//OK. Successful!! | ||
// Is this IRSA role? If yes, Create/update Service Account with required annotation | ||
flag, saName := utils.ParseIRSAAnnotation(ctx, iamRole) | ||
if flag { | ||
//CreateOrUpdateServiceAccount | ||
roleARN := resp.RoleARN | ||
if roleARN == "" { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In this case - |
||
roleARN = iamRole.Status.RoleARN | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This status field only exists if the role has been fully created previously... but during an initial "create" phase, this is also an empty string.. |
||
} | ||
if err := k8s.NewK8sManagerClient(r.Client).CreateOrUpdateServiceAccount(ctx, saName, iamRole.Namespace, roleARN); err != nil { | ||
if err := k8s.NewK8sManagerClient(r.Client).CreateOrUpdateServiceAccount(ctx, saName, iamRole.Namespace, resp.RoleARN); err != nil { | ||
log.Error(err, "error in updating service account for IRSA role") | ||
r.Recorder.Event(iamRole, v1.EventTypeWarning, string(iammanagerv1alpha1.Error), "Unable to create/update service account for IRSA role due to error "+err.Error()) | ||
return r.UpdateStatus(ctx, iamRole, iammanagerv1alpha1.IamroleStatus{RetryCount: iamRole.Status.RetryCount + 1, RoleName: roleName, ErrorDescription: err.Error(), State: iammanagerv1alpha1.Error, LastUpdatedTimestamp: metav1.Now()}, requeueTime) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,15 +19,17 @@ import ( | |
//IAMIface defines interface methods | ||
type IAMIface interface { | ||
CreateRole(ctx context.Context, req IAMRoleRequest) | ||
EnsureRole(ctx context.Context, req IAMRoleRequest) | ||
UpdateRole(ctx context.Context, req IAMRoleRequest) | ||
DeleteRole(ctx context.Context, roleName string) | ||
GetRole(ctx context.Context, roleName string) | ||
AttachInlineRolePolicy(ctx context.Context, req IAMRoleRequest) | ||
AddPermissionBoundary(ctx context.Context, req IAMRoleRequest) error | ||
GetRolePolicy(ctx context.Context, req IAMRoleRequest) bool | ||
} | ||
|
||
const ( | ||
RoleAlreadyExistsError = "Please choose a different name" | ||
RoleExistsAlreadyForOtherNamespace = "Please choose a different name" | ||
) | ||
|
||
//IAMRoleRequest struct | ||
|
@@ -48,6 +50,20 @@ type IAMRoleResponse struct { | |
RoleID string | ||
} | ||
|
||
func NewIAMRoleResponseFromGetRole(output iam.GetRoleOutput) *IAMRoleResponse { | ||
return &IAMRoleResponse{ | ||
RoleARN: aws.StringValue(output.Role.Arn), | ||
RoleID: aws.StringValue(output.Role.RoleId), | ||
} | ||
} | ||
|
||
func NewIAMRoleResponseFromCreateRole(output iam.CreateRoleOutput) *IAMRoleResponse { | ||
return &IAMRoleResponse{ | ||
RoleARN: aws.StringValue(output.Role.Arn), | ||
RoleID: aws.StringValue(output.Role.RoleId), | ||
} | ||
} | ||
|
||
type IAM struct { | ||
Client iamiface.IAMAPI | ||
} | ||
|
@@ -63,53 +79,16 @@ func NewIAM(region string) *IAM { | |
} | ||
} | ||
|
||
// CreateRole creates/updates the role | ||
func (i *IAM) CreateRole(ctx context.Context, req IAMRoleRequest) (*IAMRoleResponse, error) { | ||
log := log.Logger(ctx, "awsapi", "iam", "CreateRole") | ||
// EnsureRole ensures that a role exists, and that it has the appropriate configuration | ||
func (i *IAM) EnsureRole(ctx context.Context, req IAMRoleRequest) (*IAMRoleResponse, error) { | ||
log := log.Logger(ctx, "awsapi", "iam", "EnsureRole") | ||
log = log.WithValues("roleName", req.Name) | ||
log.V(1).Info("Initiating api call") | ||
input := &iam.CreateRoleInput{ | ||
AssumeRolePolicyDocument: aws.String(req.TrustPolicy), | ||
RoleName: aws.String(req.Name), | ||
Description: aws.String(req.Description), | ||
MaxSessionDuration: aws.Int64(req.SessionDuration), | ||
PermissionsBoundary: aws.String(req.ManagedPermissionBoundaryPolicy), | ||
} | ||
|
||
if err := input.Validate(); err != nil { | ||
log.Error(err, "input validation failed") | ||
return nil, err | ||
} | ||
|
||
roleAlreadyExists := false | ||
iResp, err := i.Client.CreateRole(input) | ||
// Get the role, or create it. | ||
log.V(1).Info("Verifying that IAM Role exists") | ||
role, err := i.GetOrCreateRole(ctx, req) | ||
if err != nil { | ||
if aerr, ok := err.(awserr.Error); ok { | ||
switch aerr.Code() { | ||
// Update the role to the latest spec if it is existed already | ||
case iam.ErrCodeEntityAlreadyExistsException: | ||
roleAlreadyExists = true | ||
log.Info(iam.ErrCodeEntityAlreadyExistsException) | ||
case iam.ErrCodeLimitExceededException: | ||
log.Error(err, iam.ErrCodeLimitExceededException) | ||
case iam.ErrCodeNoSuchEntityException: | ||
log.Error(err, iam.ErrCodeNoSuchEntityException) | ||
case iam.ErrCodeServiceFailureException: | ||
log.Error(err, iam.ErrCodeServiceFailureException) | ||
default: | ||
log.Error(err, aerr.Error()) | ||
} | ||
} | ||
if !roleAlreadyExists { | ||
return nil, err | ||
} | ||
} | ||
|
||
resp := &IAMRoleResponse{} | ||
|
||
if !roleAlreadyExists { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So if an exception for |
||
resp.RoleARN = aws.StringValue(iResp.Role.Arn) | ||
resp.RoleID = aws.StringValue(iResp.Role.RoleId) | ||
return &IAMRoleResponse{}, err | ||
} | ||
|
||
//Verify tags | ||
|
@@ -156,16 +135,51 @@ func (i *IAM) CreateRole(ctx context.Context, req IAMRoleRequest) (*IAMRoleRespo | |
return &IAMRoleResponse{}, err | ||
} | ||
|
||
return resp, nil | ||
return role, nil | ||
} | ||
|
||
// GetOrCreateRole will try to create a new IAM Role in AWS. If it exists already, it will | ||
// use that role. In either case we return back an IAMRoleResponse{} object. | ||
func (i *IAM) GetOrCreateRole(ctx context.Context, req IAMRoleRequest) (*IAMRoleResponse, error) { | ||
log := log.Logger(ctx, "awsapi", "iam", "GetOrCreateRole") | ||
log = log.WithValues("roleName", req.Name) | ||
|
||
// Try getting the role. If the role already exists, just return it. | ||
getResp, err := i.GetRole(ctx, req) | ||
if err != nil { | ||
if aerr, ok := err.(awserr.Error); ok { | ||
switch aerr.Code() { | ||
case iam.ErrCodeNoSuchEntityException: | ||
log.Info("Role does not already exist") | ||
default: | ||
return nil, err | ||
} | ||
} | ||
} | ||
if getResp != nil { | ||
return NewIAMRoleResponseFromGetRole(*getResp), nil | ||
} | ||
|
||
// If the role already exists - we'll figure that out based on the error that is returned by AWS. | ||
createResp, err := i.CreateRole(ctx, req) | ||
if err != nil { | ||
return nil, err | ||
} | ||
if createResp != nil { | ||
return NewIAMRoleResponseFromCreateRole(*createResp), nil | ||
} | ||
|
||
// if we got here, something really strange happened.. we got no errors, but also no responses? | ||
return nil, nil | ||
} | ||
|
||
//VerifyTags function verifies the tags attached to the role | ||
func (i *IAM) VerifyTags(ctx context.Context, req IAMRoleRequest) (*IAMRoleResponse, error) { | ||
log := log.Logger(ctx, "awsapi", "iam", "VerifyTags") | ||
log = log.WithValues("roleName", req.Name) | ||
log.V(1).Info("Initiating api call") | ||
//Lets first list the tags and look for namespace and cluster tags | ||
|
||
//Lets first list the tags and look for namespace and cluster tags | ||
listTags, err := i.Client.ListRoleTags(&iam.ListRoleTagsInput{ | ||
RoleName: aws.String(req.Name), | ||
}) | ||
|
@@ -191,7 +205,7 @@ func (i *IAM) VerifyTags(ctx context.Context, req IAMRoleRequest) (*IAMRoleRespo | |
} | ||
|
||
if flag { | ||
return nil, fmt.Errorf("role name %s in AWS is not available. %s", req.Name, RoleAlreadyExistsError) | ||
return nil, fmt.Errorf("role name %s in AWS is not available. %s", req.Name, RoleExistsAlreadyForOtherNamespace) | ||
} | ||
|
||
return &IAMRoleResponse{}, nil | ||
|
@@ -405,27 +419,36 @@ func (i *IAM) AttachInlineRolePolicy(ctx context.Context, req IAMRoleRequest) (* | |
return &IAMRoleResponse{}, nil | ||
} | ||
|
||
//GetRole gets the role from aws iam | ||
func (i *IAM) GetRole(ctx context.Context, req IAMRoleRequest) (*iam.GetRoleOutput, error) { | ||
log := log.Logger(ctx, "awsapi", "iam", "GetRole") | ||
// CreateRole will try to create an IAM Role, or return back Nil if it can not be created | ||
func (i *IAM) CreateRole(ctx context.Context, req IAMRoleRequest) (*iam.CreateRoleOutput, error) { | ||
log := log.Logger(ctx, "awsapi", "iam", "CreateRole") | ||
log = log.WithValues("roleName", req.Name) | ||
log.V(1).Info("Initiating api call") | ||
// First get the iam role policy on the AWS IAM side | ||
input := &iam.GetRoleInput{ | ||
RoleName: aws.String(req.Name), | ||
|
||
input := &iam.CreateRoleInput{ | ||
AssumeRolePolicyDocument: aws.String(req.TrustPolicy), | ||
RoleName: aws.String(req.Name), | ||
Description: aws.String(req.Description), | ||
MaxSessionDuration: aws.Int64(req.SessionDuration), | ||
PermissionsBoundary: aws.String(req.ManagedPermissionBoundaryPolicy), | ||
} | ||
|
||
if err := input.Validate(); err != nil { | ||
log.Error(err, "input validation failed") | ||
//should log the error | ||
return nil, err | ||
} | ||
|
||
resp, err := i.Client.GetRole(input) | ||
// If the role already exists - we'll figure that out based on the error that is returned by AWS. | ||
log.V(1).Info("Initiating api call") | ||
resp, err := i.Client.CreateRole(input) | ||
|
||
if err != nil { | ||
if aerr, ok := err.(awserr.Error); ok { | ||
switch aerr.Code() { | ||
// Update the role to the latest spec if it is existed already | ||
case iam.ErrCodeEntityAlreadyExistsException: | ||
log.Info(iam.ErrCodeEntityAlreadyExistsException) | ||
case iam.ErrCodeLimitExceededException: | ||
log.Error(err, iam.ErrCodeLimitExceededException) | ||
case iam.ErrCodeNoSuchEntityException: | ||
log.Error(err, iam.ErrCodeNoSuchEntityException) | ||
case iam.ErrCodeServiceFailureException: | ||
|
@@ -437,6 +460,31 @@ func (i *IAM) GetRole(ctx context.Context, req IAMRoleRequest) (*iam.GetRoleOutp | |
|
||
return nil, err | ||
} | ||
log.V(1).Info("Successfully created the role") | ||
|
||
return resp, nil | ||
} | ||
|
||
//GetRole gets the role from aws iam | ||
func (i *IAM) GetRole(ctx context.Context, req IAMRoleRequest) (*iam.GetRoleOutput, error) { | ||
log := log.Logger(ctx, "awsapi", "iam", "GetRole") | ||
log = log.WithValues("roleName", req.Name) | ||
log.V(1).Info("Initiating api call") | ||
// First get the iam role policy on the AWS IAM side | ||
input := &iam.GetRoleInput{ | ||
RoleName: aws.String(req.Name), | ||
} | ||
|
||
if err := input.Validate(); err != nil { | ||
log.Error(err, "input validation failed") | ||
//should log the error | ||
return nil, err | ||
} | ||
|
||
resp, err := i.Client.GetRole(input) | ||
if err != nil { | ||
return nil, err | ||
} | ||
log.V(1).Info("Successfully able to get the role") | ||
|
||
return resp, nil | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I renamed this exception string because it really wasn't checking if the role exists, but was checking if it exists and was tagged for another namespace.