Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix race-condition when IAM Role has been created previously #82

Merged
merged 2 commits into from
Jul 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 7 additions & 8 deletions controllers/iamrole_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,29 +229,28 @@ func (r *IamroleReconciler) HandleReconcile(ctx context.Context, req ctrl.Reques
fallthrough
default:

resp, err := r.IAMClient.CreateRole(ctx, *input)
// Default behavior on new Iamrole resource state is to go off and create it
resp, err := r.IAMClient.EnsureRole(ctx, *input)
if err != nil {
log.Error(err, "error in creating a role")
state := iammanagerv1alpha1.Error

if strings.Contains(err.Error(), awsapi.RoleAlreadyExistsError) {
// This check verifies whether or not the IAM Role somehow already exists, but is allocated to another namespace based on the tag applied to it.
if strings.Contains(err.Error(), awsapi.RoleExistsAlreadyForOtherNamespace) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I renamed this exception string because it really wasn't checking if the role exists, but was checking if it exists and was tagged for another namespace.

state = iammanagerv1alpha1.RoleNameNotAvailable

//Role itself is not created
roleName = ""
}
r.Recorder.Event(iamRole, v1.EventTypeWarning, string(state), "Unable to create/update iam role due to error "+err.Error())
return r.UpdateStatus(ctx, iamRole, iammanagerv1alpha1.IamroleStatus{RetryCount: iamRole.Status.RetryCount + 1, RoleName: roleName, ErrorDescription: err.Error(), State: state, LastUpdatedTimestamp: metav1.Now()}, requeueTime)
}

//OK. Successful!!
// Is this IRSA role? If yes, Create/update Service Account with required annotation
flag, saName := utils.ParseIRSAAnnotation(ctx, iamRole)
if flag {
//CreateOrUpdateServiceAccount
roleARN := resp.RoleARN
if roleARN == "" {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case - roleARN has the chance to be "" if the Role already exists in AWS. Check out my comment at https://github.com/keikoproj/iam-manager/pull/82/files#diff-cca9ed36302b8c25c0285718b2a7c46e3d65997b8731430231b4675d68fe38b4L110-L112.

roleARN = iamRole.Status.RoleARN
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This status field only exists if the role has been fully created previously... but during an initial "create" phase, this is also an empty string..

}
if err := k8s.NewK8sManagerClient(r.Client).CreateOrUpdateServiceAccount(ctx, saName, iamRole.Namespace, roleARN); err != nil {
if err := k8s.NewK8sManagerClient(r.Client).CreateOrUpdateServiceAccount(ctx, saName, iamRole.Namespace, resp.RoleARN); err != nil {
log.Error(err, "error in updating service account for IRSA role")
r.Recorder.Event(iamRole, v1.EventTypeWarning, string(iammanagerv1alpha1.Error), "Unable to create/update service account for IRSA role due to error "+err.Error())
return r.UpdateStatus(ctx, iamRole, iammanagerv1alpha1.IamroleStatus{RetryCount: iamRole.Status.RetryCount + 1, RoleName: roleName, ErrorDescription: err.Error(), State: iammanagerv1alpha1.Error, LastUpdatedTimestamp: metav1.Now()}, requeueTime)
Expand Down
162 changes: 105 additions & 57 deletions pkg/awsapi/iam.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,17 @@ import (
//IAMIface defines interface methods
type IAMIface interface {
CreateRole(ctx context.Context, req IAMRoleRequest)
EnsureRole(ctx context.Context, req IAMRoleRequest)
UpdateRole(ctx context.Context, req IAMRoleRequest)
DeleteRole(ctx context.Context, roleName string)
GetRole(ctx context.Context, roleName string)
AttachInlineRolePolicy(ctx context.Context, req IAMRoleRequest)
AddPermissionBoundary(ctx context.Context, req IAMRoleRequest) error
GetRolePolicy(ctx context.Context, req IAMRoleRequest) bool
}

const (
RoleAlreadyExistsError = "Please choose a different name"
RoleExistsAlreadyForOtherNamespace = "Please choose a different name"
)

//IAMRoleRequest struct
Expand All @@ -48,6 +50,20 @@ type IAMRoleResponse struct {
RoleID string
}

func NewIAMRoleResponseFromGetRole(output iam.GetRoleOutput) *IAMRoleResponse {
return &IAMRoleResponse{
RoleARN: aws.StringValue(output.Role.Arn),
RoleID: aws.StringValue(output.Role.RoleId),
}
}

func NewIAMRoleResponseFromCreateRole(output iam.CreateRoleOutput) *IAMRoleResponse {
return &IAMRoleResponse{
RoleARN: aws.StringValue(output.Role.Arn),
RoleID: aws.StringValue(output.Role.RoleId),
}
}

type IAM struct {
Client iamiface.IAMAPI
}
Expand All @@ -63,53 +79,16 @@ func NewIAM(region string) *IAM {
}
}

// CreateRole creates/updates the role
func (i *IAM) CreateRole(ctx context.Context, req IAMRoleRequest) (*IAMRoleResponse, error) {
log := log.Logger(ctx, "awsapi", "iam", "CreateRole")
// EnsureRole ensures that a role exists, and that it has the appropriate configuration
func (i *IAM) EnsureRole(ctx context.Context, req IAMRoleRequest) (*IAMRoleResponse, error) {
log := log.Logger(ctx, "awsapi", "iam", "EnsureRole")
log = log.WithValues("roleName", req.Name)
log.V(1).Info("Initiating api call")
input := &iam.CreateRoleInput{
AssumeRolePolicyDocument: aws.String(req.TrustPolicy),
RoleName: aws.String(req.Name),
Description: aws.String(req.Description),
MaxSessionDuration: aws.Int64(req.SessionDuration),
PermissionsBoundary: aws.String(req.ManagedPermissionBoundaryPolicy),
}

if err := input.Validate(); err != nil {
log.Error(err, "input validation failed")
return nil, err
}

roleAlreadyExists := false
iResp, err := i.Client.CreateRole(input)
// Get the role, or create it.
log.V(1).Info("Verifying that IAM Role exists")
role, err := i.GetOrCreateRole(ctx, req)
if err != nil {
if aerr, ok := err.(awserr.Error); ok {
switch aerr.Code() {
// Update the role to the latest spec if it is existed already
case iam.ErrCodeEntityAlreadyExistsException:
roleAlreadyExists = true
log.Info(iam.ErrCodeEntityAlreadyExistsException)
case iam.ErrCodeLimitExceededException:
log.Error(err, iam.ErrCodeLimitExceededException)
case iam.ErrCodeNoSuchEntityException:
log.Error(err, iam.ErrCodeNoSuchEntityException)
case iam.ErrCodeServiceFailureException:
log.Error(err, iam.ErrCodeServiceFailureException)
default:
log.Error(err, aerr.Error())
}
}
if !roleAlreadyExists {
return nil, err
}
}

resp := &IAMRoleResponse{}

if !roleAlreadyExists {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So if an exception for ErrCodeEntityAlreadyExistsException comes back, then this is set True. If it is set to True, the code never sets the resp.RoleARN or resp.RoleID fields in the IAMRoleResponse object, which is what triggers the ServiceAccount annotation to get set to an empty string. In the event that the role already exists, we want the code to be intelligent and get the ARN/ID for us.

resp.RoleARN = aws.StringValue(iResp.Role.Arn)
resp.RoleID = aws.StringValue(iResp.Role.RoleId)
return &IAMRoleResponse{}, err
}

//Verify tags
Expand Down Expand Up @@ -156,16 +135,51 @@ func (i *IAM) CreateRole(ctx context.Context, req IAMRoleRequest) (*IAMRoleRespo
return &IAMRoleResponse{}, err
}

return resp, nil
return role, nil
}

// GetOrCreateRole will try to create a new IAM Role in AWS. If it exists already, it will
// use that role. In either case we return back an IAMRoleResponse{} object.
func (i *IAM) GetOrCreateRole(ctx context.Context, req IAMRoleRequest) (*IAMRoleResponse, error) {
log := log.Logger(ctx, "awsapi", "iam", "GetOrCreateRole")
log = log.WithValues("roleName", req.Name)

// Try getting the role. If the role already exists, just return it.
getResp, err := i.GetRole(ctx, req)
if err != nil {
if aerr, ok := err.(awserr.Error); ok {
switch aerr.Code() {
case iam.ErrCodeNoSuchEntityException:
log.Info("Role does not already exist")
default:
return nil, err
}
}
}
if getResp != nil {
return NewIAMRoleResponseFromGetRole(*getResp), nil
}

// If the role already exists - we'll figure that out based on the error that is returned by AWS.
createResp, err := i.CreateRole(ctx, req)
if err != nil {
return nil, err
}
if createResp != nil {
return NewIAMRoleResponseFromCreateRole(*createResp), nil
}

// if we got here, something really strange happened.. we got no errors, but also no responses?
return nil, nil
}

//VerifyTags function verifies the tags attached to the role
func (i *IAM) VerifyTags(ctx context.Context, req IAMRoleRequest) (*IAMRoleResponse, error) {
log := log.Logger(ctx, "awsapi", "iam", "VerifyTags")
log = log.WithValues("roleName", req.Name)
log.V(1).Info("Initiating api call")
//Lets first list the tags and look for namespace and cluster tags

//Lets first list the tags and look for namespace and cluster tags
listTags, err := i.Client.ListRoleTags(&iam.ListRoleTagsInput{
RoleName: aws.String(req.Name),
})
Expand All @@ -191,7 +205,7 @@ func (i *IAM) VerifyTags(ctx context.Context, req IAMRoleRequest) (*IAMRoleRespo
}

if flag {
return nil, fmt.Errorf("role name %s in AWS is not available. %s", req.Name, RoleAlreadyExistsError)
return nil, fmt.Errorf("role name %s in AWS is not available. %s", req.Name, RoleExistsAlreadyForOtherNamespace)
}

return &IAMRoleResponse{}, nil
Expand Down Expand Up @@ -405,27 +419,36 @@ func (i *IAM) AttachInlineRolePolicy(ctx context.Context, req IAMRoleRequest) (*
return &IAMRoleResponse{}, nil
}

//GetRole gets the role from aws iam
func (i *IAM) GetRole(ctx context.Context, req IAMRoleRequest) (*iam.GetRoleOutput, error) {
log := log.Logger(ctx, "awsapi", "iam", "GetRole")
// CreateRole will try to create an IAM Role, or return back Nil if it can not be created
func (i *IAM) CreateRole(ctx context.Context, req IAMRoleRequest) (*iam.CreateRoleOutput, error) {
log := log.Logger(ctx, "awsapi", "iam", "CreateRole")
log = log.WithValues("roleName", req.Name)
log.V(1).Info("Initiating api call")
// First get the iam role policy on the AWS IAM side
input := &iam.GetRoleInput{
RoleName: aws.String(req.Name),

input := &iam.CreateRoleInput{
AssumeRolePolicyDocument: aws.String(req.TrustPolicy),
RoleName: aws.String(req.Name),
Description: aws.String(req.Description),
MaxSessionDuration: aws.Int64(req.SessionDuration),
PermissionsBoundary: aws.String(req.ManagedPermissionBoundaryPolicy),
}

if err := input.Validate(); err != nil {
log.Error(err, "input validation failed")
//should log the error
return nil, err
}

resp, err := i.Client.GetRole(input)
// If the role already exists - we'll figure that out based on the error that is returned by AWS.
log.V(1).Info("Initiating api call")
resp, err := i.Client.CreateRole(input)

if err != nil {
if aerr, ok := err.(awserr.Error); ok {
switch aerr.Code() {
// Update the role to the latest spec if it is existed already
case iam.ErrCodeEntityAlreadyExistsException:
log.Info(iam.ErrCodeEntityAlreadyExistsException)
case iam.ErrCodeLimitExceededException:
log.Error(err, iam.ErrCodeLimitExceededException)
case iam.ErrCodeNoSuchEntityException:
log.Error(err, iam.ErrCodeNoSuchEntityException)
case iam.ErrCodeServiceFailureException:
Expand All @@ -437,6 +460,31 @@ func (i *IAM) GetRole(ctx context.Context, req IAMRoleRequest) (*iam.GetRoleOutp

return nil, err
}
log.V(1).Info("Successfully created the role")

return resp, nil
}

//GetRole gets the role from aws iam
func (i *IAM) GetRole(ctx context.Context, req IAMRoleRequest) (*iam.GetRoleOutput, error) {
log := log.Logger(ctx, "awsapi", "iam", "GetRole")
log = log.WithValues("roleName", req.Name)
log.V(1).Info("Initiating api call")
// First get the iam role policy on the AWS IAM side
input := &iam.GetRoleInput{
RoleName: aws.String(req.Name),
}

if err := input.Validate(); err != nil {
log.Error(err, "input validation failed")
//should log the error
return nil, err
}

resp, err := i.Client.GetRole(input)
if err != nil {
return nil, err
}
log.V(1).Info("Successfully able to get the role")

return resp, nil
Expand Down
Loading