From cf80d0472c9f23454bce191ea302dbaefafe5f6e Mon Sep 17 00:00:00 2001 From: Mario Nitchev Date: Thu, 20 Jun 2024 17:27:33 +0300 Subject: [PATCH] Fix false errors in `irsa_operator_cluster_errors` metric (#260) * increase backoff * add backoff to cname validation * increase backoff * fix error * update changelog * add comment --- CHANGELOG.md | 6 ++++++ pkg/irsa/capa/capa.go | 23 ++++++++++++++--------- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0071c263..d5ac5419 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed + +- Increase backoff total time to 75 seconds. +- Add backoff when getting validation CNAME. +- Fix secret update error. + ## [0.27.3] - 2024-06-19 ### Fixed diff --git a/pkg/irsa/capa/capa.go b/pkg/irsa/capa/capa.go index e9c8c24e..0d3a959f 100644 --- a/pkg/irsa/capa/capa.go +++ b/pkg/irsa/capa/capa.go @@ -61,7 +61,10 @@ func (s *Service) Reconcile(ctx context.Context, outRequeueAfter *time.Duration) s.Scope.Logger().Info("Reconciling AWSCluster CR for IRSA") - b := backoff.NewMaxRetries(3, 5*time.Second) + // Most operations that require polling are quick, however some can take up + // to a minute to complete. Currently 75 seconds covers most of the + // errors that can occur. 
+ b := backoff.NewMaxRetries(15, 5*time.Second) err := s.S3.IsBucketReady(s.Scope.BucketName()) // Check if S3 bucket exists if err != nil { @@ -143,7 +146,13 @@ func (s *Service) Reconcile(ctx context.Context, outRequeueAfter *time.Duration) if !validated { // Check if DNS record is present - cname, err := s.ACM.GetValidationCNAME(*certificateArn) + var cname *route53.CNAME + getValidationCNAME := func() error { + var err error + cname, err = s.ACM.GetValidationCNAME(*certificateArn) + return err + } + err = backoff.Retry(getValidationCNAME, b) if err != nil { ctrlmetrics.Errors.WithLabelValues(s.Scope.Installation(), s.Scope.AccountID(), s.Scope.ClusterName(), s.Scope.ClusterNamespace()).Inc() s.Scope.Logger().Error(err, "failed to get ACM certificate's validation DNS record details") @@ -220,13 +229,9 @@ func (s *Service) Reconcile(ctx context.Context, outRequeueAfter *time.Duration) } // create new OIDC Cloudfront config - cfConfig := &v1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: s.Scope.ConfigName(), - Namespace: s.Scope.ClusterNamespace(), - }, - StringData: data, - } + cfConfig.Name = s.Scope.ConfigName() + cfConfig.Namespace = s.Scope.ClusterNamespace() + cfConfig.StringData = data if err := s.Client.Create(ctx, cfConfig); err != nil { ctrlmetrics.Errors.WithLabelValues(s.Scope.Installation(), s.Scope.AccountID(), s.Scope.ClusterName(), s.Scope.ClusterNamespace()).Inc()