diff --git a/controllers/scanapi/conditions.go b/controllers/scanapi/conditions.go index 5c96785c..573be90b 100644 --- a/controllers/scanapi/conditions.go +++ b/controllers/scanapi/conditions.go @@ -48,7 +48,6 @@ func updateScanAPIConditions(config *mondoov1alpha2.MondooAuditConfig, degradedS } currentPod := k8s.GetNewestPodFromList(pods.Items) - logger.Info("ScanAPI controller is unavailable", " pod ", currentPod.Status.ContainerStatuses) for i, containerStatus := range currentPod.Status.ContainerStatuses { if containerStatus.Name != "cnspec" { continue @@ -60,6 +59,8 @@ func updateScanAPIConditions(config *mondoov1alpha2.MondooAuditConfig, degradedS status = corev1.ConditionTrue affectedPods = append(affectedPods, currentPod.Name) memoryLimit = currentPod.Spec.Containers[i].Resources.Limits.Memory().String() + logger.Info("ScanAPI OOM detected", " pod ", currentPod.Name, "memory limit", memoryLimit) + break } } diff --git a/go.mod b/go.mod index 65e4644c..91e3ad60 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( github.com/gobwas/glob v0.2.3 github.com/golang-jwt/jwt/v4 v4.5.0 github.com/golang/mock v1.6.0 - github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.70.0 + github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2 github.com/rs/zerolog v1.32.0 github.com/spf13/cobra v1.8.0 github.com/stretchr/testify v1.8.4 @@ -330,16 +330,16 @@ require ( github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/google/go-cmp v0.6.0 - github.com/google/go-containerregistry v0.17.0 + github.com/google/go-containerregistry v0.19.0 github.com/google/gofuzz v1.2.0 // indirect - github.com/google/uuid v1.5.0 + github.com/google/uuid v1.6.0 github.com/imdario/mergo v0.3.16 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/prometheus/client_golang v1.18.0 - github.com/prometheus/client_model v0.5.0 + github.com/prometheus/client_model v0.6.0 github.com/prometheus/common v0.45.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect github.com/spf13/pflag v1.0.6-0.20201009195203-85dd5c8bc61c // indirect diff --git a/go.sum b/go.sum index 54057b6a..be86436e 100644 --- a/go.sum +++ b/go.sum @@ -512,8 +512,8 @@ github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/go-containerregistry v0.17.0 h1:5p+zYs/R4VGHkhyvgWurWrpJ2hW4Vv9fQI+GzdcwXLk= -github.com/google/go-containerregistry v0.17.0/go.mod h1:u0qB2l7mvtWVR5kNcbFIhFY1hLbf8eeGapA+vbFDCtQ= +github.com/google/go-containerregistry v0.19.0 h1:uIsMRBV7m/HDkDxE/nXMnv1q+lOOSPlQ/ywc5JbB8Ic= +github.com/google/go-containerregistry v0.19.0/go.mod h1:u0qB2l7mvtWVR5kNcbFIhFY1hLbf8eeGapA+vbFDCtQ= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -544,8 +544,8 @@ github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm4 github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o= github.com/google/s2a-go v0.1.7/go.mod h1:50CgR4k1jNlWBu4UfS4AcfhVe1r6pdZPygJ3R8F0Qdw= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= -github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/enterprise-certificate-proxy v0.3.2 h1:Vie5ybvEvT75RniqhfFxPRy3Bf7vr3h0cechB90XaQs= github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= @@ -871,8 +871,8 @@ github.com/polyfloyd/go-errorlint v1.4.5 h1:70YWmMy4FgRHehGNOUask3HtSFSOLKgmDn7r github.com/polyfloyd/go-errorlint v1.4.5/go.mod h1:sIZEbFoDOCnTYYZoVkjc4hTnM459tuWA9H/EkdXwsKk= github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s= -github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.70.0 h1:CFTvpkpVP4EXXZuaZuxpikAoma8xVha/IZKMDc9lw+Y= -github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.70.0/go.mod h1:npfc20mPOAu7ViOVnATVMbI7PoXvW99EzgJVqkAomIQ= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2 h1:HZdPRm0ApWPg7F4sHgbqWkL+ddWfpTZsopm5HM/2g4o= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2/go.mod h1:3RiUkFmR9kmPZi9r/8a5jw0a9yg+LMmr7qa0wjqvSiI= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= @@ -885,8 +885,8 @@ github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1: github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= -github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= +github.com/prometheus/client_model v0.6.0 h1:k1v3CzpSRUTrKMppY35TLwPvxHqBu0bYgxZzqGIgaos= +github.com/prometheus/client_model v0.6.0/go.mod h1:NTQHnmxFpouOD0DpvP4XujX3CdOAGQPoaGhyTchlyt8= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= diff --git a/tests/framework/installer/installer.go b/tests/framework/installer/installer.go index 8751207f..baa13ef1 100644 --- a/tests/framework/installer/installer.go +++ b/tests/framework/installer/installer.go @@ -96,7 +96,7 @@ func (i *MondooInstaller) InstallOperator() error { return fmt.Errorf("failed to create mondoo-operator manifest(s): %v ", err) } - // Disable the resource monitor for the integratio ntests to make sure we don't run scans in parallel + // Disable the resource monitor for the integration tests to make sure we don't run scans in parallel err = i.K8sHelper.ExecuteWithRetries(func() (bool, error) { deployment := &appsv1.Deployment{} if err := i.K8sHelper.Clientset.Get( diff --git a/tests/framework/utils/k8s_helper.go b/tests/framework/utils/k8s_helper.go index af1ca3f9..8c675e9d 100644 --- a/tests/framework/utils/k8s_helper.go +++ b/tests/framework/utils/k8s_helper.go @@ -603,7 +603,9 @@ func (k8sh *K8sHelper) GetMondooAuditConfigConditionByType(auditConfig *api.Mond } // CheckForDegradedCondition Check whether specified Condition is in degraded state in a MondooAuditConfig with retries. -func (k8sh *K8sHelper) CheckForDegradedCondition(auditConfig *api.MondooAuditConfig, conditionType api.MondooAuditConfigConditionType, conditionStatus v1.ConditionStatus) error { +func (k8sh *K8sHelper) CheckForDegradedCondition( + auditConfig *api.MondooAuditConfig, conditionType api.MondooAuditConfigConditionType, conditionStatus v1.ConditionStatus, msg string, +) error { err := k8sh.ExecuteWithRetries(func() (bool, error) { // Condition of MondooAuditConfig should be updated foundMondooAuditConfig, err := k8sh.GetMondooAuditConfigFromCluster(auditConfig.Name, auditConfig.Namespace) @@ -614,7 +616,8 @@ func (k8sh *K8sHelper) CheckForDegradedCondition(auditConfig *api.MondooAuditCon if err != nil { return false, nil // The condition might not exist yet. This doesn't mean we should stop trying. } - if condition.Status == conditionStatus { + // If there is a msg specified then test for message too + if condition.Status == conditionStatus && (msg == "" || (msg != "" && strings.Contains(condition.Message, msg))) { return true, nil } return false, nil diff --git a/tests/integration/audit_config_base_suite.go b/tests/integration/audit_config_base_suite.go index 2e79c560..cffe4392 100644 --- a/tests/integration/audit_config_base_suite.go +++ b/tests/integration/audit_config_base_suite.go @@ -498,7 +498,7 @@ func (s *AuditConfigBaseSuite) verifyAdmissionWorking(auditConfig mondoov2.Mondo s.verifyWebhookAndStart(webhookListOpts, caCert) - err = s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.AdmissionDegraded, corev1.ConditionFalse) + err = s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.AdmissionDegraded, corev1.ConditionFalse, "") s.NoErrorf(err, "Admission shouldn't be in degraded state") err = s.testCluster.K8sHelper.CheckForReconciledOperatorVersion(&auditConfig, version.Version) @@ -541,7 +541,7 @@ func (s *AuditConfigBaseSuite) testMondooAuditConfigAdmissionScaleDownScanApi(au s.NoErrorf(err, "Scan API Pod did not get deleted") zap.S().Info("MondooAuditConfig condition should be updated to degraded.") - err = s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.AdmissionDegraded, corev1.ConditionTrue) + err = s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.AdmissionDegraded, corev1.ConditionTrue, "") s.NoErrorf(err, "Admission should be in degraded state") // try to change deployment => should fail @@ -694,13 +694,13 @@ func (s *AuditConfigBaseSuite) testUpgradePreviousReleaseToLatest(auditConfig mo // Verify scan API deployment and service s.validateScanApiDeployment(auditConfig) - err = s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.AdmissionDegraded, corev1.ConditionFalse) + err = s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.AdmissionDegraded, corev1.ConditionFalse, "") s.Require().NoErrorf(err, "Admission shouldn't be in degraded state") - err = s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.NodeScanningDegraded, corev1.ConditionFalse) + err = s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.NodeScanningDegraded, corev1.ConditionFalse, "") s.Require().NoErrorf(err, "Node scanning shouldn't be in degraded state") - err = s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.K8sResourcesScanningDegraded, corev1.ConditionFalse) + err = s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.K8sResourcesScanningDegraded, corev1.ConditionFalse, "") s.Require().NoErrorf(err, "k8s resource scanning shouldn't be in degraded state") // everything is fine, now upgrade to current branch/release diff --git a/tests/integration/audit_config_oom_test.go b/tests/integration/audit_config_oom_test.go index bed3131c..09a130e5 100644 --- a/tests/integration/audit_config_oom_test.go +++ b/tests/integration/audit_config_oom_test.go @@ -64,7 +64,7 @@ func (s *AuditConfigOOMSuite) TestOOMControllerReporting() { // a new replicaset should be created // the first Pod tries to start and gets killed // on the 2nd start we should get an OOMkilled status update - err := s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.MondooOperatorDegraded, corev1.ConditionTrue) + err := s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.MondooOperatorDegraded, corev1.ConditionTrue, "OOM") s.Require().NoError(err, "Failed to find degraded condition") foundMondooAuditConfig, err := s.testCluster.K8sHelper.GetMondooAuditConfigFromCluster(auditConfig.Name, auditConfig.Namespace) @@ -95,7 +95,7 @@ func (s *AuditConfigOOMSuite) TestOOMControllerReporting() { zap.S().Info("Increasing memory limit to get controller running again.") s.NoError(s.testCluster.K8sHelper.Clientset.Update(s.ctx, &operatorDeployment)) - err = s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.MondooOperatorDegraded, corev1.ConditionFalse) + err = s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.MondooOperatorDegraded, corev1.ConditionFalse, "") s.Require().NoError(err, "Failed to find degraded condition") foundMondooAuditConfig, err = s.testCluster.K8sHelper.GetMondooAuditConfigFromCluster(auditConfig.Name, auditConfig.Namespace) s.NoError(err, "Failed to find MondooAuditConfig") @@ -139,13 +139,14 @@ func (s *AuditConfigOOMSuite) TestOOMScanAPI() { // This will take some time, because: // reconcile needs to happen - err := s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.ScanAPIDegraded, corev1.ConditionTrue) + err := s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.ScanAPIDegraded, corev1.ConditionTrue, "OOM") s.Require().NoError(err, "Failed to find degraded condition") foundMondooAuditConfig, err := s.testCluster.K8sHelper.GetMondooAuditConfigFromCluster(auditConfig.Name, auditConfig.Namespace) - s.NoError(err, "Failed to find MondooAuditConfig") + s.Require().NoError(err) cond := mondoo.FindMondooAuditConditions(foundMondooAuditConfig.Status.Conditions, mondoov2.ScanAPIDegraded) + s.Require().NoError(err, "Failed to find degraded condition") s.Require().NotNil(cond) s.Containsf(cond.Message, "OOM", "Failed to find OOMKilled message in degraded condition") s.Len(cond.AffectedPods, 1, "Failed to find only one pod in degraded condition") @@ -164,7 +165,7 @@ func (s *AuditConfigOOMSuite) TestOOMScanAPI() { }) s.Require().NoError(err) - err = s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.ScanAPIDegraded, corev1.ConditionFalse) + err = s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.ScanAPIDegraded, corev1.ConditionFalse, "") s.Require().NoError(err, "Failed to find degraded condition") foundMondooAuditConfig, err = s.testCluster.K8sHelper.GetMondooAuditConfigFromCluster(auditConfig.Name, auditConfig.Namespace) s.NoError(err, "Failed to find MondooAuditConfig") @@ -228,7 +229,7 @@ func (s *AuditConfigOOMSuite) TestOOMNodeScan() { // a new replicaset should be created // the first Pod tries to start and gets killed // on the 2nd start we should get an OOMkilled status update - err = s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.NodeScanningDegraded, corev1.ConditionTrue) + err = s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.NodeScanningDegraded, corev1.ConditionTrue, "OOM") s.Require().NoError(err, "Failed to find degraded condition") foundMondooAuditConfig, err := s.testCluster.K8sHelper.GetMondooAuditConfigFromCluster(auditConfig.Name, auditConfig.Namespace) @@ -257,7 +258,7 @@ func (s *AuditConfigOOMSuite) TestOOMNodeScan() { // Wait for the next run of the CronJob time.Sleep(30 * time.Second) - err = s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.NodeScanningDegraded, corev1.ConditionFalse) + err = s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.NodeScanningDegraded, corev1.ConditionFalse, "") s.Require().NoError(err, "Failed to find degraded condition") foundMondooAuditConfig, err = s.testCluster.K8sHelper.GetMondooAuditConfigFromCluster(auditConfig.Name, auditConfig.Namespace) s.NoError(err, "Failed to find MondooAuditConfig")