# Patch overview (commentary only; everything below the first `diff --git` header is unchanged).
#
# Purpose: make the integration suite tolerant of Kubernetes event lines and timing that differ
# between environments, by asserting event-derived log lines only when ENV['CI'] == 'true'.
#
# Files touched:
#   .github/workflows/ci.yml
#     - adds `env: CI: true` to the `ruby-tests` job.
#   test/helpers/test_provisioner.rb
#     - adds private `wait_for_default_service_account(kubeclient, namespace)`: calls
#       `kubeclient.get_service_account('default', namespace)` up to 30 times, returns as soon as
#       a truthy object comes back, sleeps 1 second after each Kubeclient::ResourceNotFoundError,
#       and raises a RuntimeError naming the namespace if all 30 attempts fail.
#     - `create_namespace` now calls that helper right after `kubeclient.create_namespace(ns)`.
#   test/integration/krane_deploy_test.rb
#     - expected log entries derived from events/container errors are rewritten as
#       `*("..." if ENV['CI'] == 'true')`; when the condition is false the expression is nil and
#       the splat contributes no element, so the entry is simply not asserted.
#     - `test_resource_watcher_reports_failed_after_timeout` switches from `assert_deploy_failure`
#       to `assert_deploy_failure_or_timeout` and replaces exact strings with alternation regexes
#       that accept either a final status line or a "Continuing to wait for:" line.
#   test/integration/restart_task_test.rb
#     - the StatefulSet status regex also accepts "1 replica, 1 currentReplica";
#       the "Unhealthy: Readiness probe failed" expectation is gated on CI.
#   test/test_helper.rb
#     - adds `assert_deploy_failure_or_timeout(result)`: asserts `result` is false and that the
#       captured logs include "Result: FAILURE" or "Result: TIMED OUT".
#
# NOTE(review): line breaks and indentation in this copy appear to have been collapsed (whole hunks
#   sit on single lines), so it presumably will not apply as-is — regenerate from the source commit
#   before applying. TODO confirm.
# NOTE(review): the FailedMount expectation changes from /secrets?/ to a literal "secret", which is
#   narrower than before; the added leading/trailing `.*` look redundant — confirm this is intended.
# NOTE(review): in `wait_for_default_service_account` the sleep happens only in the rescue branch; if
#   `get_service_account` can ever return a falsy value without raising, the loop would retry
#   without pausing — verify against Kubeclient behavior.
# NOTE(review): `ENV['CI'] == 'true'` is repeated at every gated expectation; a shared helper would
#   keep the condition in one place — consider in a follow-up.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 281a0e630..589707a5b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,6 +5,8 @@ on: [push] jobs: ruby-tests: runs-on: ubuntu-latest + env: + CI: true name: "Tests (${{matrix.test_suite}}) - Ruby ${{ matrix.ruby }} with Kubernetes ${{ matrix.kubernetes_version }}" strategy: diff --git a/test/helpers/test_provisioner.rb b/test/helpers/test_provisioner.rb index c439ad0c5..79c5ce1f5 100644 --- a/test/helpers/test_provisioner.rb +++ b/test/helpers/test_provisioner.rb @@ -52,10 +52,25 @@ def prepare_pv(name, storage_class_name: nil) private + def wait_for_default_service_account(kubeclient, namespace) + 30.times do + begin + sa = kubeclient.get_service_account('default', namespace) + return if sa + rescue Kubeclient::ResourceNotFoundError + # If the service account is not found, sleep for a second and then retry + sleep(1) + end + end + raise "Default service account in #{namespace} not ready after 30 seconds" + end + def create_namespace(namespace) ns = Kubeclient::Resource.new(kind: 'Namespace') ns.metadata = { name: namespace } kubeclient.create_namespace(ns) + # wait for the serviceaccount 'default' to be created; https://github.com/kubernetes/kubernetes/issues/66689 + wait_for_default_service_account(kubeclient, namespace) end end end diff --git a/test/integration/krane_deploy_test.rb b/test/integration/krane_deploy_test.rb index 2ce4b83a9..c6fd4e74a 100644 --- a/test/integration/krane_deploy_test.rb +++ b/test/integration/krane_deploy_test.rb @@ -449,7 +449,7 @@ def test_output_of_failed_unmanaged_pod assert_logs_match_all([ "Failed to deploy 1 priority resource", "Pod status: Failed.", - "no such file or directory", + *("no such file or directory" if ENV['CI'] == 'true'), ], in_order: true) end @@ -508,7 +508,9 @@ def test_unrunnable_container_on_deployment_pod_fails_quickly "Logs from container 'successful-init'", "Log from successful init container", ], in_order: 
true) - assert_logs_match("no such file or directory") + if ENV['CI'] == 'true' + assert_logs_match("no such file or directory") + end end def test_wait_false_still_waits_for_priority_resources @@ -701,7 +703,10 @@ def test_deploy_result_logging_for_mixed_result_deploy %r{Deployment/bad-probe: TIMED OUT \(progress deadline: \d+s\)}, "Timeout reason: ProgressDeadlineExceeded", ] - end_bad_probe_logs = ["Scaled up replica set bad-probe-"] # event + + end_bad_probe_logs = [ + *("Scaled up replica set bad-probe-" if ENV['CI'] == 'true') #event + ] # Debug info for bad probe timeout assert_logs_match_all(start_bad_probe_logs + [ @@ -719,7 +724,7 @@ def test_deploy_result_logging_for_mixed_result_deploy "Timeout reason: ProgressDeadlineExceeded", /Latest ReplicaSet: missing-volumes-\w+/, "Final status: 1 replica, 1 updatedReplica, 1 unavailableReplica", - /FailedMount.*secrets? "catphotoscom" not found/, # event + *(%r{.*FailedMount.*secret "catphotoscom" not found.*} if ENV['CI'] == 'true'), #event ], in_order: true) # Debug info for failure @@ -729,7 +734,7 @@ def test_deploy_result_logging_for_mixed_result_deploy "The following containers are in a state that is unlikely to be recoverable:", "init-crash-loop-back-off: Crashing repeatedly (exit 1). See logs for more information.", "Final status: 1 replica, 1 updatedReplica, 1 unavailableReplica", - "Scaled up replica set init-crash-", # event + *("Scaled up replica set init-crash-" if ENV['CI'] == 'true'), "this is a log from the crashing init container", ], in_order: true) @@ -1113,8 +1118,8 @@ def test_bad_container_on_daemon_sets_fails "DaemonSet/crash-loop: FAILED", "crash-loop-back-off: Crashing repeatedly (exit 1). 
See logs for more information.", "Final status: #{num_ds} updatedNumberScheduled, #{num_ds} desiredNumberScheduled, 0 numberReady", - "Events (common success events excluded):", - "BackOff: Back-off restarting failed container", + *("Events (common success events excluded):" if ENV['CI'] == 'true'), + *("BackOff: Back-off restarting failed container" if ENV['CI'] == 'true'), "Logs from container 'crash-loop-back-off':", "this is a log from the crashing container", ], in_order: true) @@ -1134,8 +1139,8 @@ def test_bad_container_on_stateful_sets_fails_with_rolling_update "Successfully deployed 1 resource and failed to deploy 1 resource", "StatefulSet/stateful-busybox: FAILED", "app: Crashing repeatedly (exit 1). See logs for more information.", - "Events (common success events excluded):", - %r{\[Pod/stateful-busybox-\d\]\tBackOff: Back-off restarting failed container}, + *("Events (common success events excluded):" if ENV['CI'] == 'true'), # event + *(%r{\[Pod/stateful-busybox-\d\]\tBackOff: Back-off restarting failed container} if ENV['CI'] == 'true'), "Logs from container 'app':", "ls: /not-a-dir: No such file or directory", ], in_order: true) @@ -1182,7 +1187,7 @@ def test_resource_quotas_are_deployed_first "ResourceQuota/resource-quotas", %r{Deployment/web: TIMED OUT \(progress deadline: \d+s\)}, "Timeout reason: ProgressDeadlineExceeded", - "failed quota: resource-quotas", # from an event + *("failed quota: resource-quotas" if ENV['CI'] == 'true'), # from an event ], in_order: true) rqs = kubeclient.get_resource_quotas(namespace: @namespace) @@ -1330,7 +1335,7 @@ def test_jobs_can_fail "Result: FAILURE", "Job/hello-job: FAILED", "Final status: Failed", - %r{\[Job/hello-job\]\tDeadlineExceeded: Job was active longer than specified deadline \(\d+ events\)}, + *(%r{\[Job/hello-job\]\tDeadlineExceeded: Job was active longer than specified deadline \(\d+ events\)} if ENV['CI'] == 'true'), ]) end @@ -1343,19 +1348,19 @@ def 
test_resource_watcher_reports_failed_after_timeout bad_probe = f["bad_probe.yml"]["Deployment"].first bad_probe["spec"]["progressDeadlineSeconds"] = 5 f["missing_volumes.yml"]["Deployment"].first["spec"]["progressDeadlineSeconds"] = 30 - f["cannot_run.yml"]["Deployment"].first["spec"]["replicas"] = 1 + f["cannot_run.yml"]["Deployment"].first["spec"]["replicas"] = 1 #this results in pods in CrashLoopBackOff end - assert_deploy_failure(result) + assert_deploy_failure_or_timeout(result) bad_probe_timeout = "Deployment/bad-probe: TIMED OUT (progress deadline: 5s)" assert_logs_match_all([ - "Successfully deployed 1 resource, timed out waiting for 2 resources to deploy, and failed to deploy 1 resource", + /Successfully deployed 1 resource(,| and) timed out waiting for/, "Successful resources", "ConfigMap/test", - "Deployment/cannot-run: FAILED", bad_probe_timeout, - "Deployment/missing-volumes: GLOBAL WATCH TIMEOUT (20 seconds)", + /(Continuing to wait for:.*Deployment\/cannot-run.*)|(Deployment\/cannot-run: FAILED)/, + /(Continuing to wait for:.*Deployment\/missing-volumes.*)|(Deployment\/missing-volumes: GLOBAL WATCH TIMEOUT \(20 seconds\))/, ]) end diff --git a/test/integration/restart_task_test.rb b/test/integration/restart_task_test.rb index d13a78193..e76031acb 100644 --- a/test/integration/restart_task_test.rb +++ b/test/integration/restart_task_test.rb @@ -60,7 +60,7 @@ def test_restart_statefulset_on_delete_restarts_child_pods "Waiting for rollout", "Result: SUCCESS", "Successfully restarted 1 resource", - %r{StatefulSet/stateful-busybox.* 2 replicas}, + %r{StatefulSet/stateful-busybox.* (2 replicas|1 replica, 1 currentReplica)}, ], in_order: true) end @@ -291,7 +291,7 @@ def test_restart_failure "The following containers have not passed their readiness probes", "app must exit 0 from the following command", "Final status: 2 replicas, 1 updatedReplica, 1 availableReplica, 1 unavailableReplica", - "Unhealthy: Readiness probe failed", + *("Unhealthy: Readiness 
probe failed" if ENV['CI'] == 'true'), ], in_order: true) end diff --git a/test/test_helper.rb b/test/test_helper.rb index 370a9b79f..507461736 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -116,6 +116,14 @@ def assert_deploy_failure(result, cause = nil) alias_method :assert_restart_failure, :assert_deploy_failure alias_method :assert_task_run_failure, :assert_deploy_failure + def assert_deploy_failure_or_timeout(result) + assert_equal(false, result, "Deploy succeeded when it was expected to fail.#{logs_message_if_captured}") + logging_assertion do |logs| + assert(logs.include?("Result: FAILURE") || logs.include?("Result: TIMED OUT"), + "'Result: FAILURE' or 'Result: TIMED OUT' not found in the following logs:\n#{logs}") + end + end + def assert_deploy_success(result) assert_equal(true, result, "Deploy failed when it was expected to succeed.#{logs_message_if_captured}") logging_assertion do |logs|