From 728ea5e68e04a0f13f3074f84860493d223ba20e Mon Sep 17 00:00:00 2001 From: Konstantin Yarovoy Date: Mon, 7 Oct 2024 10:03:25 +0000 Subject: [PATCH] configmap: Remove installation configmap The installation configmap significantly complicates the new installation process and has multiple issues with its design (and with its related tests). Remove the installation configmap. Redesign the related tests, or make them always skip until a further redesign is done in the scope of a separate change. Refs: #2153 Signed-off-by: Konstantin Yarovoy --- spec/5g/ran_spec.cr | 26 +- spec/workload/installability_spec.cr | 2 +- spec/workload/observability_spec.cr | 29 +- src/tasks/constants.cr | 1 + src/tasks/utils/cnf_manager.cr | 290 +++++-------------- src/tasks/utils/jaeger.cr | 5 - src/tasks/workload/compatibility.cr | 10 +- src/tasks/workload/configuration.cr | 8 +- src/tasks/workload/microservice.cr | 71 ++++- src/tasks/workload/observability.cr | 14 +- src/tasks/workload/ran.cr | 13 +- src/templates/elapsed_time_configmap.yml.ecr | 12 - 12 files changed, 150 insertions(+), 331 deletions(-) delete mode 100644 src/templates/elapsed_time_configmap.yml.ecr diff --git a/spec/5g/ran_spec.cr b/spec/5g/ran_spec.cr index 67b495cfa..31655a4d1 100644 --- a/spec/5g/ran_spec.cr +++ b/spec/5g/ran_spec.cr @@ -35,33 +35,11 @@ describe "5g" do end it "'oran_e2_connection' should pass if the ORAN enabled RAN connects to the RIC using the e2 standard", tags: ["oran"] do - begin - setup_success = setup_5g_network - setup_success.should be_true - ShellCmd.cnf_setup("cnf-config=sample-cnfs/sample-oran-ric/cnf-testsuite.yml") - result = ShellCmd.run_testsuite("oran_e2_connection verbose") - (/(PASSED).*(RAN connects to a RIC using the e2 standard interface)/ =~ result[:output]).should_not be_nil - ensure - result = Helm.delete("open5gs -n oran") - result[:status].success?.should be_true - result = ShellCmd.run_testsuite("cnf_cleanup cnf-config=sample-cnfs/sample-oran-ric/cnf-testsuite.yml") - result[:status].success?.should be_true - end + # (kosstennbl) TODO: Test and specs for 'oran_e2_connection' should be redesigned. Check #2153 for more info. Spec was using sample_srsran_ueauth_open5gs and sample-oran-ric. end it "'oran_e2_connection' should fail if the ORAN enabled RAN does not connect to the RIC using the e2 standard", tags: ["oran"] do - begin - setup_success = setup_5g_network - setup_success.should be_true - ShellCmd.cnf_setup("cnf-config=sample-cnfs/sample-oran-noric/cnf-testsuite.yml") - result = ShellCmd.run_testsuite("oran_e2_connection verbose") - (/(FAILED).*(RAN does not connect to a RIC using the e2 standard interface)/ =~ result[:output]).should_not be_nil - ensure - result = Helm.delete("open5gs -n oran") - result[:status].success?.should be_true - result = ShellCmd.run_testsuite("cnf_cleanup cnf-config=sample-cnfs/sample-oran-noric/cnf-testsuite.yml") - result[:status].success?.should be_true - end + # (kosstennbl) TODO: Test and specs for 'oran_e2_connection' should be redesigned. Check #2153 for more info. Spec was using sample_srsran_ueauth_open5gs and sample-oran-noric. 
end end diff --git a/spec/workload/installability_spec.cr b/spec/workload/installability_spec.cr index daf15e524..48e18b340 100644 --- a/spec/workload/installability_spec.cr +++ b/spec/workload/installability_spec.cr @@ -12,7 +12,7 @@ describe CnfTestSuite do ShellCmd.cnf_setup("cnf-path=./sample-cnfs/k8s-non-helm") result = ShellCmd.run_testsuite("helm_deploy verbose") result[:status].success?.should be_true - (/(FAILED).*(Helm deploy failed)/ =~ result[:output]).should_not be_nil + (/(FAILED).*(CNF has deployments that are not installed with helm)/ =~ result[:output]).should_not be_nil ensure result = ShellCmd.run_testsuite("cnf_cleanup cnf-path=./sample-cnfs/k8s-non-helm verbose") end diff --git a/spec/workload/observability_spec.cr b/spec/workload/observability_spec.cr index ad7671f83..15dd34840 100644 --- a/spec/workload/observability_spec.cr +++ b/spec/workload/observability_spec.cr @@ -161,35 +161,10 @@ describe "Observability" do end it "'tracing' should fail if tracing is not used", tags: ["observability_jaeger_fail"] do - Log.info { "Installing Jaeger " } - JaegerManager.install - - ShellCmd.cnf_setup("cnf-config=sample-cnfs/sample-coredns-cnf/cnf-testsuite.yml") - result = ShellCmd.run_testsuite("tracing") - (/(FAILED).*(Tracing not used)/ =~ result[:output]).should_not be_nil - ensure - result = ShellCmd.run_testsuite("cnf_cleanup cnf-config=sample-cnfs/sample-coredns-cnf/cnf-testsuite.yml") - JaegerManager.uninstall - KubectlClient::Get.resource_wait_for_uninstall("Statefulset", "jaeger-cassandra") - KubectlClient::Get.resource_wait_for_uninstall("Deployment", "jaeger-collector") - KubectlClient::Get.resource_wait_for_uninstall("Deployment", "jaeger-query") - KubectlClient::Get.resource_wait_for_uninstall("Daemonset", "jaeger-agent") + # (kosstennbl) TODO: Test and specs for 'tracing' should be redesigned. Check #2153 for more info. Spec was using sample-coredns-cnf CNF. end it "'tracing' should pass if tracing is used", tags: ["observability_jaeger_pass"] do - Log.info { "Installing Jaeger " } - JaegerManager.install - - ShellCmd.cnf_setup("cnf-config=sample-cnfs/sample-tracing/cnf-testsuite.yml") - result = ShellCmd.run_testsuite("tracing") - (/(PASSED).*(Tracing used)/ =~ result[:output]).should_not be_nil - ensure - result = ShellCmd.run_testsuite("cnf_cleanup cnf-config=sample-cnfs/sample-tracing/cnf-testsuite.yml") - JaegerManager.uninstall - KubectlClient::Get.resource_wait_for_uninstall("Statefulset", "jaeger-cassandra") - KubectlClient::Get.resource_wait_for_uninstall("Deployment", "jaeger-collector") - KubectlClient::Get.resource_wait_for_uninstall("Deployment", "jaeger-query") - KubectlClient::Get.resource_wait_for_uninstall("Daemonset", "jaeger-agent") + # (kosstennbl) TODO: Test and specs for 'tracing' should be redesigned. Check #2153 for more info. Spec was using sample-tracing CNF. 
end - end diff --git a/src/tasks/constants.cr b/src/tasks/constants.cr index cb1676267..13ddea251 100644 --- a/src/tasks/constants.cr +++ b/src/tasks/constants.cr @@ -23,6 +23,7 @@ EMPTY_JSON = JSON.parse(%({})) EMPTY_JSON_ARRAY = JSON.parse(%([])) SPECIALIZED_INIT_SYSTEMS = ["tini", "dumb-init", "s6-svscan"] ROLLING_VERSION_CHANGE_TEST_NAMES = ["rolling_update", "rolling_downgrade", "rolling_version_change"] +WORKLOAD_RESOURCE_KIND_NAMES = ["replicaset", "deployment", "statefulset", "pod", "daemonset"] TESTSUITE_NAMESPACE = "cnf-testsuite" DEFAULT_CNF_NAMESPACE = "cnf-default" diff --git a/src/tasks/utils/cnf_manager.cr b/src/tasks/utils/cnf_manager.cr index 8f1b42b85..253b258b3 100644 --- a/src/tasks/utils/cnf_manager.cr +++ b/src/tasks/utils/cnf_manager.cr @@ -15,15 +15,6 @@ require "log" require "ecr" module CNFManager - class ElapsedTimeConfigMapTemplate - # elapsed_time should be Int32 but it is being passed as string - # So the old behaviour has been retained as is to prevent any breakages - def initialize(@release_name : String, @helm_used : Bool, @elapsed_time : String, @immutable : Bool, @tracing_used : Bool, @e2_found : Bool) - end - - ECR.def_to_s("src/templates/elapsed_time_configmap.yml.ecr") - end - def self.cnf_resource_ymls(args, config) Log.info { "cnf_resource_ymls" } manifest_ymls = CNFInstall::Manifest.manifest_path_to_ymls(COMMON_MANIFEST_FILE_PATH) @@ -419,229 +410,86 @@ module CNFManager # Set it to false by default to indicate a new release is being setup fresh_install = true - helm_install = {status: nil, output: "", error: ""} - - # todo determine what the ric is/if there is a ric installed (labeling) - #option 1 - # todo determine what pad/node the ric is in (pod/node by label) - # todo start tshark capture of the e2 traffic - # todo restart the ric pod when running the ric e2 test - # todo validate the e2 traffic - #option 2 - # todo start tshark capture of the e2 traffic (on all nodes?) - # todo install the ric/xapp (the xapp should be installed last?) - # todo note which pad/node the ric is in (what if cluster tools tshark was not executed on the node with the ric?) - # todo alternative (capture on gnodeb node) - # todo validate the e2 traffic - # todo save off the result to a config map - # todo check the config map in the e2 test - - match = JaegerManager.match() - if match[:found] - baselines = JaegerManager.unique_services_total - Log.info { "baselines: #{baselines}" } - end - - # todo start tshark monitoring the e2 traffic - capture = ORANMonitor.start_e2_capture?(config) - # todo separate out install methods into a module/function that accepts a block - liveness_time = 0 Log.for("sample_setup:install_method").info { "#{install_method[0]}" } Log.for("sample_setup:install_method").info { "#{install_method[1]}" } - elapsed_time = Time.measure do - case install_method[0] - when CNFInstall::InstallMethod::ManifestDirectory - Log.for("verbose").info { "deploying by manifest file" } if verbose - manifest_directory = config.deployments.get_deployment_param(:manifest_directory) - KubectlClient::Apply.file("#{destination_cnf_dir}/#{manifest_directory}") - when CNFInstall::InstallMethod::HelmChart - helm_chart = config.deployments.get_deployment_param(:helm_chart) - helm_repo_name = config.deployments.get_deployment_param(:helm_repo_name) - helm_repo_url = config.deployments.get_deployment_param(:helm_repo_url) - if !helm_repo_name.empty? || !helm_repo_url.empty? 
- Helm.helm_repo_add(helm_repo_name, helm_repo_url) - end - Log.for("verbose").info { "deploying with chart repository" } if verbose - begin - helm_install = Helm.install(release_name, helm_chart, helm_namespace_option, helm_values) - rescue e : Helm::InstallationFailed - stdout_failure "Helm installation failed" - stdout_failure "\t#{e.message}" - exit 1 - rescue e : Helm::CannotReuseReleaseNameError - stdout_warning "Release name #{release_name} has already been setup." - # Mark that install is not fresh - fresh_install = false - end - export_published_chart(config, cli_args) - when CNFInstall::InstallMethod::HelmDirectory - Log.for("verbose").info { "deploying with helm directory" } if verbose - #TODO Add helm options into cnf-testsuite yml - #e.g. helm install nsm --set insecure=true ./nsm/helm_chart - begin - helm_install = Helm.install(release_name, "#{install_method[1]}", helm_namespace_option, helm_values) - rescue e : Helm::InstallationFailed - stdout_failure "Helm installation failed" - stdout_failure "\t#{e.message}" - exit 1 - rescue e : Helm::CannotReuseReleaseNameError - stdout_warning "Release name #{release_name} has already been setup." - # Mark that install is not fresh - fresh_install = false - end - else - raise "Deployment method not found" + case install_method[0] + when CNFInstall::InstallMethod::ManifestDirectory + Log.for("verbose").info { "deploying by manifest file" } if verbose + manifest_directory = config.deployments.get_deployment_param(:manifest_directory) + KubectlClient::Apply.file("#{destination_cnf_dir}/#{manifest_directory}") + when CNFInstall::InstallMethod::HelmChart + helm_chart = config.deployments.get_deployment_param(:helm_chart) + helm_repo_name = config.deployments.get_deployment_param(:helm_repo_name) + helm_repo_url = config.deployments.get_deployment_param(:helm_repo_url) + if !helm_repo_name.empty? || !helm_repo_url.empty? + Helm.helm_repo_add(helm_repo_name, helm_repo_url) end - - #Generating manifest from installed CNF - #Returns true or false in case when manifest was generated successfully or not - manifest_generated_successfully = CNFInstall::Manifest.generate_common_manifest(config, release_name, deployment_namespace) - - if !manifest_generated_successfully - stdout_failure "Manifest generation failed. Check CNF definition (helm charts, values, manifests, etc.)" + Log.for("verbose").info { "deploying with chart repository" } if verbose + begin + Helm.install(release_name, helm_chart, helm_namespace_option, helm_values) + rescue e : Helm::InstallationFailed + stdout_failure "Helm installation failed" + stdout_failure "\t#{e.message}" exit 1 + rescue e : Helm::CannotReuseReleaseNameError + stdout_warning "Release name #{release_name} has already been setup." 
+ fresh_install = false end - - resource_ymls = cnf_workload_resources(nil, config) do |resource| - resource - end - - resource_names = Helm.workload_resource_kind_names(resource_ymls, deployment_namespace) - #TODO move to kubectlclient and make resource_install_and_wait_for_all function - # get liveness probe initialDelaySeconds and FailureThreshold - # if ((periodSeconds * failureThreshhold) + initialDelaySeconds) / defaultFailureThreshold) > startuptimelimit then fail; else pass - # get largest startuptime of all resoures, then save into config map - resource_ymls.map do |resource| - kind = resource["kind"].as_s.downcase - case kind - when "pod" - Log.info { "resource: #{resource}" } - containers = resource.dig("spec", "containers") - when "deployment","statefulset","replicaset","daemonset" - Log.info { "resource: #{resource}" } - containers = resource.dig("spec", "template", "spec", "containers") - end - containers && containers.as_a.map do |container| - initialDelaySeconds = container.dig?("livenessProbe", "initialDelaySeconds") - failureThreshhold = container.dig?("livenessProbe", "failureThreshhold") - periodSeconds = container.dig?("livenessProbe", "periodSeconds") - total_period_failure = 0 - total_extended_period = 0 - adjusted_with_default = 0 - defaultFailureThreshold = 3 - defaultPeriodSeconds = 10 - - if !failureThreshhold.nil? && failureThreshhold.as_i? - ft = failureThreshhold.as_i - else - ft = defaultFailureThreshold - end - - if !periodSeconds.nil? && periodSeconds.as_i? - ps = periodSeconds.as_i - else - ps = defaultPeriodSeconds - end - - total_period_failure = ps * ft - - if !initialDelaySeconds.nil? && initialDelaySeconds.as_i? - total_extended_period = initialDelaySeconds.as_i + total_period_failure - else - total_extended_period = total_period_failure - end - - adjusted_with_default = (total_extended_period / defaultFailureThreshold).round.to_i - - Log.info { "total_period_failure: #{total_period_failure}" } - Log.info { "total_extended_period: #{total_extended_period}" } - Log.info { "liveness_time: #{liveness_time}" } - Log.info { "adjusted_with_default: #{adjusted_with_default}" } - if liveness_time < adjusted_with_default - liveness_time = adjusted_with_default - end - end - end - if !skip_wait_for_install - stdout_success "Waiting for resource availability, timeout for each resource is #{wait_count} seconds\n" - workload_resource_names = resource_names.select { |resource| - ["replicaset", "deployment", "statefulset", "pod", "daemonset"].includes?(resource[:kind].downcase) - } - total_resource_count = workload_resource_names.size() - current_resource_number = 1 - workload_resource_names.each do | resource | - stdout_success "Waiting for resource (#{current_resource_number}/#{total_resource_count}): [#{resource[:kind]}] #{resource[:name]}", same_line: true - ready = KubectlClient::Get.resource_wait_for_install(resource[:kind], resource[:name], wait_count: wait_count, namespace: resource[:namespace]) - if !ready - stdout_failure "CNF setup has timed-out, [#{resource[:kind]}] #{resource[:name]} is not ready after #{wait_count} seconds.", same_line: true - stdout_failure "Recommended course of actions would be to investigate the resource in cluster, then call cnf_cleanup and try to reinstall the CNF." 
- exit 1 - end - current_resource_number += 1 - end - stdout_success "All CNF resources are up!", same_line: true + export_published_chart(config, cli_args) + when CNFInstall::InstallMethod::HelmDirectory + Log.for("verbose").info { "deploying with helm directory" } if verbose + #TODO Add helm options into cnf-testsuite yml + #e.g. helm install nsm --set insecure=true ./nsm/helm_chart + begin + Helm.install(release_name, "#{install_method[1]}", helm_namespace_option, helm_values) + rescue e : Helm::InstallationFailed + stdout_failure "Helm installation failed" + stdout_failure "\t#{e.message}" + exit 1 + rescue e : Helm::CannotReuseReleaseNameError + stdout_warning "Release name #{release_name} has already been setup." + fresh_install = false end - end - - if match[:found] - sleep 120 - metrics_checkpoints = JaegerManager.unique_services_total - Log.info { "metrics_checkpoints: #{metrics_checkpoints}" } - tracing_used = JaegerManager.tracing_used?(baselines, metrics_checkpoints) - Log.info { "tracing_used: #{tracing_used}" } else - tracing_used = false + raise "Deployment method not found" end - - if ORANMonitor.isCNFaRIC?(config) - sleep 30 - e2_found = ORANMonitor.e2_session_established?(capture) - else - e2_found = false + + #Generating manifest from installed CNF + #Returns true or false in case when manifest was generated successfully or not + manifest_generated_successfully = CNFInstall::Manifest.generate_common_manifest(config, release_name, deployment_namespace) + + if !manifest_generated_successfully + stdout_failure "Manifest generation failed. Check CNF definition (helm charts, values, manifests, etc.)" + exit 1 + end + resource_ymls = cnf_workload_resources(nil, config) do |resource| + resource + end + resource_names = Helm.workload_resource_kind_names(resource_ymls, deployment_namespace) + if !skip_wait_for_install + stdout_success "Waiting for resource availability, timeout for each resource is #{wait_count} seconds\n" + workload_resource_names = resource_names.select { |resource| + WORKLOAD_RESOURCE_KIND_NAMES.includes?(resource[:kind].downcase) + } + total_resource_count = workload_resource_names.size() + current_resource_number = 1 + workload_resource_names.each do | resource | + stdout_success "Waiting for resource (#{current_resource_number}/#{total_resource_count}): [#{resource[:kind]}] #{resource[:name]}", same_line: true + ready = KubectlClient::Get.resource_wait_for_install(resource[:kind], resource[:name], wait_count: wait_count, namespace: resource[:namespace]) + if !ready + stdout_failure "CNF setup has timed-out, [#{resource[:kind]}] #{resource[:name]} is not ready after #{wait_count} seconds.", same_line: true + stdout_failure "Recommended course of actions would be to investigate the resource in cluster, then call cnf_cleanup and try to reinstall the CNF." 
+ exit 1 + end + current_resource_number += 1 + end + stdout_success "All CNF resources are up!", same_line: true end - - Log.info { "final e2_found: #{e2_found}" } - Log.info { "final liveness_time: #{liveness_time}" } - Log.info { "elapsed_time.seconds: #{elapsed_time.seconds}" } - Log.info { "helm_install: #{helm_install}" } - Log.info { "helm_install[:error].to_s: #{helm_install[:error].to_s}" } - Log.info { "helm_install[:error].to_s.size: #{helm_install[:error].to_s.size}" } if fresh_install stdout_success "Successfully setup #{release_name}" end - - # Not required to write elapsed time configmap if the cnf already exists due to a previous Helm install - return true if !fresh_install - - # Immutable config maps are only supported in Kubernetes 1.19+ - immutable_configmap = true - if version_less_than(KubectlClient.server_version, "1.19.0") - immutable_configmap = false - end - - helm_used = !helm_install[:status].nil? - #TODO if helm_install then set helm_deploy = true in template - Log.info { "save config" } - elapsed_time_template = ElapsedTimeConfigMapTemplate.new( - "cnf-testsuite-#{release_name}-startup-information", - helm_used, - # "#{elapsed_time.seconds}", - "#{liveness_time}", - immutable_configmap, - tracing_used, - e2_found - ).to_s - #TODO find a way to kubectlapply directly without a map - Log.debug { "elapsed_time_template : #{elapsed_time_template}" } - configmap_path = "#{destination_cnf_dir}/config_maps/elapsed_time.yml" - File.write(configmap_path, "#{elapsed_time_template}") - # TODO if the config map exists on install, complain, delete then overwrite? - KubectlClient::Delete.file(configmap_path) - #TODO call kubectl apply on file - KubectlClient::Apply.file(configmap_path) - # TODO when uninstalling, remove config map ensure #todo uninstall/reinstall clustertools because of tshark bug end @@ -664,14 +512,14 @@ module CNFManager helm_repo_url = config.deployments.get_deployment_param(:helm_repo_url) helm_chart = config.deployments.get_deployment_param(:helm_chart) begin - helm_install = Helm.install("#{release_name} #{helm_chart} --kubeconfig #{kubeconfig} #{helm_namespace_option}") + Helm.install("#{release_name} #{helm_chart} --kubeconfig #{kubeconfig} #{helm_namespace_option}") rescue e : Helm::CannotReuseReleaseNameError stdout_warning "Release name #{release_name} has already been setup." end when CNFInstall::InstallMethod::HelmDirectory helm_directory = config.deployments.get_deployment_param(:helm_directory) begin - helm_install = Helm.install("#{release_name} #{destination_cnf_dir}/#{helm_directory} --kubeconfig #{kubeconfig} #{helm_namespace_option}") + Helm.install("#{release_name} #{destination_cnf_dir}/#{helm_directory} --kubeconfig #{kubeconfig} #{helm_namespace_option}") rescue e : Helm::CannotReuseReleaseNameError stdout_warning "Release name #{release_name} has already been setup." 
end @@ -687,7 +535,7 @@ module CNFManager wait_list = resource_names.map do | resource | case resource[:kind].downcase - when "replicaset", "deployment", "statefulset", "pod", "daemonset" + when .in?(WORKLOAD_RESOURCE_KIND_NAMES) Log.info { "waiting on resource of kind: #{resource[:kind].downcase}" } KubectlClient::Get.resource_wait_for_install(resource[:kind], resource[:name], 180, namespace: resource[:namespace], kubeconfig: kubeconfig) else diff --git a/src/tasks/utils/jaeger.cr b/src/tasks/utils/jaeger.cr index 4b8e57e83..87f992dd8 100644 --- a/src/tasks/utils/jaeger.cr +++ b/src/tasks/utils/jaeger.cr @@ -105,10 +105,5 @@ module JaegerManager Log.info { "total unique services for all pods: #{total_count}" } total_count end - - def self.tracing_used?(baseline, cnf_count) - cnf_count != baseline - end - end diff --git a/src/tasks/workload/compatibility.cr b/src/tasks/workload/compatibility.cr index 849c6492a..a123daf1c 100644 --- a/src/tasks/workload/compatibility.cr +++ b/src/tasks/workload/compatibility.cr @@ -390,15 +390,13 @@ task "helm_deploy" do |t, args| CNFManager::Task.task_runner(args, task: t, check_cnf_installed: false) do |args, config| if check_cnf_config(args) || CNFManager.destination_cnfs_exist? - release_name = config.deployments.get_deployment_param(:name) - configmap = KubectlClient::Get.configmap("cnf-testsuite-#{release_name}-startup-information") - #TODO check if json is empty - helm_used = configmap["data"].as_h["helm_used"].as_s + install_method = config.deployments.get_install_method() + helm_used = install_method[0].is_a?(CNFInstall::InstallMethod::HelmDirectory) || install_method[0].is_a?(CNFInstall::InstallMethod::HelmChart) - if helm_used == "true" + if helm_used - CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Passed, "Helm deploy successful") + CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Passed, "CNF is installed via helm") else - CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Failed, "Helm deploy failed") + CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Failed, "CNF has deployments that are not installed with helm") else CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Failed, "No cnf_testsuite.yml found! Did you run the setup task?") diff --git a/src/tasks/workload/configuration.cr b/src/tasks/workload/configuration.cr index 5d793181a..e068db109 100644 --- a/src/tasks/workload/configuration.cr +++ b/src/tasks/workload/configuration.cr @@ -153,7 +153,7 @@ task "versioned_tag", ["install_opa"] do |t, args| test_passed = true kind = resource["kind"].downcase case kind - when "deployment","statefulset","pod","replicaset", "daemonset" + when .in?(WORKLOAD_RESOURCE_KIND_NAMES) resource_yaml = KubectlClient::Get.resource(resource[:kind], resource[:name], resource[:namespace]) pods = KubectlClient::Get.pods_by_resource(resource_yaml, namespace: resource[:namespace]) pods.map do |pod| @@ -268,9 +268,9 @@ task "hardcoded_ip_addresses_in_k8s_runtime_configuration" do |t, args| KubectlClient::Create.command("namespace hardcoded-ip-test") unless helm_chart.empty? 
- helm_install = Helm.install("--namespace hardcoded-ip-test hardcoded-ip-test #{helm_chart} --dry-run --debug > #{helm_chart_yml_path}") + Helm.install("--namespace hardcoded-ip-test hardcoded-ip-test #{helm_chart} --dry-run --debug > #{helm_chart_yml_path}") else - helm_install = Helm.install("--namespace hardcoded-ip-test hardcoded-ip-test #{destination_cnf_dir}/#{helm_directory} --dry-run --debug > #{helm_chart_yml_path}") + Helm.install("--namespace hardcoded-ip-test hardcoded-ip-test #{destination_cnf_dir}/#{helm_directory} --dry-run --debug > #{helm_chart_yml_path}") VERBOSE_LOGGING.info "helm_directory: #{helm_directory}" if check_verbose(args) end @@ -405,8 +405,6 @@ end # https://www.cloudytuts.com/tutorials/kubernetes/how-to-create-immutable-configmaps-and-secrets/ class ImmutableConfigMapTemplate - # elapsed_time should be Int32 but it is being passed as string - # So the old behaviour has been retained as is to prevent any breakages def initialize(@test_url : String) end diff --git a/src/tasks/workload/microservice.cr b/src/tasks/workload/microservice.cr index 5d7086b5f..f498c53ae 100644 --- a/src/tasks/workload/microservice.cr +++ b/src/tasks/workload/microservice.cr @@ -101,17 +101,70 @@ end desc "Does the CNF have a reasonable startup time (< 30 seconds)?" task "reasonable_startup_time" do |t, args| + # TODO (kosstennbl) Redesign this test, now it is based only on livness probes. CNFManager::Task.task_runner(args, task: t) do |args, config| - release_name = config.deployments.get_deployment_param(:name) - current_dir = FileUtils.pwd helm = Helm::BinarySingleton.helm Log.for("verbose").info {helm} if check_verbose(args) - configmap = KubectlClient::Get.configmap("cnf-testsuite-#{release_name}-startup-information") - #TODO check if json is empty - startup_time = configmap["data"].as_h["startup_time"].as_s + # (kosstennbl) That part was copied from cnf_manager.cr, but it wasn't given much attention as + # it would be probably redesigned in future. + startup_time = 0 + resource_ymls = CNFManager.cnf_workload_resources(args, config) { |resource| resource } + # get liveness probe initialDelaySeconds and FailureThreshold + # if ((periodSeconds * failureThreshhold) + initialDelaySeconds) / defaultFailureThreshold) > startuptimelimit then fail; else pass + # get largest startuptime of all resoures + resource_ymls.map do |resource| + kind = resource["kind"].as_s.downcase + case kind + when "pod" + Log.for(t.name).info { "resource: #{resource}" } + containers = resource.dig("spec", "containers") + when .in?(WORKLOAD_RESOURCE_KIND_NAMES) + Log.for(t.name).info { "resource: #{resource}" } + containers = resource.dig("spec", "template", "spec", "containers") + end + containers && containers.as_a.map do |container| + initialDelaySeconds = container.dig?("livenessProbe", "initialDelaySeconds") + failureThreshhold = container.dig?("livenessProbe", "failureThreshhold") + periodSeconds = container.dig?("livenessProbe", "periodSeconds") + total_period_failure = 0 + total_extended_period = 0 + adjusted_with_default = 0 + defaultFailureThreshold = 3 + defaultPeriodSeconds = 10 + + if !failureThreshhold.nil? && failureThreshhold.as_i? + ft = failureThreshhold.as_i + else + ft = defaultFailureThreshold + end + + if !periodSeconds.nil? && periodSeconds.as_i? + ps = periodSeconds.as_i + else + ps = defaultPeriodSeconds + end + + total_period_failure = ps * ft + + if !initialDelaySeconds.nil? && initialDelaySeconds.as_i? 
+ total_extended_period = initialDelaySeconds.as_i + total_period_failure + else + total_extended_period = total_period_failure + end + + adjusted_with_default = (total_extended_period / defaultFailureThreshold).round.to_i + Log.info { "total_period_failure: #{total_period_failure}" } + Log.info { "total_extended_period: #{total_extended_period}" } + Log.info { "startup_time: #{startup_time}" } + Log.info { "adjusted_with_default: #{adjusted_with_default}" } + if startup_time < adjusted_with_default + startup_time = adjusted_with_default + end + end + end # Correlation for a slow box vs a fast box # sysbench base fast machine (disk), time in ms 0.16 # sysbench base slow machine (disk), time in ms 6.55 @@ -162,9 +215,9 @@ task "reasonable_startup_time" do |t, args| # LOGGING.info "startup_time_limit TEST mode: #{startup_time_limit}" # end Log.info { "startup_time_limit: #{startup_time_limit}" } - Log.info { "startup_time: #{startup_time.to_i}" } + Log.info { "startup_time: #{startup_time}" } - if startup_time.to_i <= startup_time_limit + if startup_time <= startup_time_limit CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Passed, "CNF had a reasonable startup time 🚀") else CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Failed, "CNF had a startup time of #{startup_time} seconds 🐢") @@ -429,7 +482,7 @@ task "sig_term_handled" do |t, args| # todo Clustertools.each_container_by_resource(resource, namespace) do | container_id, container_pid_on_node, node, container_proctree_statuses, container_status| kind = resource["kind"].downcase case kind - when "deployment","statefulset","pod","replicaset", "daemonset" + when .in?(WORKLOAD_RESOURCE_KIND_NAMES) resource_yaml = KubectlClient::Get.resource(resource[:kind], resource[:name], resource[:namespace]) #todo needs namespace pods = KubectlClient::Get.pods_by_resource(resource_yaml, resource[:namespace]) @@ -683,7 +736,7 @@ task "specialized_init_system", ["install_cluster_tools"] do |t, args| CNFManager.workload_resource_test(args, config) do |resource, container, initialized| kind = resource["kind"].downcase case kind - when "deployment","statefulset","pod","replicaset", "daemonset" + when .in?(WORKLOAD_RESOURCE_KIND_NAMES) namespace = resource[:namespace] Log.for(t.name).info { "Checking resource #{resource[:kind]}/#{resource[:name]} in #{namespace}" } resource_yaml = KubectlClient::Get.resource(resource[:kind], resource[:name], resource[:namespace]) diff --git a/src/tasks/workload/observability.cr b/src/tasks/workload/observability.cr index 3e53db9bb..0dedf1831 100644 --- a/src/tasks/workload/observability.cr +++ b/src/tasks/workload/observability.cr @@ -22,7 +22,7 @@ task "log_output" do |t, args| task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| test_passed = false case resource["kind"].downcase - when "replicaset", "deployment", "statefulset", "pod", "daemonset" + when .in?(WORKLOAD_RESOURCE_KIND_NAMES) result = KubectlClient.logs("#{resource["kind"]}/#{resource["name"]}", namespace: resource[:namespace], options: "--all-containers --tail=5 --prefix=true") Log.for("Log lines").info { result[:output] } if result[:output].size > 0 @@ -221,16 +221,8 @@ task "tracing" do |t, args| match = JaegerManager.match() Log.info { "jaeger match: #{match}" } if match[:found] - release_name = config.deployments.get_deployment_param(:name) - configmap = KubectlClient::Get.configmap("cnf-testsuite-#{release_name}-startup-information") - #TODO check if json is empty - tracing_used = 
configmap["data"].as_h["tracing_used"].as_s - - if tracing_used == "true" - CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Passed, "Tracing used") - else - CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Failed, "Tracing not used") - end + # (kosstennbl) TODO: Redesign tracing test, preferably without usage of installation configmaps. More info in issue #2153 + CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Skipped, "tracing test is disabled, check #2153") else CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Skipped, "Jaeger not configured") end diff --git a/src/tasks/workload/ran.cr b/src/tasks/workload/ran.cr index 1a4bc8d88..5b31aa579 100644 --- a/src/tasks/workload/ran.cr +++ b/src/tasks/workload/ran.cr @@ -17,16 +17,9 @@ desc "Test if RAN uses the ORAN e2 interface" task "oran_e2_connection" do |t, args| CNFManager::Task.task_runner(args, task: t) do |args, config| release_name = config.deployments.get_deployment_param(:name) - if ORANMonitor.isCNFaRIC?(config) - configmap = KubectlClient::Get.configmap("cnf-testsuite-#{release_name}-startup-information") - e2_found = configmap["data"].as_h["e2_found"].as_s - - - if e2_found == "true" - CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Passed, "RAN connects to a RIC using the e2 standard interface") - else - CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Failed, "RAN does not connect to a RIC using the e2 standard interface") - end + if ORANMonitor.isCNFaRIC?(config) + # (kosstennbl) TODO: Redesign oran_e2_connection test, preferably without usage of installation configmaps. More info in issue #2153 + CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Skipped, "oran_e2_connection test is disabled, check #2153") else CNFManager::TestcaseResult.new(CNFManager::ResultStatus::NA, "[oran_e2_connection] No ric designated in cnf_testsuite.yml") end diff --git a/src/templates/elapsed_time_configmap.yml.ecr b/src/templates/elapsed_time_configmap.yml.ecr deleted file mode 100644 index 745510ed0..000000000 --- a/src/templates/elapsed_time_configmap.yml.ecr +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: '<%= @release_name %>' -<% if @immutable %> -immutable: true -<% end %> -data: - startup_time: '<%= @elapsed_time %>' - helm_used: '<%= @helm_used %>' - tracing_used: '<%= @tracing_used %>' - e2_found: '<%= @e2_found %>'