diff --git a/tests/reboot/internal/rebootconfig/config.go b/tests/reboot/internal/rebootconfig/config.go
new file mode 100644
index 00000000..02cf859a
--- /dev/null
+++ b/tests/reboot/internal/rebootconfig/config.go
@@ -0,0 +1,121 @@
+package rebootconfig
+
+import (
+	"encoding/json"
+	"fmt"
+	"log"
+	"os"
+	"path/filepath"
+	"runtime"
+
+	"github.com/kelseyhightower/envconfig"
+	"github.com/openshift-kni/eco-gosystem/tests/internal/config"
+	"gopkg.in/yaml.v2"
+)
+
+const (
+	// PathToDefaultRebootParamsFile path to config file with default reboot parameters.
+	PathToDefaultRebootParamsFile = "./default.yaml"
+)
+
+// RebootConfig type keeps reboot test configuration.
+type RebootConfig struct {
+	*config.GeneralConfig
+	ControlPlaneLabelStr string      `yaml:"control_plane_nodes_label" envconfig:"ECO_REBOOT_CONTROL_PLANE_NODES_LABEL"`
+	MasterNodesLabelStr  string      `yaml:"master_nodes_label" envconfig:"ECO_REBOOT_MASTER_NODES_LABEL"`
+	WorkerNodesLabelStr  string      `yaml:"worker_nodes_label" envconfig:"ECO_REBOOT_WORKER_NODES_LABEL"`
+	NodesCredentialsMap  NodesBMCMap `yaml:"nodes_bmc_map" envconfig:"ECO_SYSTEM_NODES_CREDENTIALS_MAP"`
+}
+
+// BMCDetails structure to hold BMC details.
+type BMCDetails struct {
+	Username   string `json:"username"`
+	Password   string `json:"password"`
+	BMCAddress string `json:"bmc"`
+}
+
+// NodesBMCMap holds info about the BMC connection for a specific node.
+type NodesBMCMap map[string]BMCDetails
+
+// Decode - method for envconfig package to parse JSON encoded environment variables.
+func (nad *NodesBMCMap) Decode(value string) error {
+	nodesAuthMap := new(map[string]BMCDetails)
+
+	err := json.Unmarshal([]byte(value), nodesAuthMap)
+	if err != nil {
+		log.Printf("Failed to parse data: %v", err)
+
+		return fmt.Errorf("invalid map json: %w", err)
+	}
+
+	*nad = *nodesAuthMap
+
+	return nil
+}
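+
+// As an illustration, the JSON carried by ECO_SYSTEM_NODES_CREDENTIALS_MAP is
+// expected to be an object keyed by node name, matching the BMCDetails json
+// tags above (all values below are placeholders, not real credentials):
+//
+//	{"worker-0": {"username": "admin", "password": "pass", "bmc": "10.1.1.10"}}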
+
+// NewRebootConfig returns an instance of the RebootConfig type.
+func NewRebootConfig() *RebootConfig {
+	log.Print("Creating new RebootConfig struct")
+
+	var rebootConf RebootConfig
+	rebootConf.GeneralConfig = config.NewConfig()
+
+	var confFile string
+
+	if fileFromEnv, exists := os.LookupEnv("ECO_SYSTEM_REBOOT_CONFIG_FILE_PATH"); exists {
+		confFile = fileFromEnv
+	} else {
+		_, filename, _, _ := runtime.Caller(0)
+		baseDir := filepath.Dir(filename)
+		confFile = filepath.Join(baseDir, PathToDefaultRebootParamsFile)
+	}
+
+	log.Printf("Opening config file %s", confFile)
+
+	err := readFile(&rebootConf, confFile)
+	if err != nil {
+		log.Printf("Failed to read config file %s", confFile)
+
+		return nil
+	}
+
+	err = readEnv(&rebootConf)
+	if err != nil {
+		log.Print("Failed to read environment variables")
+
+		return nil
+	}
+
+	return &rebootConf
+}
+
+func readFile(rebootConfig *RebootConfig, cfgFile string) error {
+	openedCfgFile, err := os.Open(cfgFile)
+	if err != nil {
+		return err
+	}
+
+	defer func() {
+		_ = openedCfgFile.Close()
+	}()
+
+	decoder := yaml.NewDecoder(openedCfgFile)
+
+	return decoder.Decode(&rebootConfig)
+}
+
+func readEnv(rebootConfig *RebootConfig) error {
+	return envconfig.Process("", rebootConfig)
+}
diff --git a/tests/reboot/internal/rebootconfig/default.yaml b/tests/reboot/internal/rebootconfig/default.yaml
new file mode 100644
index 00000000..ab3d4bfa
--- /dev/null
+++ b/tests/reboot/internal/rebootconfig/default.yaml
@@ -0,0 +1,10 @@
+---
+# Default configuration.
+verbose_level: 0
+dump_failed_tests: false
+reports_dump_dir: "/tmp/reports"
+polarion_report: true
+dry_run: false
+control_plane_nodes_label: "node-role.kubernetes.io/control-plane=''"
+master_nodes_label: "node-role.kubernetes.io/master=''"
+worker_nodes_label: "node-role.kubernetes.io/worker=''"
diff --git a/tests/reboot/internal/rebootinittools/rebootinittools.go b/tests/reboot/internal/rebootinittools/rebootinittools.go
new file mode 100644
index 00000000..72aac94a
--- /dev/null
+++ b/tests/reboot/internal/rebootinittools/rebootinittools.go
@@ -0,0 +1,21 @@
+package rebootinittools
+
+import (
+	"github.com/openshift-kni/eco-goinfra/pkg/clients"
+	"github.com/openshift-kni/eco-gosystem/tests/internal/inittools"
+	"github.com/openshift-kni/eco-gosystem/tests/reboot/internal/rebootconfig"
+)
+
+var (
+	// APIClient provides API access to the cluster.
+	APIClient *clients.Settings
+	// RebootTestConfig provides access to the tests' configuration parameters.
+	RebootTestConfig *rebootconfig.RebootConfig
+)
+
+// init loads all variables automatically when this package is imported. Once the package is imported, a user has
+// full access to all variables set within the init function. It is recommended to import this package using a
+// dot import.
+func init() {
+	RebootTestConfig = rebootconfig.NewRebootConfig()
+	APIClient = inittools.APIClient
+}
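+
+// Illustrative usage from a test file (the names above are this package's own
+// exports; the dot import keeps them unqualified at the call site):
+//
+//	import . "github.com/openshift-kni/eco-gosystem/tests/reboot/internal/rebootinittools"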
"github.com/openshift-kni/eco-gosystem/tests/internal/inittools" + + "github.com/openshift-kni/eco-gosystem/tests/reboot/internal/rebootparams" + _ "github.com/openshift-kni/eco-gosystem/tests/reboot/tests" +) + +var _, currentFile, _, _ = runtime.Caller(0) + +func TestClusterReboot(t *testing.T) { + _, reporterConfig := GinkgoConfiguration() + reporterConfig.JUnitReport = GeneralConfig.GetJunitReportPath(currentFile) + + RegisterFailHandler(Fail) + RunSpecs(t, "SystemTests Reboot Suite", Label(rebootparams.Labels...), reporterConfig) +} + +var _ = JustAfterEach(func() { + reporter.ReportIfFailed( + CurrentSpecReport(), GeneralConfig.GetDumpFailedTestReportLocation(currentFile), GeneralConfig.ReportsDirAbsPath, + rebootparams.ReporterNamespacesToDump, rebootparams.ReporterCRDsToDump, clients.SetScheme) +}) + +var _ = ReportAfterSuite("", func(report Report) { + polarion.CreateReport( + report, GeneralConfig.GetPolarionReportPath(), GeneralConfig.PolarionTCPrefix) +}) diff --git a/tests/reboot/tests/ungraceful-reboot.go b/tests/reboot/tests/ungraceful-reboot.go new file mode 100644 index 00000000..ba64983d --- /dev/null +++ b/tests/reboot/tests/ungraceful-reboot.go @@ -0,0 +1,194 @@ +package ungraceful_system_test + +import ( + "context" + "fmt" + "strings" + "sync" + "time" + + bmclib "github.com/bmc-toolbox/bmclib/v2" + "github.com/golang/glog" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/openshift-kni/eco-goinfra/pkg/clusteroperator" + "github.com/openshift-kni/eco-goinfra/pkg/nodes" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + + . "github.com/openshift-kni/eco-gosystem/tests/reboot/internal/rebootinittools" + "github.com/openshift-kni/eco-gosystem/tests/reboot/internal/rebootparams" +) + +var _ = Describe( + "Cluster Reboots", + Ordered, + ContinueOnFailure, + Label(rebootparams.LabelValidateReboot), func() { + It("Verifies Hard Reboot", func(ctx SpecContext) { + if len(RebootTestConfig.NodesCredentialsMap) == 0 { + glog.V(rebootparams.RebootLogLevel).Infof("BMC Details not specified") + Skip("BMC Details not specified. Skipping...") + } + + clientOpts := []bmclib.Option{} + + glog.V(rebootparams.RebootLogLevel).Infof( + fmt.Sprintf("BMC options %v", clientOpts)) + + glog.V(rebootparams.RebootLogLevel).Infof( + fmt.Sprintf("NodesCredentialsMap:\n\t%#v", RebootTestConfig.NodesCredentialsMap)) + + var bmcMap = make(map[string]*bmclib.Client) + + for node, auth := range RebootTestConfig.NodesCredentialsMap { + glog.V(rebootparams.RebootLogLevel).Infof( + fmt.Sprintf("Creating BMC client for node %s", node)) + glog.V(rebootparams.RebootLogLevel).Infof( + fmt.Sprintf("BMC Auth %#v", auth)) + + bmcClient := bmclib.NewClient(auth.BMCAddress, auth.Username, auth.Password, clientOpts...) 
diff --git a/tests/reboot/tests/ungraceful-reboot.go b/tests/reboot/tests/ungraceful-reboot.go
new file mode 100644
index 00000000..ba64983d
--- /dev/null
+++ b/tests/reboot/tests/ungraceful-reboot.go
@@ -0,0 +1,194 @@
+package ungraceful_system_test
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"sync"
+	"time"
+
+	bmclib "github.com/bmc-toolbox/bmclib/v2"
+	"github.com/golang/glog"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	"github.com/openshift-kni/eco-goinfra/pkg/clusteroperator"
+	"github.com/openshift-kni/eco-goinfra/pkg/nodes"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/wait"
+
+	. "github.com/openshift-kni/eco-gosystem/tests/reboot/internal/rebootinittools"
+	"github.com/openshift-kni/eco-gosystem/tests/reboot/internal/rebootparams"
+)
+
+var _ = Describe(
+	"Cluster Reboots",
+	Ordered,
+	ContinueOnFailure,
+	Label(rebootparams.LabelValidateReboot), func() {
+		It("Verifies Hard Reboot", func(ctx SpecContext) {
+			if len(RebootTestConfig.NodesCredentialsMap) == 0 {
+				glog.V(rebootparams.RebootLogLevel).Infof("BMC details not specified")
+				Skip("BMC details not specified. Skipping...")
+			}
+
+			clientOpts := []bmclib.Option{}
+
+			glog.V(rebootparams.RebootLogLevel).Infof("BMC options %v", clientOpts)
+			glog.V(rebootparams.RebootLogLevel).Infof(
+				"NodesCredentialsMap:\n\t%#v", RebootTestConfig.NodesCredentialsMap)
+
+			bmcMap := make(map[string]*bmclib.Client)
+
+			for node, auth := range RebootTestConfig.NodesCredentialsMap {
+				glog.V(rebootparams.RebootLogLevel).Infof("Creating BMC client for node %s", node)
+				glog.V(rebootparams.RebootLogLevel).Infof("BMC auth %#v", auth)
+
+				bmcClient := bmclib.NewClient(auth.BMCAddress, auth.Username, auth.Password, clientOpts...)
+				bmcMap[node] = bmcClient
+			}
+
+			var waitGroup sync.WaitGroup
+
+			for node, client := range bmcMap {
+				waitGroup.Add(1)
+
+				go func(wg *sync.WaitGroup, nodeName string, client *bmclib.Client) {
+					glog.V(rebootparams.RebootLogLevel).Infof("Starting goroutine for %s", nodeName)
+
+					defer GinkgoRecover()
+					defer wg.Done()
+
+					glog.V(rebootparams.RebootLogLevel).Infof("[%s] Setting timeout for context", nodeName)
+
+					bmcCtx, cancel := context.WithTimeout(context.Background(), 6*time.Minute)
+					defer cancel()
+
+					glog.V(rebootparams.RebootLogLevel).Infof("[%s] Starting BMC session", nodeName)
+
+					err := client.Open(bmcCtx)
+					Expect(err).ToNot(HaveOccurred(),
+						fmt.Sprintf("Failed to login to %s", nodeName))
+
+					defer client.Close(bmcCtx)
+
+					By(fmt.Sprintf("Querying power state on %s", nodeName))
+					glog.V(rebootparams.RebootLogLevel).Infof("Checking power state on %s", nodeName)
+
+					state, err := client.GetPowerState(bmcCtx)
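+					// NOTE: the reported state string is provider dependent; IPMI-based
+					// providers typically answer "Chassis Power is on" while Redfish
+					// reports "On" (an assumption based on common BMC tooling output),
+					// hence the two alternatives in the regex below.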
"Processing deployment %q in %q namespace", deploy.Definition.Name, deploy.Definition.Namespace) + + // for _, condition := range deploy.Object.Status.Conditions { + // if condition.Type == "Available" { + // if condition.Status != "True" { + // glog.V(rebootparams.RebootLogLevel).Infof( + // "Deployment %q in %q namespace is NotAvailable", deploy.Definition.Name, deploy.Definition.Namespace) + // glog.V(rebootparams.RebootLogLevel).Infof("\tReason: %s", condition.Reason) + // glog.V(rebootparams.RebootLogLevel).Infof("\tMessage: %s", condition.Message) + // nonAvailableDeployments = append(nonAvailableDeployments, deploy) + // } + // } + // } + // } + + // return len(nonAvailableDeployments) == 0 + // }).WithTimeout(25*time.Minute).WithPolling(15*time.Second).WithContext(ctx).Should(BeTrue(), + // "There are non-available deployments") // end Eventually + + By("Checking all cluster operators") + ok, err := clusteroperator.WaitForAllClusteroperatorsAvailable( + APIClient, 15*time.Minute, metav1.ListOptions{}) + + Expect(err).ToNot(HaveOccurred(), "Failed to get cluster operator status") + Expect(ok).To(BeTrue(), "Some cluster operators not Available") + + }) + + }) //