From ebd27f6feea32e585d8d2b198d1bf59b408421ca Mon Sep 17 00:00:00 2001
From: Stefan Majer
Date: Fri, 4 Oct 2024 08:20:19 +0200
Subject: [PATCH] Add powersupply failures to machine issues

---
 cmd/metal-api/internal/issues/issues_test.go  | 35 +++++++++++
 .../internal/issues/powersupply-failure.go    | 58 +++++++++++++++++++
 cmd/metal-api/internal/issues/types.go        |  3 +
 3 files changed, 96 insertions(+)
 create mode 100644 cmd/metal-api/internal/issues/powersupply-failure.go

diff --git a/cmd/metal-api/internal/issues/issues_test.go b/cmd/metal-api/internal/issues/issues_test.go
index 934f26add..c47786f36 100644
--- a/cmd/metal-api/internal/issues/issues_test.go
+++ b/cmd/metal-api/internal/issues/issues_test.go
@@ -85,6 +85,41 @@ func TestFindIssues(t *testing.T) {
 				}
 			},
 		},
+		{
+			name: "powersupply failure",
+			only: []Type{TypePowerSupplyFailure},
+			machines: func() metal.Machines {
+				powerSupplyFailureMachine := machineTemplate("power-supply-failure")
+				powerSupplyFailureMachine.IPMI = metal.IPMI{
+					PowerSupplies: metal.PowerSupplies{
+						{Status: metal.PowerSupplyStatus{Health: "NO-OK", State: "Absent"}},
+						{Status: metal.PowerSupplyStatus{Health: "OK", State: "Enabled"}},
+						{Status: metal.PowerSupplyStatus{Health: "Critical", State: "Enabled"}},
+					},
+				}
+
+				return metal.Machines{
+					powerSupplyFailureMachine,
+					machineTemplate("good"),
+				}
+			},
+			eventContainers: func() metal.ProvisioningEventContainers {
+				return metal.ProvisioningEventContainers{
+					eventContainerTemplate("power-supply-failure"),
+					eventContainerTemplate("good"),
+				}
+			},
+			want: func(machines metal.Machines) MachineIssues {
+				return MachineIssues{
+					{
+						Machine: &machines[0],
+						Issues: Issues{
+							toIssue(&issuePowerSupplyFailure{details: "Health:NO-OK State:Absent, Health:Critical State:Enabled"}),
+						},
+					},
+				}
+			},
+		},
 		{
 			name: "liveliness dead",
 			only: []Type{TypeLivelinessDead},
diff --git a/cmd/metal-api/internal/issues/powersupply-failure.go b/cmd/metal-api/internal/issues/powersupply-failure.go
new file mode 100644
index 000000000..1b002f14c
--- /dev/null
+++ b/cmd/metal-api/internal/issues/powersupply-failure.go
@@ -0,0 +1,58 @@
+package issues
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
+)
+
+const (
+	// TypePowerSupplyFailure identifies the issue raised for machines whose
+	// power supplies are not all healthy and enabled.
+	TypePowerSupplyFailure Type = "powersupply-failure"
+)
+
+type (
+	// issuePowerSupplyFailure is the issue implementation for TypePowerSupplyFailure.
+	issuePowerSupplyFailure struct {
+		// details lists the health and state of every failing power supply
+		// found by the most recent call to Evaluate.
+		details string
+	}
+)
+
+// Spec returns the static description of the power supply failure issue.
+func (i *issuePowerSupplyFailure) Spec() *spec {
+	return &spec{
+		Type:        TypePowerSupplyFailure,
+		Severity:    SeverityMajor,
+		Description: "machine has power supply failures",
+		RefURL:      "https://docs.metal-stack.io/stable/installation/troubleshoot/#power-supply-failure",
+	}
+}
+
+// Evaluate reports whether at least one power supply of m has a health other
+// than "OK" or a state other than "Enabled". The health and state of every
+// such power supply are recorded for Details; ec and c are not used.
+func (i *issuePowerSupplyFailure) Evaluate(m metal.Machine, ec metal.ProvisioningEventContainer, c *Config) bool {
+	var failures []string
+
+	for _, ps := range m.IPMI.PowerSupplies {
+		if ps.Status.Health != "OK" || ps.Status.State != "Enabled" {
+			failures = append(failures, fmt.Sprintf("Health:%s State:%s", ps.Status.Health, ps.Status.State))
+		}
+	}
+
+	// Always overwrite details so that a re-evaluated issue never reports
+	// failures from an earlier machine.
+	i.details = strings.Join(failures, ", ")
+
+	return len(failures) > 0
+}
+
+// Details returns the failures recorded by the last Evaluate call, or an empty
+// string if there were none.
+func (i *issuePowerSupplyFailure) Details() string {
+	return i.details
+}
diff --git a/cmd/metal-api/internal/issues/types.go b/cmd/metal-api/internal/issues/types.go
index 2ff13ad0e..c38536a08 100644
--- a/cmd/metal-api/internal/issues/types.go
+++ b/cmd/metal-api/internal/issues/types.go
@@ -21,6 +21,7 @@ func AllIssueTypes() []Type {
 		TypeASNUniqueness,
 		TypeNonDistinctBMCIP,
 		TypeNoEventContainer,
+		TypePowerSupplyFailure,
 	}
 }
 
@@ -52,6 +53,8 @@ func NewIssueFromType(t Type) (issue, error) {
 		return &issueNonDistinctBMCIP{}, nil
 	case TypeNoEventContainer:
 		return &issueNoEventContainer{}, nil
+	case TypePowerSupplyFailure:
+		return &issuePowerSupplyFailure{}, nil
 	default:
 		return nil, fmt.Errorf("unknown issue type: %s", t)
 	}