diff --git a/cmd/metal-api/internal/issues/asn-uniqueness.go b/cmd/metal-api/internal/issues/asn-uniqueness.go
new file mode 100644
index 000000000..e01a07756
--- /dev/null
+++ b/cmd/metal-api/internal/issues/asn-uniqueness.go
@@ -0,0 +1,109 @@
+package issues
+
+import (
+	"fmt"
+	"sort"
+	"strings"
+
+	"github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
+)
+
+const (
+	// IssueTypeASNUniqueness is the type of the issue raised for firewalls sharing an ASN.
+	IssueTypeASNUniqueness IssueType = "asn-not-unique"
+)
+
+type (
+	// IssueASNUniqueness detects firewalls that use the same ASN as another firewall.
+	IssueASNUniqueness struct {
+		// details lists the shared ASNs, it is filled by Evaluate.
+		details string
+	}
+)
+
+// Spec returns the static specification of the issue.
+func (i *IssueASNUniqueness) Spec() *issueSpec {
+	spec := issueSpec{
+		Type:        IssueTypeASNUniqueness,
+		Severity:    IssueSeverityMinor,
+		Description: "The ASN is not unique (only impact on firewalls)",
+		RefURL:      "https://docs.metal-stack.io/stable/installation/troubleshoot/#asn-not-unique",
+	}
+
+	return &spec
+}
+
+// Evaluate reports whether the firewall m has a non-zero ASN that is also used by
+// another firewall contained in c.Machines. Machines that are not allocated as
+// firewall never have this issue. On a match the shared ASNs are stored as details.
+func (i *IssueASNUniqueness) Evaluate(m metal.Machine, ec metal.ProvisioningEventContainer, c *IssueConfig) bool {
+	isFirewall := func(candidate metal.Machine) bool {
+		return candidate.Allocation != nil && candidate.Allocation.Role == metal.RoleFirewall
+	}
+
+	if !isFirewall(m) {
+		return false
+	}
+
+	// sharedBy is keyed by the non-zero ASNs of m and collects the other firewalls using them
+	sharedBy := map[uint32]metal.Machines{}
+	for _, nw := range m.Allocation.MachineNetworks {
+		if nw.ASN != 0 {
+			sharedBy[nw.ASN] = nil
+		}
+	}
+
+	for _, other := range c.Machines {
+		if other.ID == m.ID || !isFirewall(other) {
+			continue
+		}
+
+		for _, nw := range other.Allocation.MachineNetworks {
+			if nw.ASN == 0 {
+				continue
+			}
+
+			if _, tracked := sharedBy[nw.ASN]; tracked {
+				sharedBy[nw.ASN] = append(sharedBy[nw.ASN], other)
+			}
+		}
+	}
+
+	// visit the ASNs in ascending order so that the messages are built deterministically
+	var asns []uint32
+	for asn := range sharedBy {
+		asns = append(asns, asn)
+	}
+	sort.Slice(asns, func(a, b int) bool {
+		return asns[a] < asns[b]
+	})
+
+	var overlaps []string
+	for _, asn := range asns {
+		others := sharedBy[asn]
+		if len(others) == 0 {
+			continue
+		}
+
+		var ids []string
+		for _, o := range others {
+			ids = append(ids, o.ID)
+		}
+
+		overlaps = append(overlaps, fmt.Sprintf("- ASN (%d) not unique, shared with %s", asn, ids))
+	}
+
+	if len(overlaps) == 0 {
+		return false
+	}
+
+	sort.Strings(overlaps)
+	i.details = strings.Join(overlaps, "\n")
+
+	return true
+}
+
+// Details returns the message stored by Evaluate when the issue was found.
+func (i *IssueASNUniqueness) Details() string {
+	return i.details
+}
diff --git a/cmd/metal-api/internal/issues/bmc-info-outdated.go b/cmd/metal-api/internal/issues/bmc-info-outdated.go
new file mode 100644
index 000000000..d7766d875
--- /dev/null
+++ b/cmd/metal-api/internal/issues/bmc-info-outdated.go
@@ -0,0 +1,54 @@
+package issues
+
+import (
+	"fmt"
+	"time"
+
+	"github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
+)
+
+const (
+	// IssueTypeBMCInfoOutdated is the type of the issue raised for stale BMC information.
+	IssueTypeBMCInfoOutdated IssueType = "bmc-info-outdated"
+)
+
+type (
+	// IssueBMCInfoOutdated detects machines whose IPMI data was not refreshed recently.
+	IssueBMCInfoOutdated struct {
+		// details explains how old the data is, it is filled by Evaluate.
+		details string
+	}
+)
+
+// Details returns the message stored by Evaluate when the issue was found.
+func (i *IssueBMCInfoOutdated) Details() string {
+	return i.details
+}
+
+// Evaluate reports whether the IPMI data of m was never set or was last updated
+// more than 20 minutes ago.
+func (i *IssueBMCInfoOutdated) Evaluate(m metal.Machine, ec metal.ProvisioningEventContainer, c *IssueConfig) bool {
+	if m.IPMI.LastUpdated.IsZero() {
+		i.details = "machine ipmi has never been set"
+		return true
+	}
+
+	age := time.Since(m.IPMI.LastUpdated)
+	if age <= 20*time.Minute {
+		return false
+	}
+
+	i.details = fmt.Sprintf("last updated %s ago", age.String())
+
+	return true
+}
+
+// Spec returns the static specification of the issue.
+func (*IssueBMCInfoOutdated) Spec() *issueSpec {
+	return &issueSpec{
+		Type:        IssueTypeBMCInfoOutdated,
+		Severity:    IssueSeverityMajor,
+		Description: "BMC has not been updated from either metal-hammer or metal-bmc",
+		RefURL:      "https://docs.metal-stack.io/stable/installation/troubleshoot/#bmc-info-outdated",
+	}
+}
diff --git a/cmd/metal-api/internal/issues/bmc-without-ip.go
b/cmd/metal-api/internal/issues/bmc-without-ip.go
new file mode 100644
index 000000000..3552b440e
--- /dev/null
+++ b/cmd/metal-api/internal/issues/bmc-without-ip.go
@@ -0,0 +1,29 @@
+package issues
+
+import "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
+
+// IssueTypeBMCWithoutIP is the type of the issue raised for machines without a BMC address.
+const IssueTypeBMCWithoutIP IssueType = "bmc-without-ip"
+
+// IssueBMCWithoutIP detects machines whose IPMI address is empty.
+type IssueBMCWithoutIP struct{}
+
+// Spec returns the static specification of the issue.
+func (*IssueBMCWithoutIP) Spec() *issueSpec {
+	return &issueSpec{
+		Type:        IssueTypeBMCWithoutIP,
+		Severity:    IssueSeverityMajor,
+		Description: "BMC has no ip address",
+		RefURL:      "https://docs.metal-stack.io/stable/installation/troubleshoot/#bmc-without-ip",
+	}
+}
+
+// Evaluate reports whether the IPMI address of m is empty.
+func (*IssueBMCWithoutIP) Evaluate(m metal.Machine, ec metal.ProvisioningEventContainer, c *IssueConfig) bool {
+	return len(m.IPMI.Address) == 0
+}
+
+// Details returns no text as the issue has no additional information.
+func (*IssueBMCWithoutIP) Details() string {
+	return ""
+}
diff --git a/cmd/metal-api/internal/issues/bmc-without-mac.go b/cmd/metal-api/internal/issues/bmc-without-mac.go
new file mode 100644
index 000000000..bb220b9da
--- /dev/null
+++ b/cmd/metal-api/internal/issues/bmc-without-mac.go
@@ -0,0 +1,29 @@
+package issues
+
+import "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
+
+// IssueTypeBMCWithoutMAC is the type of the issue raised for machines without a BMC mac address.
+const IssueTypeBMCWithoutMAC IssueType = "bmc-without-mac"
+
+// IssueBMCWithoutMAC detects machines whose IPMI mac address is empty.
+type IssueBMCWithoutMAC struct{}
+
+// Spec returns the static specification of the issue.
+func (*IssueBMCWithoutMAC) Spec() *issueSpec {
+	return &issueSpec{
+		Type:        IssueTypeBMCWithoutMAC,
+		Severity:    IssueSeverityMajor,
+		Description: "BMC has no mac address",
+		RefURL:      "https://docs.metal-stack.io/stable/installation/troubleshoot/#bmc-without-mac",
+	}
+}
+
+// Evaluate reports whether the IPMI mac address of m is empty.
+func (*IssueBMCWithoutMAC) Evaluate(m metal.Machine, ec metal.ProvisioningEventContainer, c *IssueConfig) bool {
+	return len(m.IPMI.MacAddress) == 0
+}
+
+// Details returns no text as the issue has no additional information.
+func (*IssueBMCWithoutMAC) Details() string {
+	return ""
+}
diff --git a/cmd/metal-api/internal/issues/crash-loop.go b/cmd/metal-api/internal/issues/crash-loop.go
new file mode 100644
index 000000000..668d69e03
--- /dev/null
+++ 
b/cmd/metal-api/internal/issues/crash-loop.go
@@ -0,0 +1,42 @@
+package issues
+
+import (
+	"github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
+	"github.com/metal-stack/metal-lib/pkg/pointer"
+)
+
+const (
+	// IssueTypeCrashLoop is the type of the issue raised for machines in a crash loop.
+	IssueTypeCrashLoop IssueType = "crashloop"
+)
+
+type (
+	// IssueCrashLoop detects machines that are in a provisioning crash loop.
+	IssueCrashLoop struct{}
+)
+
+// Spec returns the static specification of the issue.
+func (i *IssueCrashLoop) Spec() *issueSpec {
+	return &issueSpec{
+		Type:        IssueTypeCrashLoop,
+		Severity:    IssueSeverityMajor,
+		Description: "machine is in a provisioning crash loop (⭕)",
+		RefURL:      "https://docs.metal-stack.io/stable/installation/troubleshoot/#crashloop",
+	}
+}
+
+// Evaluate reports whether the event container is flagged as crash loop.
+// Machines whose first entry of ec.Events is the waiting event are not
+// considered to have the issue.
+func (i *IssueCrashLoop) Evaluate(m metal.Machine, ec metal.ProvisioningEventContainer, c *IssueConfig) bool {
+	if !ec.CrashLoop {
+		return false
+	}
+
+	return pointer.FirstOrZero(ec.Events).Event != metal.ProvisioningEventWaiting
+}
+
+// Details returns no text as the issue has no additional information.
+func (i *IssueCrashLoop) Details() string {
+	return ""
+}
diff --git a/cmd/metal-api/internal/issues/failed-machine-reclaim.go b/cmd/metal-api/internal/issues/failed-machine-reclaim.go
new file mode 100644
index 000000000..02b62ebfa
--- /dev/null
+++ b/cmd/metal-api/internal/issues/failed-machine-reclaim.go
@@ -0,0 +1,43 @@
+package issues
+
+import (
+	"github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
+	"github.com/metal-stack/metal-lib/pkg/pointer"
+)
+
+const (
+	// IssueTypeFailedMachineReclaim is the type of the issue raised when a machine reclaim failed.
+	IssueTypeFailedMachineReclaim IssueType = "failed-machine-reclaim"
+)
+
+type (
+	// IssueFailedMachineReclaim detects machines that phone home although they are not allocated.
+	IssueFailedMachineReclaim struct{}
+)
+
+// Spec returns the static specification of the issue.
+func (i *IssueFailedMachineReclaim) Spec() *issueSpec {
+	return &issueSpec{
+		Type:        IssueTypeFailedMachineReclaim,
+		Severity:    IssueSeverityCritical,
+		Description: "machine phones home but not allocated",
+		RefURL:      "https://docs.metal-stack.io/stable/installation/troubleshoot/#failed-machine-reclaim",
+	}
+}
+
+// Evaluate reports whether the event container is flagged as failed machine reclaim
+// or whether an unallocated machine has a phoned home event as first entry of ec.Events.
+func (i *IssueFailedMachineReclaim) Evaluate(m metal.Machine, ec metal.ProvisioningEventContainer, c *IssueConfig) bool {
+	if ec.FailedMachineReclaim {
+		return true
+	}
+
+	// compatibility: before the provisioning FSM was renewed, this state could be detected the following way
+	// we should keep this condition
+	return m.Allocation == nil && pointer.FirstOrZero(ec.Events).Event == metal.ProvisioningEventPhonedHome
+}
+
+// Details returns no text as the issue has no additional information.
+func (i *IssueFailedMachineReclaim) Details() string {
+	return ""
+}
diff --git a/cmd/metal-api/internal/issues/issues.go b/cmd/metal-api/internal/issues/issues.go
new file mode 100644
index 000000000..1181f8588
--- /dev/null
+++ b/cmd/metal-api/internal/issues/issues.go
@@ -0,0 +1,204 @@
+package issues
+
+import (
+	"sort"
+	"time"
+
+	"github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
+)
+
+type (
+	// IssueConfig contains configuration parameters for finding machine issues
+	IssueConfig struct {
+		Machines           metal.Machines
+		EventContainers    metal.ProvisioningEventContainers
+		Severity           IssueSeverity
+		Only               []IssueType
+		Omit               []IssueType
+		LastErrorThreshold time.Duration
+	}
+
+	// Issue formulates an issue of a machine
+	Issue struct {
+		Type        IssueType
+		Severity    IssueSeverity
+		Description string
+		RefURL      string
+		Details     string
+	}
+
+	// Issues is a list of issues
+	Issues []Issue
+
+	// MachineWithIssues summarizes a machine with issues
+	MachineWithIssues struct {
+		Machine *metal.Machine
+		Issues  Issues
+	}
+	// MachineIssues is a list of machines together with their issues
+	MachineIssues []*MachineWithIssues
+
+	// machineIssueMap collects the issues per machine, it is keyed by the machine id
+	machineIssueMap map[string]*MachineWithIssues
+
+	issueImpl interface {
+		// Evaluate decides whether a given machine has the machine issue.
+		// the third argument contains additional information that may be required for the issue evaluation
+		Evaluate(m metal.Machine, ec metal.ProvisioningEventContainer, c *IssueConfig) bool
+		// Spec returns the issue spec of this issue.
+		Spec() *issueSpec
+		// Details returns additional information on the issue after the evaluation.
+		Details() string
+	}
+
+	// issueSpec defines the specification of an issue.
+	issueSpec struct {
+		Type        IssueType
+		Severity    IssueSeverity
+		Description string
+		RefURL      string
+	}
+)
+
+// AllIssues returns the description of every issue type returned by AllIssueTypes.
+func AllIssues() Issues {
+	var res Issues
+
+	for _, t := range AllIssueTypes() {
+		i, err := newIssueFromType(t)
+		if err != nil {
+			continue
+		}
+
+		res = append(res, toIssue(i))
+	}
+
+	return res
+}
+
+// toIssue converts an issue implementation into its exported representation.
+func toIssue(i issueImpl) Issue {
+	spec := i.Spec()
+
+	return Issue{
+		Type:        spec.Type,
+		Severity:    spec.Severity,
+		Description: spec.Description,
+		RefURL:      spec.RefURL,
+		Details:     i.Details(),
+	}
+}
+
+// FindIssues evaluates the issue types included by the config against the
+// machines of the config. The result contains every machine with issues exactly
+// once, together with all of its issues, and is sorted by machine id.
+//
+// A machine without provisioning event container is only checked for the
+// no-event-container issue (evaluated with the zero value container), all other
+// issue types are skipped for it.
+func FindIssues(c *IssueConfig) (MachineIssues, error) {
+	res := machineIssueMap{}
+
+	ecs := c.EventContainers.ByID()
+
+	for _, t := range AllIssueTypes() {
+		if !c.includeIssue(t) {
+			continue
+		}
+
+		for _, m := range c.Machines {
+			m := m
+
+			// the implementations store machine specific details, so every machine needs its own instance
+			i, err := newIssueFromType(t)
+			if err != nil {
+				return nil, err
+			}
+
+			ec, ok := ecs[m.ID]
+			if !ok && t != IssueTypeNoEventContainer {
+				continue
+			}
+
+			if i.Evaluate(m, ec, c) {
+				res.add(m, toIssue(i))
+			}
+		}
+	}
+
+	return res.toList(), nil
+}
+
+// Get returns the entry of the machine with the given id or nil if there is none.
+func (mis MachineIssues) Get(id string) *MachineWithIssues {
+	for _, m := range mis {
+		m := m
+
+		if m.Machine == nil {
+			continue
+		}
+
+		if m.Machine.ID == id {
+			return m
+		}
+	}
+
+	return nil
+}
+
+// includeIssue reports whether the issue type passes the severity, omit and only filters of the config.
+func (c *IssueConfig) includeIssue(t IssueType) bool {
+	issue, err := newIssueFromType(t)
+	if err != nil {
+		return false
+	}
+
+	if issue.Spec().Severity.LowerThan(c.Severity) {
+		return false
+	}
+
+	for _, o := range c.Omit {
+		if t == o {
+			return false
+		}
+	}
+
+	if len(c.Only) > 0 {
+		for _, o := range c.Only {
+			if t == o {
+				return true
+			}
+		}
+		return false
+	}
+
+	return true
+}
+
+// add appends the issue to the entry of the machine, the entry is created on first use.
+// The machines are identified by their id because a pointer to the by-value
+// parameter would be a different key on every call.
+func (mim machineIssueMap) add(m metal.Machine, issue Issue) {
+	entry, ok := mim[m.ID]
+	if !ok {
+		entry = &MachineWithIssues{Machine: &m}
+		mim[m.ID] = entry
+	}
+
+	entry.Issues = append(entry.Issues, issue)
+}
+
+// toList returns the collected entries sorted by machine id.
+func (mim machineIssueMap) toList() MachineIssues {
+	var res MachineIssues
+
+	for _, entry := range mim {
+		res = append(res, entry)
+	}
+
+	sort.Slice(res, func(i, j int) bool {
+		return res[i].Machine.ID < res[j].Machine.ID
+	})
+
+	return res
+}
diff --git a/cmd/metal-api/internal/issues/issues_test.go b/cmd/metal-api/internal/issues/issues_test.go
new file mode 100644
index 000000000..31b359ca7
--- /dev/null
+++ b/cmd/metal-api/internal/issues/issues_test.go
@@ -0,0 +1,542 @@
+package issues
+
+import (
+	"testing"
+	"time"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
+	"github.com/stretchr/testify/require"
+)
+
+func TestFindIssues(t *testing.T) {
+	machineTemplate := func(id string) metal.Machine {
+		return metal.Machine{
+			Base: metal.Base{
+				ID: id,
+			},
+			PartitionID: "a",
+			IPMI: metal.IPMI{
+				Address:     "1.2.3.4",
+				MacAddress:  "aa:bb:00",
+				LastUpdated: time.Now().Add(-1 * time.Minute),
+			},
+		}
+	}
+	eventContainerTemplate := func(id string) metal.ProvisioningEventContainer {
+		return metal.ProvisioningEventContainer{
+			Base: metal.Base{
+				ID: id,
+			},
+			Liveliness: metal.MachineLivelinessAlive,
+		}
+	}
+
+	tests := []struct {
+		name string
+		only []IssueType
+
+		machines        func() metal.Machines
+		eventContainers func() metal.ProvisioningEventContainers
+
+		want func(machines metal.Machines) MachineIssues
+	}{
+		{
+			name: "good machine has no issues",
+			machines: func() metal.Machines {
+				return metal.Machines{
+					machineTemplate("good"),
+				}
+			},
+			eventContainers: func() metal.ProvisioningEventContainers {
+				return metal.ProvisioningEventContainers{
+					eventContainerTemplate("good"),
+				}
+			},
+			want: nil,
+		},
+		{
+			name: "no partition",
+			only: []IssueType{IssueTypeNoPartition},
+			machines: func() metal.Machines {
+				noPartitionMachine := machineTemplate("no-partition")
+				noPartitionMachine.PartitionID = ""
+
+				return metal.Machines{
+					noPartitionMachine,
+					machineTemplate("good"),
+				}
+			},
+			eventContainers: func() metal.ProvisioningEventContainers {
+				return metal.ProvisioningEventContainers{
+					eventContainerTemplate("no-partition"),
+					eventContainerTemplate("good"),
+				}
+			},
+			want: func(machines metal.Machines) MachineIssues {
+				return MachineIssues{
+					{
+						Machine: &machines[0],
+						Issues: Issues{
+							toIssue(&IssueNoPartition{}),
+						},
+					},
+				}
+			},
+		},
+		{
+			name: "liveliness dead",
+			only: []IssueType{IssueTypeLivelinessDead},
+			machines: func() metal.Machines {
+				return metal.Machines{
+					machineTemplate("dead"),
+					machineTemplate("good"),
+				}
+			},
+			eventContainers: func() metal.ProvisioningEventContainers {
+				dead := eventContainerTemplate("dead")
+				dead.Liveliness = metal.MachineLivelinessDead
+
+				return metal.ProvisioningEventContainers{
+					dead,
+					eventContainerTemplate("good"),
+				}
+			},
+			want: func(machines metal.Machines) MachineIssues {
+				return MachineIssues{
+					{
+						Machine: &machines[0],
+						Issues: Issues{
+							toIssue(&IssueLivelinessDead{}),
+						},
+					},
+				}
+			},
+		},
+		{
+			name: "liveliness unknown",
+			only: []IssueType{IssueTypeLivelinessUnknown},
+			machines: func() metal.Machines {
+				return metal.Machines{
+					machineTemplate("unknown"),
+					machineTemplate("good"),
+				}
+			},
+			eventContainers: func() metal.ProvisioningEventContainers {
+				unknown := eventContainerTemplate("unknown")
+				unknown.Liveliness = metal.MachineLivelinessUnknown
+
+				return metal.ProvisioningEventContainers{
+					unknown,
+					eventContainerTemplate("good"),
+				}
+			},
+			want: func(machines metal.Machines) MachineIssues {
+				return MachineIssues{
+					{
+						Machine: &machines[0],
+						Issues: Issues{
+							toIssue(&IssueLivelinessUnknown{}),
+						},
+					},
+				}
+			},
+		},
+		{
+			name: "liveliness not available",
+			only: []IssueType{IssueTypeLivelinessNotAvailable},
+			machines: func() metal.Machines {
+				return metal.Machines{
+					machineTemplate("n/a"),
+					machineTemplate("good"),
+				}
+			},
+			eventContainers: func() metal.ProvisioningEventContainers {
+				na := eventContainerTemplate("n/a")
+				na.Liveliness = metal.MachineLiveliness("")
+
+				return metal.ProvisioningEventContainers{
+					na,
+					eventContainerTemplate("good"),
+				}
+			},
+			want: func(machines metal.Machines) MachineIssues {
+				return MachineIssues{
+					{
+						Machine: &machines[0],
+						Issues: Issues{
+							toIssue(&IssueLivelinessNotAvailable{}),
+						},
+					},
+				}
+			},
+		},
+		{
+			name: "failed machine reclaim",
+			only: []IssueType{IssueTypeFailedMachineReclaim},
+			machines: func() metal.Machines {
+				failedOld := machineTemplate("failed-old")
+
+				return metal.Machines{
+					machineTemplate("good"),
+					machineTemplate("failed"),
+					failedOld,
+				}
+			},
+			eventContainers: func() metal.ProvisioningEventContainers {
+				failed := eventContainerTemplate("failed")
+				failed.FailedMachineReclaim = true
+
+				failedOld := eventContainerTemplate("failed-old")
+				failedOld.Events = metal.ProvisioningEvents{
+					{
+						Event: metal.ProvisioningEventPhonedHome,
+					},
+				}
+
+				return metal.ProvisioningEventContainers{
+					failed,
+					eventContainerTemplate("good"),
+					failedOld,
+				}
+			},
+			want: func(machines metal.Machines) MachineIssues {
+				return MachineIssues{
+					{
+						Machine: &machines[1],
+						Issues: Issues{
+							toIssue(&IssueFailedMachineReclaim{}),
+						},
+					},
+					{
+						Machine: &machines[2],
+						Issues: Issues{
+							toIssue(&IssueFailedMachineReclaim{}),
+						},
+					},
+				}
+			},
+		},
+		{
+			name: "crashloop",
+			only: []IssueType{IssueTypeCrashLoop},
+			machines: func() metal.Machines {
+				return metal.Machines{
+					machineTemplate("good"),
+					machineTemplate("crash"),
+				}
+			},
+			eventContainers: func() metal.ProvisioningEventContainers {
+				crash := eventContainerTemplate("crash")
+				crash.CrashLoop = true
+
+				return metal.ProvisioningEventContainers{
+					crash,
+					eventContainerTemplate("good"),
+				}
+			},
+			want: func(machines metal.Machines) MachineIssues {
+				return MachineIssues{
+					{
+						Machine: &machines[1],
+						Issues: Issues{
+							toIssue(&IssueCrashLoop{}),
+						},
+					},
+				}
+			},
+		},
+		{
+			name: "several issues of one machine are aggregated",
+			only: []IssueType{IssueTypeNoPartition, IssueTypeLivelinessDead},
+			machines: func() metal.Machines {
+				broken := machineTemplate("broken")
+				broken.PartitionID = ""
+
+				return metal.Machines{
+					broken,
+					machineTemplate("good"),
+				}
+			},
+			eventContainers: func() metal.ProvisioningEventContainers {
+				broken := eventContainerTemplate("broken")
+				broken.Liveliness = metal.MachineLivelinessDead
+
+				return metal.ProvisioningEventContainers{
+					broken,
+					eventContainerTemplate("good"),
+				}
+			},
+			want: func(machines metal.Machines) MachineIssues {
+				return MachineIssues{
+					{
+						Machine: &machines[0],
+						Issues: Issues{
+							toIssue(&IssueNoPartition{}),
+							toIssue(&IssueLivelinessDead{}),
+						},
+					},
+				}
+			},
+		},
+		{
+			name: "no event container",
+			only: []IssueType{IssueTypeNoEventContainer},
+			machines: func() metal.Machines {
+				return metal.Machines{
+					machineTemplate("no-container"),
+					machineTemplate("good"),
+				}
+			},
+			eventContainers: func() metal.ProvisioningEventContainers {
+				return metal.ProvisioningEventContainers{
+					eventContainerTemplate("good"),
+				}
+			},
+			want: func(machines metal.Machines) MachineIssues {
+				return MachineIssues{
+					{
+						Machine: &machines[0],
+						Issues: Issues{
+							toIssue(&IssueNoEventContainer{}),
+						},
+					},
+				}
+			},
+		},
+		// {
+		// 	name: "last event error",
+		// 	only: []IssueType{IssueTypeLastEventError},
+		// 	machines: func() metal.Machines {
+		// 		lastEventErrorMachine := machineTemplate("last")
+		// 		lastEventErrorMachine.Events = &models.V1MachineRecentProvisioningEvents{
+		// 			LastErrorEvent: &models.V1MachineProvisioningEvent{
+		// 				Time: strfmt.DateTime(testTime.Add(-5 * time.Minute)),
+		// 			},
+		// 		}
+
+		// 		return metal.Machines{
+		// 			machineTemplate("0"),
+		// 			lastEventErrorMachine,
+		// 		}
+		// 	},
+		// 	want: func(machines metal.Machines) MachineIssues {
+		// 		return MachineIssues{
+		// 			{
+		// 				Machine: machines[1],
+		// 				Issues: Issues{
+		// 					toIssue(&IssueLastEventError{details: "occurred 5m0s ago"}),
+		// 				},
+		// 			},
+		// 		}
+		// 	},
+		// },
+		// {
+		// 	name: "bmc without mac",
+		// 	only: []IssueType{IssueTypeBMCWithoutMAC},
+		// 	machines: func() metal.Machines {
+		// 		bmcWithoutMacMachine := machineTemplate("no-mac")
+		// 		bmcWithoutMacMachine.Ipmi.Mac = nil
+
+		// 		return metal.Machines{
+		// 			machineTemplate("0"),
+		// 			bmcWithoutMacMachine,
+		// 		}
+		// 	},
+		// 	want: func(machines metal.Machines) MachineIssues {
+		// 		return MachineIssues{
+		// 			{
+		// 				Machine: machines[1],
+		// 				Issues: Issues{
+		// 					toIssue(&IssueBMCWithoutMAC{}),
+		// 				},
+		// 			},
+		// 		}
+		// 	},
+		// },
+		// {
+		// 	name: "bmc without ip",
+		// 	only: []IssueType{IssueTypeBMCWithoutIP},
+		// 	machines: func() metal.Machines {
+		// 		bmcWithoutMacMachine := machineTemplate("no-ip")
+		// 		bmcWithoutMacMachine.Ipmi.Address = nil
+
+		// 		return metal.Machines{
+		// 			machineTemplate("0"),
+		// 			bmcWithoutMacMachine,
+		// 		}
+		// 	},
+		// 	want: func(machines metal.Machines) MachineIssues {
+		// 		return MachineIssues{
+		// 			{
+		// 				Machine: machines[1],
+		// 				Issues: Issues{
+		// 					toIssue(&IssueBMCWithoutIP{}),
+		// 				},
+		// 			},
+		// 		}
+		// 	},
+		// },
+		// {
+		// 	name: "bmc info outdated",
+		// 	only: []IssueType{IssueTypeBMCInfoOutdated},
+		// 	machines: func() metal.Machines {
+		// 		bmcOutdatedMachine := machineTemplate("outdated")
+		// 		bmcOutdatedMachine.Ipmi.LastUpdated = pointer.Pointer(strfmt.DateTime(testTime.Add(-3 * 60 * time.Minute)))
+
+		// 		return metal.Machines{
+		// 			machineTemplate("0"),
+		// 			bmcOutdatedMachine,
+		// 		}
+		// 	},
+		// 	want: func(machines metal.Machines) MachineIssues {
+		// 		return MachineIssues{
+		// 			{
+		// 				Machine: machines[1],
+		// 				Issues: Issues{
+		// 					toIssue(&IssueBMCInfoOutdated{
+		// 						details: "last updated 3h0m0s ago",
+		// 					}),
+		// 				},
+		// 			},
+		// 		}
+		// 	},
+		// },
+		// {
+		// 	name: "asn shared",
+		// 	only: []IssueType{IssueTypeASNUniqueness},
+		// 	machines: func() metal.Machines {
+		// 		asnSharedMachine1 := machineTemplate("shared1")
+		// 		asnSharedMachine1.Allocation = &models.V1MachineAllocation{
+		// 			Role: pointer.Pointer(models.V1MachineAllocationRoleFirewall),
+		// 			Networks: []*models.V1MachineNetwork{
+		// 				{
+		// 					Asn: pointer.Pointer(int64(0)),
+		// 				},
+		// 				{
+		// 					Asn: pointer.Pointer(int64(100)),
+		// 				},
+		// 				{
+		// 					Asn: pointer.Pointer(int64(200)),
+		// 				},
+		// 			},
+		// 		}
+
+		// 		asnSharedMachine2 := machineTemplate("shared2")
+		// 		asnSharedMachine2.Allocation = &models.V1MachineAllocation{
+		// 			Role: pointer.Pointer(models.V1MachineAllocationRoleFirewall),
+		// 			Networks: []*models.V1MachineNetwork{
+		// 				{
+		// 					Asn: pointer.Pointer(int64(1)),
+		// 				},
+		// 				{
+		// 					Asn: pointer.Pointer(int64(100)),
+		// 				},
+		// 				{
+		// 					Asn: pointer.Pointer(int64(200)),
+		// 				},
+		// 			},
+		// 		}
+
+		// 		return metal.Machines{
+		// 			asnSharedMachine1,
+		// 			asnSharedMachine2,
+		// 			machineTemplate("0"),
+		// 		}
+		// 	},
+		// 	want: func(machines metal.Machines) MachineIssues {
+		// 		return MachineIssues{
+		// 			{
+		// 				Machine: machines[0],
+		// 				Issues: Issues{
+		// 					toIssue(&IssueASNUniqueness{
+		// 						details: fmt.Sprintf("- ASN (100) not unique, shared with [%[1]s]\n- ASN (200) not unique, shared with [%[1]s]", *machines[1].ID),
+		// 					}),
+		// 				},
+		// 			},
+		// 			{
+		// 				Machine: machines[1],
+		// 				Issues: Issues{
+		// 					toIssue(&IssueASNUniqueness{
+		// 						details: fmt.Sprintf("- ASN (100) not unique, shared with [%[1]s]\n- ASN (200) not unique, shared with [%[1]s]", *machines[0].ID),
+		// 					}),
+		// 				},
+		// 			},
+		// 		}
+		// 	},
+		// },
+		// {
+		// 	name: "non distinct bmc ip",
+		// 	only: []IssueType{IssueTypeNonDistinctBMCIP},
+		// 	machines: func() metal.Machines {
+		// 		nonDistinctBMCMachine1 := machineTemplate("bmc1")
+		// 		nonDistinctBMCMachine1.Ipmi.Address = pointer.Pointer("127.0.0.1")
+
+		// 		nonDistinctBMCMachine2 := machineTemplate("bmc2")
+		// 		nonDistinctBMCMachine2.Ipmi.Address = pointer.Pointer("127.0.0.1")
+
+		// 		return metal.Machines{
+		// 			nonDistinctBMCMachine1,
+		// 			nonDistinctBMCMachine2,
+		// 			machineTemplate("0"),
+		// 		}
+		// 	},
+		// 	want: func(machines metal.Machines) MachineIssues {
+		// 		return MachineIssues{
+		// 			{
+		// 				Machine: machines[0],
+		// 				Issues: Issues{
+		// 					toIssue(&IssueNonDistinctBMCIP{
+		// 						details: fmt.Sprintf("BMC IP (127.0.0.1) not unique, shared with [%[1]s]", *machines[1].ID),
+		// 					}),
+		// 				},
+		// 			},
+		// 			{
+		// 				Machine: machines[1],
+		// 				Issues: Issues{
+		// 					toIssue(&IssueNonDistinctBMCIP{
+		// 						details: fmt.Sprintf("BMC IP (127.0.0.1) not unique, shared with [%[1]s]", *machines[0].ID),
+		// 					}),
+		// 				},
+		// 			},
+		// 		}
+		// 	},
+		// },
+	}
+	for _, tt := range tests {
+		tt := tt
+		t.Run(tt.name, func(t *testing.T) {
+			ms := tt.machines()
+
+			got, err := FindIssues(&IssueConfig{
+				Machines:           ms,
+				EventContainers:    tt.eventContainers(),
+				Only:               tt.only,
+				LastErrorThreshold: DefaultLastErrorThreshold(),
+			})
+			require.NoError(t, err)
+
+			var want MachineIssues
+			if tt.want != nil {
+				want = tt.want(ms)
+			}
+
+			if diff := cmp.Diff(want, got, cmp.AllowUnexported(IssueLastEventError{}, IssueASNUniqueness{}, IssueNonDistinctBMCIP{})); diff != "" {
+				t.Errorf("diff (-want +got):\n %s", diff)
+			}
+		})
+	}
+}
+
+func TestAllIssues(t *testing.T) {
+	issuesTypes := map[IssueType]bool{}
+	for _, i := range AllIssues() {
+		issuesTypes[i.Type] = true
+	}
+
+	for _, ty := range AllIssueTypes() {
+		if _, ok := issuesTypes[ty]; !ok {
+			t.Errorf("issue of type %s not contained in all issues", ty)
+		}
+	}
+}
diff --git a/cmd/metal-api/internal/issues/last-event-error.go
b/cmd/metal-api/internal/issues/last-event-error.go
new file mode 100644
index 000000000..09ec35999
--- /dev/null
+++ b/cmd/metal-api/internal/issues/last-event-error.go
@@ -0,0 +1,58 @@
+package issues
+
+import (
+	"fmt"
+	"time"
+
+	"github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
+)
+
+const (
+	// IssueTypeLastEventError is the type of the issue raised for machines with a recent error event.
+	IssueTypeLastEventError IssueType = "last-event-error"
+)
+
+type (
+	// IssueLastEventError detects machines whose last error event occurred within the configured threshold.
+	IssueLastEventError struct {
+		// details tells how long ago the error occurred, it is filled by Evaluate.
+		details string
+	}
+)
+
+// DefaultLastErrorThreshold returns the default time span in which an error event counts as issue (7 days).
+func DefaultLastErrorThreshold() time.Duration {
+	return 7 * 24 * time.Hour
+}
+
+// Spec returns the static specification of the issue.
+func (i *IssueLastEventError) Spec() *issueSpec {
+	return &issueSpec{
+		Type:        IssueTypeLastEventError,
+		Severity:    IssueSeverityMinor,
+		Description: "the machine had an error during the provisioning lifecycle",
+		RefURL:      "https://docs.metal-stack.io/stable/installation/troubleshoot/#last-event-error",
+	}
+}
+
+// Evaluate reports whether the last error event of ec is younger than
+// c.LastErrorThreshold. A threshold of zero disables the check.
+func (i *IssueLastEventError) Evaluate(m metal.Machine, ec metal.ProvisioningEventContainer, c *IssueConfig) bool {
+	if c.LastErrorThreshold == 0 || ec.LastErrorEvent == nil {
+		return false
+	}
+
+	timeSince := time.Since(time.Time(ec.LastErrorEvent.Time))
+	if timeSince >= c.LastErrorThreshold {
+		return false
+	}
+
+	i.details = fmt.Sprintf("occurred %s ago", timeSince.String())
+
+	return true
+}
+
+// Details returns the message stored by Evaluate when the issue was found.
+func (i *IssueLastEventError) Details() string {
+	return i.details
+}
diff --git a/cmd/metal-api/internal/issues/liveliness-dead.go b/cmd/metal-api/internal/issues/liveliness-dead.go
new file mode 100644
index 000000000..88b9a5a2b
--- /dev/null
+++ b/cmd/metal-api/internal/issues/liveliness-dead.go
@@ -0,0 +1,33 @@
+package issues
+
+import "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
+
+const (
+	// IssueTypeLivelinessDead is the type of the issue raised for dead machines.
+	IssueTypeLivelinessDead IssueType = "liveliness-dead"
+)
+
+type (
+	// IssueLivelinessDead detects machines whose liveliness is dead.
+	IssueLivelinessDead struct{}
+)
+
+// Spec returns the static specification of the issue.
+func (i *IssueLivelinessDead) Spec() *issueSpec {
+	return &issueSpec{
+		Type:        IssueTypeLivelinessDead,
+		Severity:    IssueSeverityMajor,
+		Description: "the machine is not sending events anymore",
+		RefURL:      "https://docs.metal-stack.io/stable/installation/troubleshoot/#liveliness-dead",
+	}
+}
+
+// Evaluate reports whether ec.Liveliness.Is matches the dead liveliness.
+func (i *IssueLivelinessDead) Evaluate(m metal.Machine, ec metal.ProvisioningEventContainer, c *IssueConfig) bool {
+	return ec.Liveliness.Is(string(metal.MachineLivelinessDead))
+}
+
+// Details returns no text as the issue has no additional information.
+func (i *IssueLivelinessDead) Details() string {
+	return ""
+}
diff --git a/cmd/metal-api/internal/issues/liveliness-not-available.go b/cmd/metal-api/internal/issues/liveliness-not-available.go
new file mode 100644
index 000000000..11d7af720
--- /dev/null
+++ b/cmd/metal-api/internal/issues/liveliness-not-available.go
@@ -0,0 +1,37 @@
+package issues
+
+import "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
+
+const (
+	// IssueTypeLivelinessNotAvailable is the type of the issue raised for machines without a known liveliness.
+	IssueTypeLivelinessNotAvailable IssueType = "liveliness-not-available"
+)
+
+type (
+	// IssueLivelinessNotAvailable detects machines whose liveliness is none of the known values.
+	IssueLivelinessNotAvailable struct{}
+)
+
+// Spec returns the static specification of the issue, it has no reference URL.
+func (i *IssueLivelinessNotAvailable) Spec() *issueSpec {
+	return &issueSpec{
+		Type:        IssueTypeLivelinessNotAvailable,
+		Severity:    IssueSeverityMinor,
+		Description: "the machine liveliness is not available",
+	}
+}
+
+// Evaluate reports whether the liveliness of ec is none of alive, dead or unknown.
+func (i *IssueLivelinessNotAvailable) Evaluate(m metal.Machine, ec metal.ProvisioningEventContainer, c *IssueConfig) bool {
+	switch ec.Liveliness {
+	case metal.MachineLivelinessAlive, metal.MachineLivelinessDead, metal.MachineLivelinessUnknown:
+		return false
+	default:
+		return true
+	}
+}
+
+// Details returns no text as the issue has no additional information.
+func (i *IssueLivelinessNotAvailable) Details() string {
+	return ""
+}
diff --git a/cmd/metal-api/internal/issues/liveliness-unknown.go b/cmd/metal-api/internal/issues/liveliness-unknown.go
new file mode 100644
index 000000000..b306f82b8
--- /dev/null
+++ b/cmd/metal-api/internal/issues/liveliness-unknown.go
@@ -0,0 +1,33 @@
+package issues
+
+import "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
+
+const (
+	// IssueTypeLivelinessUnknown is the type of the issue raised for machines with unknown liveliness.
+	IssueTypeLivelinessUnknown IssueType = "liveliness-unknown"
+)
+
+type (
+	// IssueLivelinessUnknown detects machines whose liveliness is unknown.
+	IssueLivelinessUnknown struct{}
+)
+
+// Spec returns the static specification of the issue.
+func (i *IssueLivelinessUnknown) Spec() *issueSpec {
+	return &issueSpec{
+		Type:        IssueTypeLivelinessUnknown,
+		Severity:    IssueSeverityMajor,
+		Description: "the machine is not sending LLDP alive messages anymore",
+		RefURL:      "https://docs.metal-stack.io/stable/installation/troubleshoot/#liveliness-unknown",
+	}
+}
+
+// Evaluate reports whether ec.Liveliness.Is matches the unknown liveliness.
+func (i *IssueLivelinessUnknown) Evaluate(m metal.Machine, ec metal.ProvisioningEventContainer, c *IssueConfig) bool {
+	return ec.Liveliness.Is(string(metal.MachineLivelinessUnknown))
+}
+
+// Details returns no text as the issue has no additional information.
+func (i *IssueLivelinessUnknown) Details() string {
+	return ""
+}
diff --git a/cmd/metal-api/internal/issues/no-event-container.go b/cmd/metal-api/internal/issues/no-event-container.go
new file mode 100644
index 000000000..314510348
--- /dev/null
+++ b/cmd/metal-api/internal/issues/no-event-container.go
@@ -0,0 +1,35 @@
+package issues
+
+import (
+	"github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
+)
+
+const (
+	// IssueTypeNoEventContainer is the type of the issue raised for machines without event container.
+	IssueTypeNoEventContainer IssueType = "no-event-container"
+)
+
+type (
+	// IssueNoEventContainer detects machines that have no provisioning event container.
+	IssueNoEventContainer struct{}
+)
+
+// Spec returns the static specification of the issue.
+func (i *IssueNoEventContainer) Spec() *issueSpec {
+	return &issueSpec{
+		Type:        IssueTypeNoEventContainer,
+		Severity:    IssueSeverityMajor,
+		Description: "machine has no event container",
+		RefURL:      "https://docs.metal-stack.io/stable/installation/troubleshoot/#no-event-container",
+	}
+}
+
+// Evaluate reports whether the id of the given event container is empty.
+func (i *IssueNoEventContainer) Evaluate(m metal.Machine, ec metal.ProvisioningEventContainer, c *IssueConfig) bool {
+	return ec.Base.ID == ""
+}
+
+// Details returns no text as the issue has no additional information.
+func (i *IssueNoEventContainer) Details() string {
+	return ""
+}
diff --git a/cmd/metal-api/internal/issues/no-partition.go b/cmd/metal-api/internal/issues/no-partition.go
new file mode 100644
index 000000000..85e4e6460
--- /dev/null
+++ b/cmd/metal-api/internal/issues/no-partition.go
@@ -0,0 +1,33 @@
+package issues
+
+import "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
+
+const (
+	// IssueTypeNoPartition is the type of the issue raised for machines without partition.
+	IssueTypeNoPartition IssueType = "no-partition"
+)
+
+type (
+	// IssueNoPartition detects machines whose partition id is empty.
+	IssueNoPartition struct{}
+)
+
+// Spec returns the static specification of the issue.
+func (i *IssueNoPartition) Spec() *issueSpec {
+	return &issueSpec{
+		Type:        IssueTypeNoPartition,
+		Severity:    IssueSeverityMajor,
+		Description: "machine with no partition",
+		RefURL:      "https://docs.metal-stack.io/stable/installation/troubleshoot/#no-partition",
+	}
+}
+
+// Evaluate reports whether the partition id of m is empty.
+func (i *IssueNoPartition) Evaluate(m metal.Machine, ec metal.ProvisioningEventContainer, c *IssueConfig) bool {
+	return m.PartitionID == ""
+}
+
+// Details returns no text as the issue has no additional information.
+func (i *IssueNoPartition) Details() string {
+	return ""
+}
diff --git a/cmd/metal-api/internal/issues/non-distinct-bmc-ip.go b/cmd/metal-api/internal/issues/non-distinct-bmc-ip.go
new file mode 100644
index 000000000..29a35023d
--- /dev/null
+++ b/cmd/metal-api/internal/issues/non-distinct-bmc-ip.go
@@ -0,0 +1,62 @@
+package issues
+
+import (
+	"fmt"
+
+	"github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
+)
+
+const (
+	// IssueTypeNonDistinctBMCIP is the type of the issue raised for machines sharing a BMC address.
+	IssueTypeNonDistinctBMCIP IssueType = "bmc-no-distinct-ip"
+)
+
+type (
+	// IssueNonDistinctBMCIP detects machines whose IPMI address is also used by another machine.
+	IssueNonDistinctBMCIP struct {
+		// details lists the machines sharing the address, it is filled by Evaluate.
+		details string
+	}
+)
+
+// Spec returns the static specification of the issue. The severity is set
+// explicitly to minor: an unset severity is ranked like minor by LowerThan but
+// would show up as empty string in the resulting issue.
+func (i *IssueNonDistinctBMCIP) Spec() *issueSpec {
+	return &issueSpec{
+		Type:        IssueTypeNonDistinctBMCIP,
+		Severity:    IssueSeverityMinor,
+		Description: "BMC IP address is not distinct",
+		RefURL:      "https://docs.metal-stack.io/stable/installation/troubleshoot/#bmc-no-distinct-ip",
+	}
+}
+
+// Evaluate reports whether the non-empty IPMI address of m is also the IPMI address
+// of another machine in c.Machines. On a match the ids of these machines are stored as details.
+func (i *IssueNonDistinctBMCIP) Evaluate(m metal.Machine, ec metal.ProvisioningEventContainer, c *IssueConfig) bool {
+	bmcIP := m.IPMI.Address
+	if bmcIP == "" {
+		return false
+	}
+
+	var overlaps []string
+	for _, other := range c.Machines {
+		// bmcIP is not empty here, so machines without address never match
+		if other.ID != m.ID && other.IPMI.Address == bmcIP {
+			overlaps = append(overlaps, other.ID)
+		}
+	}
+
+	if len(overlaps) == 0 {
+		return false
+	}
+
+	i.details = fmt.Sprintf("BMC IP (%s) not unique, shared with %s", bmcIP, overlaps)
+
+	return true
+}
+
+// Details returns the message stored by Evaluate when the issue was found.
+func (i *IssueNonDistinctBMCIP) Details() string {
+	return i.details
+}
diff --git 
a/cmd/metal-api/internal/issues/severeties.go b/cmd/metal-api/internal/issues/severeties.go
new file mode 100644
index 000000000..624195bab
--- /dev/null
+++ b/cmd/metal-api/internal/issues/severeties.go
@@ -0,0 +1,55 @@
+package issues
+
+import "fmt"
+
+const (
+	// IssueSeverityMinor is an issue that should be checked from time to time but has no bad effects for the user.
+	IssueSeverityMinor IssueSeverity = "minor"
+	// IssueSeverityMajor is an issue where user experience is affected or provider resources are wasted.
+	// overall functionality is still maintained though. major issues should be resolved as soon as possible.
+	IssueSeverityMajor IssueSeverity = "major"
+	// IssueSeverityCritical is an issue that can lead to dysfunction of the system and needs to be handled as quickly as possible.
+	IssueSeverityCritical IssueSeverity = "critical"
+)
+
+type (
+	// IssueSeverity describes how severe an issue is.
+	IssueSeverity string
+)
+
+// AllSevereties returns all known severities from minor to critical.
+func AllSevereties() []IssueSeverity {
+	return []IssueSeverity{
+		IssueSeverityMinor,
+		IssueSeverityMajor,
+		IssueSeverityCritical,
+	}
+}
+
+// SeverityFromString returns the severity with the given name or an error for unknown names.
+func SeverityFromString(input string) (IssueSeverity, error) {
+	for _, severity := range AllSevereties() {
+		if string(severity) == input {
+			return severity, nil
+		}
+	}
+
+	return "", fmt.Errorf("unknown issue severity: %s", input)
+}
+
+// LowerThan reports whether s is less severe than o.
+// Unknown severities are ranked like minor.
+func (s IssueSeverity) LowerThan(o IssueSeverity) bool {
+	rank := func(severity IssueSeverity) int {
+		switch severity {
+		case IssueSeverityCritical:
+			return 10
+		case IssueSeverityMajor:
+			return 5
+		default:
+			return 0
+		}
+	}
+
+	return rank(s) < rank(o)
+}
diff --git a/cmd/metal-api/internal/issues/types.go b/cmd/metal-api/internal/issues/types.go
new file mode 100644
index 000000000..8b15a672f
--- /dev/null
+++ b/cmd/metal-api/internal/issues/types.go
@@ -0,0 +1,66 @@
+package issues
+
+import "fmt"
+
+type (
+	// IssueType is the identifier of a kind of machine issue.
+	IssueType string
+)
+
+// AllIssueTypes returns the identifiers of all known issue types.
+func AllIssueTypes() []IssueType {
+	return []IssueType{
+		IssueTypeNoPartition,
+		IssueTypeLivelinessDead,
+		IssueTypeLivelinessUnknown,
+		IssueTypeLivelinessNotAvailable,
+		IssueTypeFailedMachineReclaim,
+		IssueTypeCrashLoop,
+		IssueTypeLastEventError,
+		IssueTypeBMCWithoutMAC,
+		IssueTypeBMCWithoutIP,
+		IssueTypeBMCInfoOutdated,
+		IssueTypeASNUniqueness,
+		IssueTypeNonDistinctBMCIP,
+		IssueTypeNoEventContainer,
+	}
+}
+
+// newIssueFromType returns a new, empty issue implementation for the given type
+// or an error if the type is unknown.
+func newIssueFromType(t IssueType) (issueImpl, error) {
+	var impl issueImpl
+
+	switch t {
+	case IssueTypeNoPartition:
+		impl = &IssueNoPartition{}
+	case IssueTypeLivelinessDead:
+		impl = &IssueLivelinessDead{}
+	case IssueTypeLivelinessUnknown:
+		impl = &IssueLivelinessUnknown{}
+	case IssueTypeLivelinessNotAvailable:
+		impl = &IssueLivelinessNotAvailable{}
+	case IssueTypeFailedMachineReclaim:
+		impl = &IssueFailedMachineReclaim{}
+	case IssueTypeCrashLoop:
+		impl = &IssueCrashLoop{}
+	case IssueTypeLastEventError:
+		impl = &IssueLastEventError{}
+	case IssueTypeBMCWithoutMAC:
+		impl = &IssueBMCWithoutMAC{}
+	case IssueTypeBMCWithoutIP:
+		impl = &IssueBMCWithoutIP{}
+	case IssueTypeBMCInfoOutdated:
+		impl = &IssueBMCInfoOutdated{}
+	case IssueTypeASNUniqueness:
+		impl = &IssueASNUniqueness{}
+	case IssueTypeNonDistinctBMCIP:
+		impl = &IssueNonDistinctBMCIP{}
+	case IssueTypeNoEventContainer:
+		impl = &IssueNoEventContainer{}
+	default:
+		return nil, fmt.Errorf("unknown issue type: %s", t)
+	}
+
+	return impl, nil
+}
diff --git a/cmd/metal-api/internal/service/machine-service.go b/cmd/metal-api/internal/service/machine-service.go
index bf63ff9f2..c8fd91b39 100644
--- a/cmd/metal-api/internal/service/machine-service.go
+++ b/cmd/metal-api/internal/service/machine-service.go
@@ -11,6 +11,7 @@ import (
 	"time"
 
 	"github.com/metal-stack/metal-api/cmd/metal-api/internal/headscale"
+	"github.com/metal-stack/metal-api/cmd/metal-api/internal/issues"
 	"github.com/metal-stack/metal-lib/auditing"
 
 	"github.com/avast/retry-go/v4"
@@ -243,6 +244,17 @@ func (r *machineResource) webService() *restful.WebService {
Returns(http.StatusOK, "OK", v1.MachineResponse{}). DefaultReturns("Error", httperrors.HTTPErrorResponse{})) + ws.Route(ws.POST("/issues"). + To(viewer(r.issues)). + Operation("issues"). + Doc("returns machine issues"). + Metadata(restfulspec.KeyOpenAPITags, tags). + Metadata(auditing.Exclude, true). + Reads(v1.MachineIssuesRequest{}). + Writes([]v1.MachineIssueResponse{}). + Returns(http.StatusOK, "OK", []v1.MachineIssueResponse{}). + DefaultReturns("Error", httperrors.HTTPErrorResponse{})) + ws.Route(ws.POST("/ipmi"). To(editor(r.ipmiReport)). Operation("ipmiReport"). @@ -483,6 +495,73 @@ func (r *machineResource) updateMachine(request *restful.Request, response *rest r.send(request, response, http.StatusOK, resp) } +func (r *machineResource) issues(request *restful.Request, response *restful.Response) { + var requestPayload v1.MachineIssuesRequest + err := request.ReadEntity(&requestPayload) + if err != nil { + r.sendError(request, response, httperrors.BadRequest(err)) + return + } + + ms := metal.Machines{} + err = r.ds.SearchMachines(&requestPayload.MachineSearchQuery, &ms) + if err != nil { + r.sendError(request, response, defaultError(err)) + return + } + + ecs, err := r.ds.ListProvisioningEventContainers() + if err != nil { + r.sendError(request, response, defaultError(err)) + return + } + + var ( + severity = issues.IssueSeverityMinor + only []issues.IssueType + omit []issues.IssueType + lastErrorThreshold = issues.DefaultLastErrorThreshold() + ) + + issues, err := issues.FindIssues(&issues.IssueConfig{ + Machines: ms, + EventContainers: ecs, + Severity: severity, + Only: only, + Omit: omit, + LastErrorThreshold: lastErrorThreshold, + }) + if err != nil { + r.sendError(request, response, defaultError(err)) + return + } + + var issueResponse []*v1.MachineIssueResponse + for _, machineWithIssues := range issues { + machineWithIssues := machineWithIssues + + entry := &v1.MachineIssueResponse{ + MachineID: machineWithIssues.Machine.ID, + } + + for _, issue := 
range machineWithIssues.Issues { + issue := issue + + entry.Issues = append(entry.Issues, v1.MachineIssue{ + ID: string(issue.Type), + Severity: string(issue.Severity), + Description: issue.Description, + RefURL: issue.RefURL, + Details: issue.Details, + }) + } + + issueResponse = append(issueResponse, entry) + } + + r.send(request, response, http.StatusOK, issueResponse) +} + func (r *machineResource) getMachineConsolePassword(request *restful.Request, response *restful.Response) { var requestPayload v1.MachineConsolePasswordRequest err := request.ReadEntity(&requestPayload) diff --git a/cmd/metal-api/internal/service/v1/machine.go b/cmd/metal-api/internal/service/v1/machine.go index 3a83c1801..a964a2f63 100644 --- a/cmd/metal-api/internal/service/v1/machine.go +++ b/cmd/metal-api/internal/service/v1/machine.go @@ -4,6 +4,7 @@ import ( "time" "github.com/metal-stack/metal-api/cmd/metal-api/internal/datastore" + "github.com/metal-stack/metal-api/cmd/metal-api/internal/issues" "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" ) @@ -227,6 +228,7 @@ type MachineConsolePasswordRequest struct { ID string `json:"id" description:"id of the machine to get the consolepassword for"` Reason string `json:"reason" description:"reason why the consolepassword is requested, typically a incident number with short description"` } + type MachineConsolePasswordResponse struct { Common ConsolePassword string `json:"console_password" description:"the console password which was generated while provisioning"` @@ -264,6 +266,16 @@ type MachineReinstallRequest struct { ImageID string `json:"imageid" description:"the image id to be installed"` } +type MachineIssuesRequest struct { + datastore.MachineSearchQuery + + Only []issues.IssueType `json:"only" description:"a list of machine issues to include"` + Omit []issues.IssueType `json:"omit" description:"a list of machine issues to omit"` + + Severity string `json:"severity" description:"filters issue for given severity"` + 
LastErrorThreshold time.Duration `json:"last_error_threshold" description:"defines the last error threshold"` +} + type MachineAbortReinstallRequest struct { PrimaryDiskWiped bool `json:"primary_disk_wiped" description:"indicates whether the primary disk is already wiped"` } @@ -274,6 +286,19 @@ type MachineVPN struct { Connected bool `json:"connected" description:"connected to the VPN"` } +type MachineIssueResponse struct { + MachineID string `json:"machineid" description:"the machine id that has the given issues"` + Issues []MachineIssue `json:"issues" description:"the list of issues of this machine"` +} + +type MachineIssue struct { + ID string `json:"id" description:"the id of the issue"` + Severity string `json:"severity" description:"the severity of the issue"` + Description string `json:"description" description:"a description of the issue"` + RefURL string `json:"ref_url" description:"an issue reference to the issue in metal-stack docs"` + Details string `json:"details" description:"details of the issue"` +} + func NewMetalMachineHardware(r *MachineHardware) metal.MachineHardware { nics := metal.Nics{} for i := range r.Nics { diff --git a/spec/metal-api.json b/spec/metal-api.json index dc97a467e..acbf4f4f4 100644 --- a/spec/metal-api.json +++ b/spec/metal-api.json @@ -2736,6 +2736,267 @@ } } }, + "v1.MachineIssue": { + "properties": { + "description": { + "description": "a description of the issue", + "type": "string" + }, + "details": { + "description": "details of the issue", + "type": "string" + }, + "id": { + "description": "the id of the issue", + "type": "string" + }, + "ref_url": { + "description": "an issue reference to the issue in metal-stack docs", + "type": "string" + }, + "severity": { + "description": "the severity of the issue", + "type": "string" + } + }, + "required": [ + "description", + "details", + "id", + "ref_url", + "severity" + ] + }, + "v1.MachineIssueResponse": { + "properties": { + "issues": { + "description": "the list of issues 
of this machine", + "items": { + "$ref": "#/definitions/v1.MachineIssue" + }, + "type": "array" + }, + "machineid": { + "description": "the machine id that has the given issues", + "type": "string" + } + }, + "required": [ + "issues", + "machineid" + ] + }, + "v1.MachineIssuesRequest": { + "properties": { + "allocation_hostname": { + "type": "string" + }, + "allocation_image_id": { + "type": "string" + }, + "allocation_name": { + "type": "string" + }, + "allocation_project": { + "type": "string" + }, + "allocation_role": { + "type": "string" + }, + "allocation_succeeded": { + "type": "boolean" + }, + "disk_names": { + "items": { + "type": "string" + }, + "type": "array" + }, + "disk_sizes": { + "items": { + "format": "int64", + "type": "integer" + }, + "type": "array" + }, + "fru_board_mfg": { + "type": "string" + }, + "fru_board_mfg_serial": { + "type": "string" + }, + "fru_board_part_number": { + "type": "string" + }, + "fru_chassis_part_number": { + "type": "string" + }, + "fru_chassis_part_serial": { + "type": "string" + }, + "fru_product_manufacturer": { + "type": "string" + }, + "fru_product_part_number": { + "type": "string" + }, + "fru_product_serial": { + "type": "string" + }, + "hardware_cpu_cores": { + "format": "int64", + "type": "integer" + }, + "hardware_memory": { + "format": "int64", + "type": "integer" + }, + "id": { + "type": "string" + }, + "ipmi_address": { + "type": "string" + }, + "ipmi_interface": { + "type": "string" + }, + "ipmi_mac_address": { + "type": "string" + }, + "ipmi_user": { + "type": "string" + }, + "last_error_threshold": { + "description": "defines the last error threshold", + "format": "int64", + "type": "integer" + }, + "name": { + "type": "string" + }, + "network_asns": { + "items": { + "format": "int64", + "type": "integer" + }, + "type": "array" + }, + "network_destination_prefixes": { + "items": { + "type": "string" + }, + "type": "array" + }, + "network_ids": { + "items": { + "type": "string" + }, + "type": "array" + }, 
+ "network_ips": { + "items": { + "type": "string" + }, + "type": "array" + }, + "network_prefixes": { + "items": { + "type": "string" + }, + "type": "array" + }, + "network_vrfs": { + "items": { + "format": "int64", + "type": "integer" + }, + "type": "array" + }, + "nics_mac_addresses": { + "items": { + "type": "string" + }, + "type": "array" + }, + "nics_names": { + "items": { + "type": "string" + }, + "type": "array" + }, + "nics_neighbor_mac_addresses": { + "items": { + "type": "string" + }, + "type": "array" + }, + "nics_neighbor_names": { + "items": { + "type": "string" + }, + "type": "array" + }, + "nics_neighbor_vrfs": { + "items": { + "type": "string" + }, + "type": "array" + }, + "nics_vrfs": { + "items": { + "type": "string" + }, + "type": "array" + }, + "omit": { + "description": "a list of machine issues to omit", + "items": { + "type": "string" + }, + "type": "array" + }, + "only": { + "description": "a list of machine issues to include", + "items": { + "type": "string" + }, + "type": "array" + }, + "partition_id": { + "type": "string" + }, + "rackid": { + "type": "string" + }, + "severity": { + "description": "filters issue for given severity", + "type": "string" + }, + "sizeid": { + "type": "string" + }, + "state_value": { + "enum": [ + "", + "LOCKED", + "RESERVED" + ], + "type": "string" + }, + "tags": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "last_error_threshold", + "omit", + "only", + "severity" + ] + }, "v1.MachineNetwork": { "description": "prefixes that are reachable within this network", "properties": { @@ -6507,6 +6768,48 @@ ] } }, + "/v1/machine/issues": { + "post": { + "consumes": [ + "application/json" + ], + "operationId": "issues", + "parameters": [ + { + "in": "body", + "name": "body", + "required": true, + "schema": { + "$ref": "#/definitions/v1.MachineIssuesRequest" + } + } + ], + "produces": [ + "application/json" + ], + "responses": { + "200": { + "description": "OK", + "schema": { + 
"items": { + "$ref": "#/definitions/v1.MachineIssueResponse" + }, + "type": "array" + } + }, + "default": { + "description": "Error", + "schema": { + "$ref": "#/definitions/httperrors.HTTPErrorResponse" + } + } + }, + "summary": "returns machine issues", + "tags": [ + "machine" + ] + } + }, "/v1/machine/update-firmware/{id}": { "post": { "consumes": [