Skip to content

Commit

Permalink
gracefully handle a host that is shut down without causing unhealthy alerts
Browse files Browse the repository at this point in the history
  • Loading branch information
czerwonk committed Jun 2, 2023
1 parent 96a089f commit a56fe43
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 19 deletions.
4 changes: 2 additions & 2 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ import (
"github.com/sirupsen/logrus"
)

const version string = "0.3.0"
const version string = "0.3.1"

var (
showVersion = flag.Bool("version", false, "Print version information.")
listenAddress = flag.String("web.listen-address", ":9545", "Address on which to expose metrics and web interface.")
metricsPath = flag.String("web.telemetry-path", "/metrics", "Path under which to expose metrics.")
username = flag.String("api.username", "", "Username")
username = flag.String("api.username", "Administrator", "Username")
password = flag.String("api.password", "", "Password")
maxConcurrentRequests = flag.Uint("api.max-concurrent-requests", 4, "Maximum number of requests sent against API concurrently")
tlsEnabled = flag.Bool("tls.enabled", false, "Enables TLS")
Expand Down
21 changes: 15 additions & 6 deletions pkg/chassis/power/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ package power
import (
"context"

"github.com/MauveSoftware/ilo4_exporter/pkg/common"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"

"github.com/MauveSoftware/ilo4_exporter/pkg/common"
)

const (
Expand Down Expand Up @@ -69,12 +70,20 @@ func Collect(ctx context.Context, parentPath string, cc *common.CollectorContext
)

for _, sup := range pwr.PowerSupplies {
la := append(l, sup.SerialNumber)
cc.RecordMetrics(
prometheus.MustNewConstMetric(powerSupplyEnabledDesc, prometheus.GaugeValue, sup.Status.EnabledValue(), la...),
prometheus.MustNewConstMetric(powerSupplyHealthyDesc, prometheus.GaugeValue, sup.Status.HealthValue(), la...),
)
collectForPowerSupply(sup, l, cc)
}

return nil
}

// collectForPowerSupply records the enabled and healthy gauges for a single
// power supply.
//
// A supply whose status state is "Absent" is skipped entirely, so no metrics
// are emitted for it. labelVals holds the label values shared by all supplies;
// the supply's serial number is added as the final label value.
func collectForPowerSupply(sup PowerSupply, labelVals []string, cc *common.CollectorContext) {
	if sup.Status.State == "Absent" {
		return
	}

	// Build a detached slice instead of appending to labelVals directly: the
	// caller passes the same slice for every supply, and an in-place append
	// could write into its backing array whenever spare capacity exists.
	la := make([]string, 0, len(labelVals)+1)
	la = append(la, labelVals...)
	la = append(la, sup.SerialNumber)

	cc.RecordMetrics(
		prometheus.MustNewConstMetric(powerSupplyEnabledDesc, prometheus.GaugeValue, sup.Status.EnabledValue(), la...),
		prometheus.MustNewConstMetric(powerSupplyHealthyDesc, prometheus.GaugeValue, sup.Status.HealthValue(), la...),
	)
}
11 changes: 10 additions & 1 deletion pkg/chassis/thermal/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ package thermal
import (
"context"

"github.com/MauveSoftware/ilo4_exporter/pkg/common"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"

"github.com/MauveSoftware/ilo4_exporter/pkg/common"
)

const (
Expand Down Expand Up @@ -75,6 +76,10 @@ func Collect(ctx context.Context, parentPath string, cc *common.CollectorContext
}

func collectForFan(hostName string, f *Fan, cc *common.CollectorContext) {
if f.Status.State == "Offline" {
return
}

l := []string{hostName, f.Name}
cc.RecordMetrics(
prometheus.MustNewConstMetric(fanHealthyDesc, prometheus.GaugeValue, f.Status.HealthValue(), l...),
Expand All @@ -84,6 +89,10 @@ func collectForFan(hostName string, f *Fan, cc *common.CollectorContext) {
}

func collectForTemperature(hostName string, t *Temperature, cc *common.CollectorContext) {
if t.Status.State == "Offline" {
return
}

l := []string{hostName, t.Name}
cc.RecordMetrics(
prometheus.MustNewConstMetric(tempCurrentDesc, prometheus.GaugeValue, t.ReadingCelsius, l...),
Expand Down
16 changes: 6 additions & 10 deletions pkg/system/memory/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,23 @@ import (
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"

"github.com/MauveSoftware/ilo4_exporter/pkg/common"
"github.com/prometheus/client_golang/prometheus"

"github.com/MauveSoftware/ilo4_exporter/pkg/common"
)

const (
// prefix is prepended to metric names when this package builds its
// Prometheus descriptors (e.g. prefix+"total_byte").
prefix = "ilo4_memory_"
)

var (
healthyDesc *prometheus.Desc
totalMemory *prometheus.Desc
dimmHealthyDesc *prometheus.Desc
dimmSizeDesc *prometheus.Desc
)

func init() {
l := []string{"host"}
healthyDesc = prometheus.NewDesc(prefix+"healthy", "Health status of the memory", l, nil)
totalMemory = prometheus.NewDesc(prefix+"total_byte", "Total memory installed in bytes", l, nil)

l = append(l, "name")
Expand All @@ -39,7 +38,6 @@ func init() {

// Describe describes all metrics for the memory package
func Describe(ch chan<- *prometheus.Desc) {
ch <- healthyDesc
ch <- totalMemory
ch <- dimmHealthyDesc
ch <- dimmSizeDesc
Expand All @@ -61,14 +59,8 @@ func Collect(systemPath string, cc *common.CollectorContext) {
return
}

var healthy float64
if strings.ToLower(m.MemorySummary.Status.HealthRollUp) == "ok" {
healthy = 1
}

hostname := cc.Client().HostName()
cc.RecordMetrics(
prometheus.MustNewConstMetric(healthyDesc, prometheus.GaugeValue, healthy, hostname),
prometheus.MustNewConstMetric(totalMemory, prometheus.GaugeValue, float64(m.MemorySummary.TotalSystemMemoryGiB<<30), hostname),
)

Expand Down Expand Up @@ -117,6 +109,10 @@ func collectForDIMM(ctx context.Context, link string, cc *common.CollectorContex

l := []string{cc.Client().HostName(), d.Name}

if d.DIMMStatus == "Unknown" {
return
}

var healthy float64
if d.DIMMStatus == "GoodInUse" {
healthy = 1
Expand Down

0 comments on commit a56fe43

Please sign in to comment.