Skip to content

Commit

Permalink
Add Icinga states to pod
Browse files Browse the repository at this point in the history
  • Loading branch information
jhoxhaa committed May 22, 2024
1 parent 230491b commit 17ae44f
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 0 deletions.
99 changes: 99 additions & 0 deletions pkg/schema/v1/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package v1

import (
"database/sql"
"fmt"
"github.com/icinga/icinga-kubernetes/pkg/database"
"github.com/icinga/icinga-kubernetes/pkg/strcase"
"github.com/icinga/icinga-kubernetes/pkg/types"
Expand All @@ -10,8 +11,15 @@ import (
ktypes "k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
"strings"
"time"
)

// Icinga states a pod can be in. The values match the members of the
// icinga_state enum column of the pod table.
const (
	// Ok is the Icinga state of a pod that needs no attention.
	Ok = "ok"
	// Warning is the Icinga state of a pod that is degraded but not failing.
	Warning = "warning"
	// Critical is the Icinga state of a pod that has a failing container.
	Critical = "critical"
	// Unknown is the Icinga state of a pod whose health cannot be determined.
	Unknown = "unknown"
)

// prolongedInitializationThreshold is how long an init container may keep
// running before the pod is reported with the Warning state.
const prolongedInitializationThreshold = 10 * time.Minute

// PodFactory holds the Kubernetes clientset.
// NOTE(review): presumably it is used to create Pod values that need API
// access; its methods are not visible here, confirm against them.
type PodFactory struct {
// clientset is the Kubernetes API client kept by the factory.
clientset *kubernetes.Clientset
}
Expand All @@ -23,6 +31,8 @@ type Pod struct {
NominatedNodeName string
Ip string
Phase string
IcingaState string
IcingaStateReason string
CpuLimits int64
CpuRequests int64
MemoryLimits int64
Expand Down Expand Up @@ -99,6 +109,7 @@ func (p *Pod) Obtain(k8s kmetav1.Object) {
p.NominatedNodeName = pod.Status.NominatedNodeName
p.Ip = pod.Status.PodIP
p.Phase = strcase.Snake(string(pod.Status.Phase))
p.IcingaState, p.IcingaStateReason = getIcingaState(pod)
p.Reason = pod.Status.Reason
p.Message = pod.Status.Message
p.Qos = strcase.Snake(string(pod.Status.QOSClass))
Expand Down Expand Up @@ -269,6 +280,94 @@ func (p *Pod) Obtain(k8s kmetav1.Object) {
}
}

// getIcingaState derives the Icinga state (Ok, Warning, Critical or Unknown)
// of the given pod together with a human-readable reason for that state.
//
// The pod is evaluated in the following order:
//
//  1. A pod that has a deletion timestamp is always Ok.
//  2. Init container statuses are inspected in order. The first one that has
//     not terminated with exit code 0 decides the result and the regular
//     containers are not evaluated at all: a failed or blocked init container
//     is Critical, one that has been running for longer than
//     prolongedInitializationThreshold is Warning, anything else is Unknown.
//  3. Otherwise each regular container status is classified as Ok or Critical
//     and the most severe result is reported, so a healthy container never
//     hides a failing one. Among results of equal severity the one of the
//     last container in the status list is reported.
//  4. If every container of the pod spec is ready and running, the pod is Ok.
//
// If no rule applies, the state is Unknown and the reason is the pod's status
// reason or, if that is empty, its phase.
func getIcingaState(pod *kcorev1.Pod) (string, string) {
	if pod.DeletionTimestamp != nil {
		return Ok, fmt.Sprintf("Pod %s is being deleted", pod.Name)
	}

	// Fallback result for pods whose containers yield nothing more specific.
	state := Unknown
	reason := string(pod.Status.Phase)
	if pod.Status.Reason != "" {
		reason = pod.Status.Reason
	}

	initializing := false
	for i, container := range pod.Status.InitContainerStatuses {
		switch {
		case container.State.Terminated != nil && container.State.Terminated.ExitCode == 0:
			// Completed successfully, move on to the next init container.
			continue
		case container.State.Terminated != nil:
			state = Critical
			reason = fmt.Sprintf("Init container %s terminated with non-zero exit code %d: %s", container.Name, container.State.Terminated.ExitCode, container.State.Terminated.Reason)
		case container.State.Waiting != nil && len(container.State.Waiting.Reason) > 0 && container.State.Waiting.Reason != "PodInitializing":
			state = Critical
			reason = fmt.Sprintf("Init container %s is waiting: %s", container.Name, container.State.Waiting.Reason)
		case container.State.Running != nil:
			duration := time.Since(container.State.Running.StartedAt.Time)
			if duration > prolongedInitializationThreshold {
				state = Warning
				reason = fmt.Sprintf("Init container %s has been initializing for too long (%d/%d, %s elapsed)", container.Name, i+1, len(pod.Spec.InitContainers), duration)
			} else {
				reason = fmt.Sprintf("Init container %s is currently initializing (%d/%d)", container.Name, i+1, len(pod.Spec.InitContainers))
			}
		}

		// The first init container that has not completed successfully
		// determines the result, later ones have not been started yet.
		initializing = true
		break
	}

	if initializing {
		// Regular containers cannot be ready while initialization is pending,
		// so the result of the init container must not be overridden below.
		return state, reason
	}

	readyContainers := 0
	for _, container := range pod.Status.ContainerStatuses {
		// Result for this container only; empty if no rule matches it.
		var containerState, containerReason string

		if !container.Ready {
			containerState = Critical
			containerReason = fmt.Sprintf("Container %s is not ready", container.Name)
		}

		waiting, terminated := container.State.Waiting, container.State.Terminated
		switch {
		case waiting != nil && waiting.Reason != "" && container.RestartCount >= 3:
			containerState = Critical
			containerReason = fmt.Sprintf("Container %s is waiting and has restarted %d times: %s", container.Name, container.RestartCount, waiting.Reason)
		case terminated != nil && terminated.Reason != "" && terminated.ExitCode == 0:
			containerState = Ok
			containerReason = fmt.Sprintf("Container %s terminated normally", container.Name)
		case terminated != nil && terminated.Reason == "":
			containerState = Critical
			containerReason = fmt.Sprintf("Container %s terminated abnormally", container.Name)
		case container.Ready && container.State.Running != nil:
			readyContainers++
			containerState = Ok
			containerReason = fmt.Sprintf("Container %s is running", container.Name)
		}

		if containerState == "" {
			continue
		}

		// Keep the most severe result: once a container is Critical, a
		// healthy container later in the list must not reset the pod to Ok.
		if state == Critical && containerState != Critical {
			continue
		}

		state, reason = containerState, containerReason
	}

	// Require at least one container so that a pod without any containers in
	// its spec is not reported as ready.
	if len(pod.Spec.Containers) > 0 && readyContainers == len(pod.Spec.Containers) {
		return Ok, "All containers are ready"
	}

	return state, reason
}

func (p *Pod) Relations() []database.Relation {
fk := database.WithForeignKey("pod_id")

Expand Down
2 changes: 2 additions & 0 deletions schema/mysql/schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ CREATE TABLE pod (
memory_limits bigint unsigned NOT NULL,
memory_requests bigint unsigned NOT NULL,
phase enum('pending', 'running', 'succeeded', 'failed') COLLATE utf8mb4_unicode_ci NOT NULL,
icinga_state enum('ok', 'warning', 'critical', 'unknown') COLLATE utf8mb4_unicode_ci NOT NULL,
icinga_state_reason text NULL DEFAULT NULL,
reason varchar(255) NULL DEFAULT NULL,
message varchar(255) NULL DEFAULT NULL,
qos enum('guaranteed', 'burstable', 'best_effort') COLLATE utf8mb4_unicode_ci NOT NULL,
Expand Down

0 comments on commit 17ae44f

Please sign in to comment.