From 1921b280ff39999d4c5007f597115ac6285cdc63 Mon Sep 17 00:00:00 2001
From: Jonada Hoxha
Date: Tue, 2 Jul 2024 13:59:51 +0200
Subject: [PATCH 1/2] Add `Icinga states` to job

---
 pkg/schema/v1/job.go    | 38 ++++++++++++++++++++++++++++++++++++++
 schema/mysql/schema.sql |  2 ++
 2 files changed, 40 insertions(+)

diff --git a/pkg/schema/v1/job.go b/pkg/schema/v1/job.go
index 97a4fefe..3a3b8d25 100644
--- a/pkg/schema/v1/job.go
+++ b/pkg/schema/v1/job.go
@@ -2,10 +2,12 @@ package v1
 
 import (
 	"database/sql"
+	"fmt"
 	"github.com/icinga/icinga-go-library/types"
 	"github.com/icinga/icinga-kubernetes/pkg/database"
 	"github.com/icinga/icinga-kubernetes/pkg/strcase"
 	kbatchv1 "k8s.io/api/batch/v1"
+	kcorev1 "k8s.io/api/core/v1"
 	kmetav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	kruntime "k8s.io/apimachinery/pkg/runtime"
 	kserializer "k8s.io/apimachinery/pkg/runtime/serializer"
@@ -28,6 +30,8 @@ type Job struct {
 	Succeeded int32
 	Failed int32
 	Yaml string
+	IcingaState IcingaState
+	IcingaStateReason string
 	Conditions []JobCondition `db:"-"`
 	Labels []Label `db:"-"`
 	JobLabels []JobLabel `db:"-"`
@@ -120,6 +124,7 @@ func (j *Job) Obtain(k8s kmetav1.Object) {
 	j.Active = job.Status.Active
 	j.Succeeded = job.Status.Succeeded
 	j.Failed = job.Status.Failed
+	j.IcingaState, j.IcingaStateReason = j.getIcingaState(job)
 
 	for _, condition := range job.Status.Conditions {
 		j.Conditions = append(j.Conditions, JobCondition{
@@ -166,6 +171,39 @@ func (j *Job) Obtain(k8s kmetav1.Object) {
 	j.Yaml = string(output)
 }
 
+func (j *Job) getIcingaState(job *kbatchv1.Job) (IcingaState, string) {
+	for _, condition := range job.Status.Conditions {
+		switch condition.Type {
+		case kbatchv1.JobComplete:
+			if condition.Status == kcorev1.ConditionTrue {
+				return Ok, fmt.Sprintf(
+					"Job %s/%s has completed its execution successfully with %d necessary pod completions.",
+					j.Namespace, j.Name, job.Spec.Completions)
+			}
+		case kbatchv1.JobFailed:
+			if condition.Status == kcorev1.ConditionTrue {
+				return Critical, fmt.Sprintf(
+					"Job %s/%s has failed its execution. %s: %s.",
+					j.Namespace, j.Name, condition.Reason, condition.Message)
+			}
+		default:
+			reason := fmt.Sprintf(
+				"Job %s/%s is running since %s with currently %d active, %d completed and %d failed pods. "+
+					"Successful termination requires %d pod completions. The back-off limit is %d.",
+				j.Namespace, j.Name, job.Status.StartTime, j.Active, j.Succeeded, j.Failed, job.Spec.Completions, job.Spec.BackoffLimit)
+
+			if job.Spec.ActiveDeadlineSeconds != nil {
+				reason += fmt.Sprintf("Deadline for completion is %d.", job.Spec.ActiveDeadlineSeconds)
+			}
+
+			return Pending, reason
+		}
+	}
+
+	return Unknown, fmt.Sprintf(
+		"Job %s/%s is in an unknown state. No condition met.", j.Namespace, j.Name)
+}
+
 func (j *Job) Relations() []database.Relation {
 	fk := database.WithForeignKey("job_uuid")
 
diff --git a/schema/mysql/schema.sql b/schema/mysql/schema.sql
index 1bab960b..20bc538f 100644
--- a/schema/mysql/schema.sql
+++ b/schema/mysql/schema.sql
@@ -792,6 +792,8 @@ CREATE TABLE job (
   succeeded int unsigned NOT NULL,
   failed int unsigned NOT NULL,
   yaml mediumblob DEFAULT NULL,
+  icinga_state enum('pending', 'ok', 'warning', 'critical', 'unknown') COLLATE utf8mb4_unicode_ci NOT NULL,
+  icinga_state_reason text NOT NULL,
   created bigint unsigned NOT NULL,
   PRIMARY KEY (uuid)
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;

From 2c711c344dd26dacfc42c4322efe539bd7bd6b13 Mon Sep 17 00:00:00 2001
From: Eric Lippmann
Date: Mon, 29 Jul 2024 10:26:16 +0200
Subject: [PATCH 2/2] Account all job conditions for Icinga state

Only conditions whose status is true are evaluated. While at it,
dereference the BackoffLimit and ActiveDeadlineSeconds pointers when
building the pending reason, so that their values are printed instead
of pointer addresses.

---
 pkg/schema/v1/job.go | 74 ++++++++++++++++++++++++++++++++------------
 1 file changed, 54 insertions(+), 20 deletions(-)

diff --git a/pkg/schema/v1/job.go b/pkg/schema/v1/job.go
index 3a3b8d25..abd4c863 100644
--- a/pkg/schema/v1/job.go
+++ b/pkg/schema/v1/job.go
@@ -173,35 +173,69 @@ func (j *Job) Obtain(k8s kmetav1.Object) {
 
+// getIcingaState derives the Icinga state of the given job and a human-readable reason for it.
+// The first condition in job.Status.Conditions whose status is true and whose type is handled
+// by the switch below determines the result. If there is no such condition, the job is
+// reported as Pending together with a summary of its pod counts and configured limits.
 func (j *Job) getIcingaState(job *kbatchv1.Job) (IcingaState, string) {
 	for _, condition := range job.Status.Conditions {
+		if condition.Status != kcorev1.ConditionTrue {
+			continue
+		}
+
 		switch condition.Type {
+		case kbatchv1.JobSuccessCriteriaMet:
+			return Ok, fmt.Sprintf(
+				"Job %s/%s met its success criteria.",
+				j.Namespace, j.Name)
 		case kbatchv1.JobComplete:
-			if condition.Status == kcorev1.ConditionTrue {
-				return Ok, fmt.Sprintf(
-					"Job %s/%s has completed its execution successfully with %d necessary pod completions.",
-					j.Namespace, j.Name, job.Spec.Completions)
-			}
-		case kbatchv1.JobFailed:
-			if condition.Status == kcorev1.ConditionTrue {
-				return Critical, fmt.Sprintf(
-					"Job %s/%s has failed its execution. %s: %s.",
-					j.Namespace, j.Name, condition.Reason, condition.Message)
-			}
-		default:
 			reason := fmt.Sprintf(
-				"Job %s/%s is running since %s with currently %d active, %d completed and %d failed pods. "+
-					"Successful termination requires %d pod completions. The back-off limit is %d.",
-				j.Namespace, j.Name, job.Status.StartTime, j.Active, j.Succeeded, j.Failed, job.Spec.Completions, job.Spec.BackoffLimit)
+				"Job %s/%s has completed its execution successfully with",
+				j.Namespace, j.Name)
 
-			if job.Spec.ActiveDeadlineSeconds != nil {
-				reason += fmt.Sprintf("Deadline for completion is %d.", job.Spec.ActiveDeadlineSeconds)
+			if j.Completions.Valid {
+				reason += fmt.Sprintf(" %d necessary pod completions.", j.Completions.Int32)
+			} else {
+				reason += " any pod completion."
 			}
 
-			return Pending, reason
+			return Ok, reason
+		case kbatchv1.JobFailed:
+			return Critical, fmt.Sprintf(
+				"Job %s/%s has failed its execution. %s: %s.",
+				j.Namespace, j.Name, condition.Reason, condition.Message)
+		case kbatchv1.JobFailureTarget:
+			return Warning, fmt.Sprintf(
+				"Job %s/%s is about to fail its execution. %s: %s.",
+				j.Namespace, j.Name, condition.Reason, condition.Message)
+		case kbatchv1.JobSuspended:
+			return Ok, fmt.Sprintf(
+				"Job %s/%s is suspended.",
+				j.Namespace, j.Name)
 		}
 	}
 
-	return Unknown, fmt.Sprintf(
-		"Job %s/%s is in an unknown state. No condition met.", j.Namespace, j.Name)
+	var completions string
+	if j.Completions.Valid {
+		completions = fmt.Sprintf("%d pod completions", j.Completions.Int32)
+	} else {
+		completions = "any pod completion"
+	}
+
+	reason := fmt.Sprintf(
+		"Job %s/%s is running since %s with currently %d active, %d completed and %d failed pods. "+
+			"Successful termination requires %s.",
+		j.Namespace, j.Name, job.Status.StartTime, j.Active, j.Succeeded, j.Failed, completions)
+
+	// BackoffLimit and ActiveDeadlineSeconds are pointers. They must be dereferenced,
+	// as %d would otherwise print the pointer address instead of the configured value.
+	if job.Spec.BackoffLimit != nil {
+		reason += fmt.Sprintf(" The back-off limit is %d.", *job.Spec.BackoffLimit)
+	}
+
+	if job.Spec.ActiveDeadlineSeconds != nil {
+		reason += fmt.Sprintf(" Deadline for completion is %d.", *job.Spec.ActiveDeadlineSeconds)
+	}
+
+	return Pending, reason
 }
 
 func (j *Job) Relations() []database.Relation {