From 52bb4071f870b97728a75e60a1836f9fc2aecf03 Mon Sep 17 00:00:00 2001 From: JeromeJu Date: Fri, 17 Nov 2023 18:19:31 +0000 Subject: [PATCH] Error sweep: refactor steps termination when failing TaskRun This commit refactors the step termination logics once the pod for the TaskRun is marked failed. It adds to the container termination message and complete the prerequisites for differentiating pod termination reason with TaskRunReasons which are currently used to cover container termination reasons. --- pkg/reconciler/taskrun/taskrun.go | 25 ++++++++++++++++--------- pkg/reconciler/taskrun/taskrun_test.go | 9 +++++++++ 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/pkg/reconciler/taskrun/taskrun.go b/pkg/reconciler/taskrun/taskrun.go index c43a0770268..508e26ef840 100644 --- a/pkg/reconciler/taskrun/taskrun.go +++ b/pkg/reconciler/taskrun/taskrun.go @@ -724,25 +724,32 @@ func (c *Reconciler) failTaskRun(ctx context.Context, tr *v1.TaskRun, reason v1. var err error if reason == v1.TaskRunReasonCancelled && (config.FromContextOrDefaults(ctx).FeatureFlags.EnableKeepPodOnCancel && config.FromContextOrDefaults(ctx).FeatureFlags.EnableAPIFields == config.AlphaAPIFields) { - logger.Infof("canceling task run %q by entrypoint", tr.Name) + logger.Infof("Canceling task run %q by entrypoint", tr.Name) err = podconvert.CancelPod(ctx, c.KubeClientSet, tr.Namespace, tr.Status.PodName) } else { err = c.KubeClientSet.CoreV1().Pods(tr.Namespace).Delete(ctx, tr.Status.PodName, metav1.DeleteOptions{}) } if err != nil && !k8serrors.IsNotFound(err) { - logger.Infof("Failed to terminate pod: %v", err) + logger.Errorf("Failed to terminate pod %s: %v", tr.Status.PodName, err) return err } - // Update step states for TaskRun on TaskRun object since pod has been deleted for cancel or timeout + terminateStepsInPod(tr, reason) + return nil +} + +// terminateStepsInPod updates step states for TaskRun on TaskRun object since pod has been deleted for cancel or timeout +func terminateStepsInPod(tr *v1.TaskRun, taskRunReason v1.TaskRunReason) { for i, step := range tr.Status.Steps { // If running, include StartedAt for when step began running if step.Running != nil { step.Terminated = &corev1.ContainerStateTerminated{ ExitCode: 1, StartedAt: step.Running.StartedAt, - FinishedAt: completionTime, - Reason: reason.String(), + FinishedAt: *tr.Status.CompletionTime, + // TODO(#7385): replace with more pod/container termination reason instead of overloading taskRunReason + Reason: taskRunReason.String(), + Message: fmt.Sprintf("Step %s terminated as pod %s is terminated", step.Name, tr.Status.PodName), } step.Running = nil tr.Status.Steps[i] = step @@ -751,15 +758,15 @@ func (c *Reconciler) failTaskRun(ctx context.Context, tr *v1.TaskRun, reason v1. if step.Waiting != nil { step.Terminated = &corev1.ContainerStateTerminated{ ExitCode: 1, - FinishedAt: completionTime, - Reason: reason.String(), + FinishedAt: *tr.Status.CompletionTime, + // TODO(#7385): replace with more pod/container termination reason instead of overloading taskRunReason + Reason: taskRunReason.String(), + Message: fmt.Sprintf("Step %s terminated as pod %s is terminated", step.Name, tr.Status.PodName), } step.Waiting = nil tr.Status.Steps[i] = step } } - - return nil } // createPod creates a Pod based on the Task's configuration, with pvcName as a volumeMount diff --git a/pkg/reconciler/taskrun/taskrun_test.go b/pkg/reconciler/taskrun/taskrun_test.go index 3796d3757cf..7bfd7266792 100644 --- a/pkg/reconciler/taskrun/taskrun_test.go +++ b/pkg/reconciler/taskrun/taskrun_test.go @@ -4218,6 +4218,7 @@ status: Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, Reason: v1.TaskRunReasonCancelled.String(), + Message: "Step terminated as pod foo-is-bar is terminated", }, }, }, @@ -4263,6 +4264,7 @@ status: Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, Reason: v1.TaskRunReasonCancelled.String(), + Message: "Step terminated as pod foo-is-bar is terminated", }, }, }, @@ -4304,6 +4306,7 @@ status: Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, Reason: v1.TaskRunReasonTimedOut.String(), + Message: "Step terminated as pod foo-is-bar is terminated", }, }, }, @@ -4360,6 +4363,7 @@ status: Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, Reason: v1.TaskRunReasonTimedOut.String(), + Message: "Step terminated as pod foo-is-bar is terminated", }, }, }, @@ -4368,6 +4372,7 @@ status: Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, Reason: v1.TaskRunReasonTimedOut.String(), + Message: "Step terminated as pod foo-is-bar is terminated", }, }, }, @@ -4413,6 +4418,7 @@ status: Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, Reason: v1.TaskRunReasonTimedOut.String(), + Message: "Step terminated as pod foo-is-bar is terminated", }, }, }, @@ -4421,6 +4427,7 @@ status: Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, Reason: v1.TaskRunReasonTimedOut.String(), + Message: "Step terminated as pod foo-is-bar is terminated", }, }, }, @@ -4429,6 +4436,7 @@ status: Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, Reason: v1.TaskRunReasonTimedOut.String(), + Message: "Step terminated as pod foo-is-bar is terminated", }, }, }, @@ -4482,6 +4490,7 @@ status: Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, Reason: v1.TaskRunReasonTimedOut.String(), + Message: "Step terminated as pod foo-is-bar is terminated", }, }, },