[TEP-0050] Implement PipelineTask OnError

Part of [#7165][#7165]. In [TEP-0050][tep-0050], we proposed to add an `OnError` API field under `PipelineTask` to configure error handling strategy. This commits leverages `PipelineTask.OnError` API field introduced in the previous PR, implement the error handling strategy, update related docs and tests. /kind feature [tep-0050]: https://github.com/tektoncd/community/blob/main/teps/0050-ignore-task-failures.md [#7165]: #7165
tektoncd · Dec 6, 2023 · b6d27a8 · b6d27a8
1 parent d1b8f29
commit b6d27a8
Show file tree

Hide file tree

Showing 19 changed files with 391 additions and 27 deletions.
diff --git a/docs/pipeline-api.md b/docs/pipeline-api.md
@@ -2887,9 +2887,7 @@ PipelineTaskOnErrorType
 <td>
 <em>(Optional)</em>
 <p>OnError defines the exiting behavior of a PipelineRun on error
-can be set to [ continue | stopAndFail ]
-Note: OnError is in preview mode and not yet supported
-TODO(#7165)</p>
+can be set to [ continue | stopAndFail ]</p>
 </td>
 </tr>
 </tbody>
@@ -5138,6 +5136,10 @@ that references within the TaskRun could not be resolved</p>
 <td><p>TaskRunReasonFailedValidation indicated that the reason for failure status is
 that taskrun failed runtime validation</p>
 </td>
+</tr><tr><td><p>&#34;FailureIgnored&#34;</p></td>
+<td><p>TaskRunReasonFailureIgnored is the reason set when the Taskrun has failed due to pod execution error and the failure is ignored for the owning PipelineRun.
+TaskRuns failed due to reconciler/validation error should not use this reason.</p>
+</td>
 </tr><tr><td><p>&#34;TaskRunImagePullFailed&#34;</p></td>
 <td><p>TaskRunReasonImagePullFailed is the reason set when the step of a task fails due to image not being pulled</p>
 </td>
@@ -11611,9 +11613,7 @@ PipelineTaskOnErrorType
 <td>
 <em>(Optional)</em>
 <p>OnError defines the exiting behavior of a PipelineRun on error
-can be set to [ continue | stopAndFail ]
-Note: OnError is in preview mode and not yet supported
-TODO(#7165)</p>
+can be set to [ continue | stopAndFail ]</p>
 </td>
 </tr>
 </tbody>

diff --git a/docs/pipelines.md b/docs/pipelines.md
@@ -724,8 +724,6 @@ tasks:
 
 > :seedling: **Specifying `onError` in `PipelineTasks` is an [alpha](additional-configs.md#alpha-features) feature.** The `enable-api-fields` feature flag must be set to `"alpha"` to specify `onError`  in a `PipelineTask`.
 
-> :seedling: This feature is in **Preview Only** mode and not yet supported/implemented.
-
 When a `PipelineTask` fails, the rest of the `PipelineTasks` are skipped and the `PipelineRun` is declared a failure. If you would like to
 ignore such `PipelineTask` failure and continue executing the rest of the `PipelineTasks`, you can specify `onError` for such a `PipelineTask`.
 

diff --git a/examples/v1/pipelineruns/alpha/ignore-task-error.yaml b/examples/v1/pipelineruns/alpha/ignore-task-error.yaml
@@ -0,0 +1,25 @@
+apiVersion: tekton.dev/v1
+kind: PipelineRun
+metadata:
+  generateName: pipelinerun-with-failing-task-
+spec:
+  pipelineSpec:
+    tasks:
+      - name: echo-continue
+        onError: continue
+        taskSpec:
+          steps:
+            - name: write
+              image: alpine
+              script: |
+                echo "this is a failing task"
+                exit 1
+      - name: echo
+        runAfter:
+          - echo-continue
+        taskSpec:
+          steps:
+            - name: write
+              image: alpine
+              script: |
+                echo "this is a success task"
diff --git a/pkg/apis/pipeline/v1/openapi_generated.go b/pkg/apis/pipeline/v1/openapi_generated.go
diff --git a/pkg/apis/pipeline/v1/pipeline_types.go b/pkg/apis/pipeline/v1/pipeline_types.go
@@ -248,8 +248,6 @@ type PipelineTask struct {
 
 	// OnError defines the exiting behavior of a PipelineRun on error
 	// can be set to [ continue | stopAndFail ]
-	// Note: OnError is in preview mode and not yet supported
-	// TODO(#7165)
 	// +optional
 	OnError PipelineTaskOnErrorType `json:"onError,omitempty"`
 }

diff --git a/pkg/apis/pipeline/v1/pipelinerun_types.go b/pkg/apis/pipeline/v1/pipelinerun_types.go
@@ -416,6 +416,9 @@ const (
 	PipelineRunReasonInvalidParamValue PipelineRunReason = "InvalidParamValue"
 )
 
+// PipelineTaskOnErrorAnnotation is used to pass the failure strategy to TaskRun pods from PipelineTask OnError field
+const PipelineTaskOnErrorAnnotation = "pipeline.tekton.dev/pipeline-task-on-error"
+
 func (t PipelineRunReason) String() string {
 	return string(t)
 }

diff --git a/pkg/apis/pipeline/v1/swagger.json b/pkg/apis/pipeline/v1/swagger.json
@@ -894,7 +894,7 @@
           "type": "string"
         },
         "onError": {
-          "description": "OnError defines the exiting behavior of a PipelineRun on error can be set to [ continue | stopAndFail ] Note: OnError is in preview mode and not yet supported",
+          "description": "OnError defines the exiting behavior of a PipelineRun on error can be set to [ continue | stopAndFail ]",
           "type": "string"
         },
         "params": {

diff --git a/pkg/apis/pipeline/v1/taskrun_types.go b/pkg/apis/pipeline/v1/taskrun_types.go
@@ -202,6 +202,9 @@ const (
 	// TaskRunReasonResourceVerificationFailed indicates that the task fails the trusted resource verification,
 	// it could be the content has changed, signature is invalid or public key is invalid
 	TaskRunReasonResourceVerificationFailed TaskRunReason = "ResourceVerificationFailed"
+	// TaskRunReasonFailureIgnored is the reason set when the Taskrun has failed due to pod execution error and the failure is ignored for the owning PipelineRun.
+	// TaskRuns failed due to reconciler/validation error should not use this reason.
+	TaskRunReasonFailureIgnored TaskRunReason = "FailureIgnored"
 )
 
 func (t TaskRunReason) String() string {

diff --git a/pkg/apis/pipeline/v1beta1/openapi_generated.go b/pkg/apis/pipeline/v1beta1/openapi_generated.go
diff --git a/pkg/apis/pipeline/v1beta1/pipeline_types.go b/pkg/apis/pipeline/v1beta1/pipeline_types.go
@@ -262,8 +262,6 @@ type PipelineTask struct {
 
 	// OnError defines the exiting behavior of a PipelineRun on error
 	// can be set to [ continue | stopAndFail ]
-	// Note: OnError is in preview mode and not yet supported
-	// TODO(#7165)
 	// +optional
 	OnError PipelineTaskOnErrorType `json:"onError,omitempty"`
 }

diff --git a/pkg/apis/pipeline/v1beta1/swagger.json b/pkg/apis/pipeline/v1beta1/swagger.json
@@ -1287,7 +1287,7 @@
           "type": "string"
         },
         "onError": {
-          "description": "OnError defines the exiting behavior of a PipelineRun on error can be set to [ continue | stopAndFail ] Note: OnError is in preview mode and not yet supported",
+          "description": "OnError defines the exiting behavior of a PipelineRun on error can be set to [ continue | stopAndFail ]",
           "type": "string"
         },
         "params": {

diff --git a/pkg/pod/status.go b/pkg/pod/status.go
@@ -128,7 +128,12 @@ func MakeTaskRunStatus(ctx context.Context, logger *zap.SugaredLogger, tr v1.Tas
 	complete := areStepsComplete(pod) || pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed
 
 	if complete {
-		updateCompletedTaskRunStatus(logger, trs, pod)
+		onError, ok := tr.Annotations[v1.PipelineTaskOnErrorAnnotation]
+		if ok {
+			updateCompletedTaskRunStatus(logger, trs, pod, v1.PipelineTaskOnErrorType(onError))
+		} else {
+			updateCompletedTaskRunStatus(logger, trs, pod, "")
+		}
 	} else {
 		updateIncompleteTaskRunStatus(trs, pod)
 	}
@@ -483,10 +488,14 @@ func extractExitCodeFromResults(results []result.RunResult) (*int32, error) {
 	return nil, nil //nolint:nilnil // would be more ergonomic to return a sentinel error
 }
 
-func updateCompletedTaskRunStatus(logger *zap.SugaredLogger, trs *v1.TaskRunStatus, pod *corev1.Pod) {
+func updateCompletedTaskRunStatus(logger *zap.SugaredLogger, trs *v1.TaskRunStatus, pod *corev1.Pod, onError v1.PipelineTaskOnErrorType) {
 	if DidTaskRunFail(pod) {
 		msg := getFailureMessage(logger, pod)
-		markStatusFailure(trs, v1.TaskRunReasonFailed.String(), msg)
+		if onError == v1.PipelineTaskContinue {
+			markStatusFailure(trs, v1.TaskRunReasonFailureIgnored.String(), msg)
+		} else {
+			markStatusFailure(trs, v1.TaskRunReasonFailed.String(), msg)
+		}
 	} else {
 		markStatusSuccess(trs)
 	}

diff --git a/pkg/pod/status_test.go b/pkg/pod/status_test.go
@@ -1542,6 +1542,75 @@ func TestMakeTaskRunStatus(t *testing.T) {
 	}
 }
 
+func TestMakeRunStatus_OnError(t *testing.T) {
+	for _, c := range []struct {
+		name      string
+		podStatus corev1.PodStatus
+		onError   v1.PipelineTaskOnErrorType
+		want      v1.TaskRunStatus
+	}{{
+		name: "onError: continue",
+		podStatus: corev1.PodStatus{
+			Phase:   corev1.PodFailed,
+			Message: "boom",
+		},
+		onError: v1.PipelineTaskContinue,
+		want: v1.TaskRunStatus{
+			Status: statusFailure(string(v1.TaskRunReasonFailureIgnored), "boom"),
+		},
+	}, {
+		name: "onError: stopAndFail",
+		podStatus: corev1.PodStatus{
+			Phase:   corev1.PodFailed,
+			Message: "boom",
+		},
+		onError: v1.PipelineTaskStopAndFail,
+		want: v1.TaskRunStatus{
+			Status: statusFailure(string(v1.TaskRunReasonFailed), "boom"),
+		},
+	}, {
+		name: "stand alone TaskRun",
+		podStatus: corev1.PodStatus{
+			Phase:   corev1.PodFailed,
+			Message: "boom",
+		},
+		want: v1.TaskRunStatus{
+			Status: statusFailure(string(v1.TaskRunReasonFailed), "boom"),
+		},
+	}} {
+		t.Run(c.name, func(t *testing.T) {
+			pod := corev1.Pod{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "pod",
+					Namespace: "foo",
+				},
+				Status: c.podStatus,
+			}
+			tr := v1.TaskRun{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "task-run",
+					Namespace: "foo",
+				},
+			}
+			if c.onError != "" {
+				tr.Annotations = map[string]string{}
+				tr.Annotations[v1.PipelineTaskOnErrorAnnotation] = string(c.onError)
+			}
+
+			logger, _ := logging.NewLogger("", "status")
+			kubeclient := fakek8s.NewSimpleClientset()
+			got, err := MakeTaskRunStatus(context.Background(), logger, tr, &pod, kubeclient, &v1.TaskSpec{})
+			if err != nil {
+				t.Errorf("Unexpected err in MakeTaskRunResult: %s", err)
+			}
+
+			if d := cmp.Diff(c.want.Status, got.Status, ignoreVolatileTime); d != "" {
+				t.Errorf("Diff %s", diff.PrintWantGot(d))
+			}
+		})
+	}
+}
+
 func TestMakeTaskRunStatusAlpha(t *testing.T) {
 	for _, c := range []struct {
 		desc      string

diff --git a/pkg/reconciler/pipelinerun/pipelinerun.go b/pkg/reconciler/pipelinerun/pipelinerun.go
@@ -958,6 +958,9 @@ func (c *Reconciler) createTaskRun(ctx context.Context, taskRunName string, para
 	if spanContext, err := getMarshalledSpanFromContext(ctx); err == nil {
 		tr.Annotations[TaskRunSpanContextAnnotation] = spanContext
 	}
+	if rpt.PipelineTask.OnError == v1.PipelineTaskContinue {
+		tr.Annotations[v1.PipelineTaskOnErrorAnnotation] = string(v1.PipelineTaskContinue)
+	}
 
 	if rpt.PipelineTask.Timeout != nil {
 		tr.Spec.Timeout = rpt.PipelineTask.Timeout

diff --git a/pkg/reconciler/pipelinerun/pipelinerun_test.go b/pkg/reconciler/pipelinerun/pipelinerun_test.go
@@ -1060,6 +1060,76 @@ spec:
 	}
 }
 
+// TestPipelineTaskErrorIsIgnored tests that a resource dependent PipelineTask with onError:continue is skipped
+// if the parent PipelineTask fails to produce the result
+func TestPipelineTaskErrorIsIgnored(t *testing.T) {
+	prs := []*v1.PipelineRun{parse.MustParseV1PipelineRun(t, `
+metadata:
+  name: test-pipeline-missing-results
+  namespace: foo
+spec:
+  serviceAccountName: test-sa-0
+  pipelineSpec:
+    tasks:
+    - name: task1
+      taskSpec:
+        results:
+        - name: result1
+          type: string
+        steps:
+        - name: failing-step
+          onError: continue
+          image: busybox
+          script: exit 1; echo -n 123 | tee $(results.result1.path)'
+    - name: task2
+      onError: continue
+      params:
+      - name: param1
+        value: $(tasks.task1.results.result1)
+      taskSpec:
+        params:
+        - name: param1
+          type: string
+        steps:
+        - name: foo
+          image: busybox
+          script: 'echo $(params.param1)'
+`)}
+	trs := []*v1.TaskRun{mustParseTaskRunWithObjectMeta(t,
+		taskRunObjectMeta("test-pipeline-missing-results-task1", "foo",
+			"test-pipeline-missing-results", "test-pipeline", "task1", true),
+		`
+spec:
+  serviceAccountName: test-sa
+  timeout: 1h0m0s
+status:
+  conditions:
+  - status: "True"
+    type: Succeeded
+`)}
+	cms := []*corev1.ConfigMap{withEnabledAlphaAPIFields(newFeatureFlagsConfigMap())}
+
+	d := test.Data{
+		PipelineRuns: prs,
+		TaskRuns:     trs,
+		ConfigMaps:   cms,
+	}
+	prt := newPipelineRunTest(t, d)
+	defer prt.Cancel()
+
+	reconciledRun, _ := prt.reconcileRun("foo", "test-pipeline-missing-results", []string{}, false)
+	cond := reconciledRun.Status.Conditions[0]
+	if cond.Status != corev1.ConditionTrue {
+		t.Fatalf("expected PipelineRun status to be True but got: %s", cond.Status)
+	}
+	if len(reconciledRun.Status.SkippedTasks) != 1 {
+		t.Fatalf("expected 1 skipped Task but got %v", len(reconciledRun.Status.SkippedTasks))
+	}
+	if reconciledRun.Status.SkippedTasks[0].Reason != v1.MissingResultsSkip {
+		t.Fatalf("expected 1 skipped Task with reason %s, but got %v", v1.MissingResultsSkip, reconciledRun.Status.SkippedTasks[0].Reason)
+	}
+}
+
 func TestMissingResultWhenStepErrorIsIgnored(t *testing.T) {
 	prs := []*v1.PipelineRun{parse.MustParseV1PipelineRun(t, `
 metadata:

diff --git a/pkg/reconciler/pipelinerun/resources/pipelinerunresolution.go b/pkg/reconciler/pipelinerun/resources/pipelinerunresolution.go
@@ -393,7 +393,9 @@ func (t *ResolvedPipelineTask) skipBecauseResultReferencesAreMissing(facts *Pipe
 		resolvedResultRefs, pt, err := ResolveResultRefs(facts.State, PipelineRunState{t})
 		rpt := facts.State.ToMap()[pt]
 		if rpt != nil {
-			if err != nil && (t.IsFinalTask(facts) || rpt.Skip(facts).SkippingReason == v1.WhenExpressionsSkip) {
+			if err != nil &&
+				(t.PipelineTask.OnError == v1.PipelineTaskContinue ||
+					(t.IsFinalTask(facts) || rpt.Skip(facts).SkippingReason == v1.WhenExpressionsSkip)) {
 				return true
 			}
 		}