Skip to content

Commit

Permalink
Add granular termination reason in container termination message
Browse files Browse the repository at this point in the history
Related with #7539 and #7223

To report step-specific termination reasons, we need to know why each step's container finished; we use the termination message to store a new "state" with this information. We are adding a new field to store this information per step.

Co-authored-by: JeromeJu <[email protected]>
Co-authored-by: Chitrang Patel <[email protected]>
  • Loading branch information
3 people committed Feb 5, 2024
1 parent ec051b2 commit e76905e
Show file tree
Hide file tree
Showing 21 changed files with 795 additions and 41 deletions.
2 changes: 1 addition & 1 deletion cmd/entrypoint/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ func main() {
if err := e.Go(); err != nil {
breakpointExitPostFile := e.PostFile + breakpointExitSuffix
switch t := err.(type) { //nolint:errorlint // checking for multiple types with errors.As is ugly.
case skipError:
case entrypoint.SkipError:
log.Print("Skipping step because a previous step failed")
os.Exit(1)
case termination.MessageLengthError:
Expand Down
8 changes: 1 addition & 7 deletions cmd/entrypoint/waiter.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ func (rw *realWaiter) Wait(ctx context.Context, file string, expectContent bool,
if breakpointOnFailure {
return nil
}
return skipError("error file present, bail and skip the step")
return entrypoint.ErrSkipPreviousStepFailed
}
select {
case <-ctx.Done():
Expand All @@ -86,9 +86,3 @@ func (rw *realWaiter) Wait(ctx context.Context, file string, expectContent bool,
}
}
}

// skipError is a string-backed error type; the string itself is the error message.
type skipError string

// Error implements the error interface by returning the underlying string unchanged.
func (e skipError) Error() string {
	msg := string(e)
	return msg
}
4 changes: 2 additions & 2 deletions cmd/entrypoint/waiter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ func TestRealWaiterWaitWithErrorWaitfile(t *testing.T) {
if err == nil {
t.Errorf("expected skipError upon encounter error waitfile")
}
var skipErr skipError
var skipErr entrypoint.SkipError
if errors.As(err, &skipErr) {
close(doneCh)
} else {
Expand Down Expand Up @@ -292,7 +292,7 @@ func TestRealWaiterWaitContextWithErrorWaitfile(t *testing.T) {
if err == nil {
t.Errorf("expected skipError upon encounter error waitfile")
}
var skipErr skipError
var skipErr entrypoint.SkipError
if errors.As(err, &skipErr) {
close(doneCh)
} else {
Expand Down
10 changes: 10 additions & 0 deletions docs/pipeline-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -4641,6 +4641,16 @@ string
<td>
</td>
</tr>
<tr>
<td>
<code>terminationReason</code><br/>
<em>
string
</em>
</td>
<td>
</td>
</tr>
</tbody>
</table>
<h3 id="tekton.dev/v1.StepTemplate">StepTemplate
Expand Down
1 change: 1 addition & 0 deletions docs/stepactions.md
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,7 @@ status:
- container: step-action-runner
imageID: docker.io/library/alpine@sha256:eece025e432126ce23f223450a0326fbebde39cdf496a85d8c016293fc851978
name: action-runner
terminationReason: Completed
terminated:
containerID: containerd://46a836588967202c05b594696077b147a0eb0621976534765478925bb7ce57f6
exitCode: 0
Expand Down
2 changes: 2 additions & 0 deletions docs/taskruns.md
Original file line number Diff line number Diff line change
Expand Up @@ -799,6 +799,7 @@ The `status` field defines the observed state of `TaskRun`
- `refSource`: the source from where a remote `Task` definition was fetched.
- `featureFlags`: Identifies the feature flags used during the `TaskRun`.
- `steps` - Contains the `state` of each `step` container.
- `steps[].terminationReason` - When the step is terminated, it stores the step's final state.
- `retriesStatus` - Contains the history of `TaskRun`'s `status` in case of a retry in order to keep record of failures. No `status` stored within `retriesStatus` will have any `date` within as it is redundant.

- [`sidecars`](tasks.md#using-a-sidecar-in-a-task) - This field is a list. The list has one entry per `sidecar` in the manifest. Each entry represents the imageid of the corresponding sidecar.
Expand Down Expand Up @@ -831,6 +832,7 @@ steps:
- container: step-hello
imageID: docker-pullable://busybox@sha256:895ab622e92e18d6b461d671081757af7dbaa3b00e3e28e12505af7817f73649
name: hello
terminationReason: Completed
terminated:
containerID: docker://d5a54f5bbb8e7a6fd3bc7761b78410403244cf4c9c5822087fb0209bf59e3621
exitCode: 0
Expand Down
6 changes: 6 additions & 0 deletions pkg/apis/pipeline/v1/openapi_generated.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions pkg/apis/pipeline/v1/swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -1609,6 +1609,9 @@
"description": "Details about a terminated container",
"$ref": "#/definitions/v1.ContainerStateTerminated"
},
"terminationReason": {
"type": "string"
},
"waiting": {
"description": "Details about a waiting container",
"$ref": "#/definitions/v1.ContainerStateWaiting"
Expand Down
1 change: 1 addition & 0 deletions pkg/apis/pipeline/v1/taskrun_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,7 @@ type StepState struct {
Container string `json:"container,omitempty"`
ImageID string `json:"imageID,omitempty"`
Results []TaskRunStepResult `json:"results,omitempty"`
TerminationReason string `json:"terminationReason,omitempty"`
}

// SidecarState reports the results of running a sidecar in a Task.
Expand Down
5 changes: 5 additions & 0 deletions pkg/apis/pipeline/v1beta1/taskrun_conversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,11 @@ func (ss StepState) convertTo(ctx context.Context, sink *v1.StepState) {
sink.Container = ss.ContainerName
sink.ImageID = ss.ImageID
sink.Results = nil

if ss.ContainerState.Terminated != nil {
sink.TerminationReason = ss.ContainerState.Terminated.Reason
}

for _, r := range ss.Results {
new := v1.TaskRunStepResult{}
r.convertTo(ctx, &new)
Expand Down
38 changes: 26 additions & 12 deletions pkg/entrypoint/entrypointer.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,19 @@ func (e ContextError) Error() string {
return string(e)
}

// SkipError is a string-backed error type; the string itself is the error
// message. It is the type of ErrSkipPreviousStepFailed.
type SkipError string

// Error implements the error interface by returning the underlying string unchanged.
func (e SkipError) Error() string {
	msg := string(e)
	return msg
}

var (
// ErrContextDeadlineExceeded is the error returned when the context deadline is exceeded.
// It is a ContextError carrying the message of context.DeadlineExceeded.
ErrContextDeadlineExceeded = ContextError(context.DeadlineExceeded.Error())
// ErrContextCanceled is the error returned when the context is canceled.
// It is a ContextError carrying the message of context.Canceled.
ErrContextCanceled = ContextError(context.Canceled.Error())
// ErrSkipPreviousStepFailed is the error returned when the step is skipped
// because a previous step failed (an error file is present).
ErrSkipPreviousStepFailed = SkipError("error file present, bail and skip the step")
)

// IsContextDeadlineError determine whether the error is context deadline
Expand Down Expand Up @@ -167,6 +175,11 @@ func (e Entrypointer) Go() error {
Value: time.Now().Format(timeFormat),
ResultType: result.InternalTektonResultType,
})

if errors.Is(err, ErrSkipPreviousStepFailed) {
output = append(output, e.outputRunResult(pod.TerminationReasonSkipped))
}

return err
}
}
Expand Down Expand Up @@ -199,26 +212,18 @@ func (e Entrypointer) Go() error {
}
}()
err = e.Runner.Run(ctx, e.Command...)
if errors.Is(err, ErrContextDeadlineExceeded) {
output = append(output, result.RunResult{
Key: "Reason",
Value: "TimeoutExceeded",
ResultType: result.InternalTektonResultType,
})
}
}

var ee *exec.ExitError
switch {
case err != nil && errors.Is(err, ErrContextCanceled):
logger.Info("Step was canceling")
output = append(output, result.RunResult{
Key: "Reason",
Value: "Cancelled",
ResultType: result.InternalTektonResultType,
})
output = append(output, e.outputRunResult(pod.TerminationReasonCancelled))
e.WritePostFile(e.PostFile, ErrContextCanceled)
e.WriteExitCodeFile(e.StepMetadataDir, syscall.SIGKILL.String())
case errors.Is(err, ErrContextDeadlineExceeded):
e.WritePostFile(e.PostFile, err)
output = append(output, e.outputRunResult(pod.TerminationReasonTimeoutExceeded))
case err != nil && e.BreakpointOnFailure:
logger.Info("Skipping writing to PostFile")
case e.OnError == ContinueOnError && errors.As(err, &ee):
Expand Down Expand Up @@ -453,3 +458,12 @@ func (e *Entrypointer) applyStepResultSubstitutions(stepDir string) error {
e.Command = newCommand
return nil
}

// outputRunResult builds the run result that records why a step terminated.
// The given terminationReason is stored as the value of the "Reason" key and
// the result is tagged with result.InternalTektonResultType.
func (e Entrypointer) outputRunResult(terminationReason string) result.RunResult {
	var reason result.RunResult
	reason.Key = "Reason"
	reason.Value = terminationReason
	reason.ResultType = result.InternalTektonResultType
	return reason
}
Loading

0 comments on commit e76905e

Please sign in to comment.