Skip to content

Commit

Permalink
The cancellation of taskruns is now done through the entrypoint binary
Browse files Browse the repository at this point in the history
through a new flag called 'stop_on_cancel'. This removes the need for
deleting the pods to cancel a taskrun, allowing examination of the logs
on the pods from cancelled taskruns. Part of work on issue #3238

Signed-off-by: chengjoey <[email protected]>
  • Loading branch information
chengjoey committed Oct 17, 2023
1 parent 454bfd3 commit a3d6e7f
Show file tree
Hide file tree
Showing 22 changed files with 881 additions and 51 deletions.
12 changes: 11 additions & 1 deletion cmd/entrypoint/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package main

import (
"context"
"encoding/json"
"errors"
"flag"
Expand Down Expand Up @@ -67,7 +68,7 @@ const (

func checkForBreakpointOnFailure(e entrypoint.Entrypointer, breakpointExitPostFile string) {
if e.BreakpointOnFailure {
if waitErr := e.Waiter.Wait(breakpointExitPostFile, false, false); waitErr != nil {
if waitErr := e.Waiter.Wait(context.Background(), breakpointExitPostFile, false, false); waitErr != nil {
log.Println("error occurred while waiting for " + breakpointExitPostFile + " : " + waitErr.Error())
}
// get exitcode from .breakpointexit
Expand Down Expand Up @@ -181,6 +182,15 @@ func main() {
case termination.MessageLengthError:
log.Print(err.Error())
os.Exit(1)
case entrypoint.ContextError:
if errors.Is(err, entrypoint.ErrContextCanceled) {
log.Print("Step was cancelled")
// exit with the SIGKILL signal number so a cancelled step can be told apart from a normal exit, just like kill -9 PID
os.Exit(int(syscall.SIGKILL))
} else {
log.Print(err.Error())
os.Exit(1)
}
case *exec.ExitError:
// Copied from https://stackoverflow.com/questions/10385551/get-exit-code-go
// This works on both Unix and Windows. Although
Expand Down
13 changes: 11 additions & 2 deletions cmd/entrypoint/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,10 @@ func (rr *realRunner) Run(ctx context.Context, args ...string) error {
// Start defined command
if err := cmd.Start(); err != nil {
if errors.Is(ctx.Err(), context.DeadlineExceeded) {
return context.DeadlineExceeded
return entrypoint.ErrContextDeadlineExceeded
}
if errors.Is(ctx.Err(), context.Canceled) {
return entrypoint.ErrContextCanceled
}
return err
}
Expand All @@ -134,9 +137,15 @@ func (rr *realRunner) Run(ctx context.Context, args ...string) error {
}()

// Wait for command to exit
// As the os/exec [note](https://github.com/golang/go/blob/ee522e2cdad04a43bc9374776483b6249eb97ec9/src/os/exec/exec.go#L897-L906) explains,
// cmd.Wait prefers the process error over the context error,
// but we want to return the context error instead.
if err := cmd.Wait(); err != nil {
if errors.Is(ctx.Err(), context.DeadlineExceeded) {
return context.DeadlineExceeded
return entrypoint.ErrContextDeadlineExceeded
}
if errors.Is(ctx.Err(), context.Canceled) {
return entrypoint.ErrContextCanceled
}
return err
}
Expand Down
47 changes: 46 additions & 1 deletion cmd/entrypoint/runner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,15 @@ import (
"errors"
"fmt"
"io"
"math/rand"
"os"
"path/filepath"
"strings"
"syscall"
"testing"
"time"

"github.com/tektoncd/pipeline/pkg/entrypoint"
)

// TestRealRunnerSignalForwarding will artificially put an interrupt signal (SIGINT) in the rr.signals chan.
Expand Down Expand Up @@ -183,10 +186,52 @@ func TestRealRunnerTimeout(t *testing.T) {
defer cancel()

if err := rr.Run(ctx, "sleep", "0.01"); err != nil {
if !errors.Is(err, context.DeadlineExceeded) {
if !errors.Is(err, entrypoint.ErrContextDeadlineExceeded) {
t.Fatalf("unexpected error received: %v", err)
}
} else {
t.Fatalf("step didn't timeout")
}
}

// TestRealRunnerCancel checks how realRunner.Run reports context cancellation
// relative to the lifetime of the command it runs ("sleep 3"): a cancellation
// that fires before or while the command is running must surface as
// entrypoint.ErrContextCanceled, while a cancellation scheduled for after the
// command has finished must not produce an error.
func TestRealRunnerCancel(t *testing.T) {
	testCases := []struct {
		name    string
		timeout time.Duration // delay before the context is cancelled
		wantErr error
	}{
		{
			name:    "cancel before cmd wait",
			timeout: 0,
			wantErr: entrypoint.ErrContextCanceled,
		},
		{
			name: "cancel on cmd wait",
			// 1s or 2s: always strictly inside the 3s sleep, and never 0,
			// so this case cannot degenerate into "cancel before cmd wait".
			timeout: time.Second * time.Duration(1+rand.Intn(2)),
			wantErr: entrypoint.ErrContextCanceled,
		},
		{
			name:    "cancel after cmd wait",
			timeout: time.Second * 4,
			wantErr: nil,
		},
	}
	for _, tc := range testCases {
		// Run each case as a named subtest so a failure identifies the case
		// and the deferred cleanup below runs per case.
		t.Run(tc.name, func(t *testing.T) {
			rr := realRunner{}
			ctx, cancel := context.WithCancel(context.Background())
			// Release the context even when the timer never fires.
			defer cancel()

			// Schedule the cancellation; stop the timer on exit so nothing
			// outlives the subtest when the command finishes first.
			timer := time.AfterFunc(tc.timeout, cancel)
			defer timer.Stop()

			err := rr.Run(ctx, "sleep", "3")
			// errors.Is(err, nil) is true only when err is nil, so one check
			// covers both the "want error" and the "want no error" cases.
			if !errors.Is(err, tc.wantErr) {
				t.Fatalf("unexpected error received: got %v, want %v", err, tc.wantErr)
			}
		})
	}
}
17 changes: 15 additions & 2 deletions cmd/entrypoint/waiter.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ limitations under the License.
package main

import (
"context"
"errors"
"fmt"
"os"
"time"
Expand Down Expand Up @@ -47,11 +49,22 @@ func (rw *realWaiter) setWaitPollingInterval(pollingInterval time.Duration) *rea
//
// If a file of the same name with a ".err" extension exists then this Wait
// will end with a skipError.
func (rw *realWaiter) Wait(file string, expectContent bool, breakpointOnFailure bool) error {
func (rw *realWaiter) Wait(ctx context.Context, file string, expectContent bool, breakpointOnFailure bool) error {
if file == "" {
return nil
}
for ; ; time.Sleep(rw.waitPollingInterval) {
for {
select {
case <-ctx.Done():
if errors.Is(ctx.Err(), context.Canceled) {
return entrypoint.ErrContextCanceled
}
if errors.Is(ctx.Err(), context.DeadlineExceeded) {
return entrypoint.ErrContextDeadlineExceeded
}
return nil
case <-time.After(rw.waitPollingInterval):
}
if info, err := os.Stat(file); err == nil {
if !expectContent || info.Size() > 0 {
return nil
Expand Down
Loading

0 comments on commit a3d6e7f

Please sign in to comment.