Skip to content

Commit

Permalink
[supervisor] set pod failure reason when supervisor is reaped (#20318)
Browse files Browse the repository at this point in the history
* [supervisor] set pod failure reason when supervisor has been reaped

* Debug commit

* upgrade pkg

* Revert "Debug commit"

This reverts commit 1b6bde8.
  • Loading branch information
mustard-mh authored Oct 25, 2024
1 parent a836f97 commit 9e8da3b
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 12 deletions.
48 changes: 39 additions & 9 deletions components/supervisor/cmd/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ import (
"github.com/gitpod-io/gitpod/common-go/process"
"github.com/gitpod-io/gitpod/supervisor/pkg/shared"
"github.com/gitpod-io/gitpod/supervisor/pkg/supervisor"
reaper "github.com/gitpod-io/go-reaper"
"github.com/prometheus/procfs"
reaper "github.com/ramr/go-reaper"
"github.com/spf13/cobra"
)

Expand Down Expand Up @@ -77,25 +77,55 @@ var initCmd = &cobra.Command{
}

supervisorDone := make(chan struct{})
handledByReaper := make(chan int)
// handleSupervisorExit reports a non-zero supervisor exit code as a fatal log entry.
// An exit code of 0 is ignored. For any other code, the text returned by
// extractFailureFromRun is logged via Fatal: as the message itself when
// shared.IsExpectedShutdown accepts the code, otherwise attached as an error to a
// generic "unexpected exit code" message.
handleSupervisorExit := func(exitCode int) {
	if exitCode == 0 {
		return
	}
	logs := extractFailureFromRun()
	if shared.IsExpectedShutdown(exitCode) {
		log.Fatal(logs)
	} else {
		// logs is arbitrary text, so it must not be used as the format string itself:
		// any '%' in it would be interpreted as a verb and corrupt the reported failure.
		log.WithError(fmt.Errorf("%s", logs)).Fatal("supervisor run error with unexpected exit code")
	}
}
go func() {
defer close(supervisorDone)

err := runCommand.Wait()
if err != nil && !(strings.Contains(err.Error(), "signal: ") || strings.Contains(err.Error(), "no child processes")) {
if err == nil {
return
}
// exited by reaper
if strings.Contains(err.Error(), "no child processes") {
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
defer cancel()
select {
case <-ctx.Done(): // timeout
case exitCode := <-handledByReaper:
handleSupervisorExit(exitCode)
}
} else if !(strings.Contains(err.Error(), "signal: ")) {
if eerr, ok := err.(*exec.ExitError); ok && eerr.ExitCode() != 0 {
logs := extractFailureFromRun()
if shared.IsExpectedShutdown(eerr.ExitCode()) {
log.Fatal(logs)
} else {
log.WithError(fmt.Errorf(logs)).Fatal("supervisor run error with unexpected exit code")
}
handleSupervisorExit(eerr.ExitCode())
}
log.WithError(err).Error("supervisor run error")
return
}
}()
// start the reaper to clean up zombie processes
reaper.Reap()
reaper.Start(reaper.Config{
Pid: -1,
Options: 0,
DisablePid1Check: false,
OnReap: func(pid int, wstatus syscall.WaitStatus) {
if pid != runCommand.Process.Pid {
return
}
exitCode := wstatus.ExitStatus()
handledByReaper <- exitCode
},
})

select {
case <-supervisorDone:
Expand Down
2 changes: 1 addition & 1 deletion components/supervisor/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ require (
github.com/gitpod-io/gitpod/ide-metrics-api v0.0.0-00010101000000-000000000000
github.com/gitpod-io/gitpod/supervisor/api v0.0.0-00010101000000-000000000000
github.com/gitpod-io/gitpod/ws-daemon/api v0.0.0-00010101000000-000000000000
github.com/gitpod-io/go-reaper v0.0.0-20241024192051-78d04cc2e25f
github.com/golang/mock v1.6.0
github.com/google/go-cmp v0.6.0
github.com/google/uuid v1.6.0
Expand All @@ -29,7 +30,6 @@ require (
github.com/prometheus/common v0.42.0
github.com/prometheus/procfs v0.10.1
github.com/prometheus/pushgateway v1.5.1
github.com/ramr/go-reaper v0.2.1
github.com/sirupsen/logrus v1.9.3
github.com/soheilhy/cmux v0.1.5
github.com/spf13/cobra v1.4.0
Expand Down
6 changes: 4 additions & 2 deletions components/supervisor/go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 9e8da3b

Please sign in to comment.