Skip to content

Commit

Permalink
Solidify runc create retries (#18709)
Browse files Browse the repository at this point in the history
  • Loading branch information
csweichel authored Sep 13, 2023
1 parent b57d870 commit 0a5dc02
Showing 1 changed file with 17 additions and 10 deletions.
27 changes: 17 additions & 10 deletions components/docker-up/runc-facade/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@ import (
"os"
"os/exec"
"syscall"
"time"

"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
"golang.org/x/xerrors"
)

const RETRY = 3
const RETRY = 10

var (
defaultOOMScoreAdj = 1000
Expand Down Expand Up @@ -81,22 +82,28 @@ func createAndRunc(runcPath string, log *logrus.Logger) error {
if err != nil {
return xerrors.Errorf("cannot encode config.json: %w", err)
}
for _, fn := range []string{"config.json", "/tmp/debug.json"} {
err = os.WriteFile(fn, fc, 0644)
if err != nil {
return xerrors.Errorf("cannot encode config.json: %w", err)
}
err = os.WriteFile("config.json", fc, 0644)
if err != nil {
return xerrors.Errorf("cannot encode config.json: %w", err)
}

// See here for more details on why retries are necessary.
// https://github.com/gitpod-io/gitpod/issues/12365
for i := 0; i <= RETRY; i++ {
err = syscall.Exec(runcPath, os.Args, os.Environ())
if err == nil {
return err
} else {
err = exec.Command(runcPath, os.Args[1:]...).Run()

if err != nil {
log.WithError(err).Warn("runc failed")

// runc creation failures can be caused by timing issues with workspacekit/seccomp notify under load.
// Easing off the pressure here lowers the likelihood of that error.
// NOTE(cw): glossing over races with delays is bad style, but also pragmatic.
//
// Context: https://linear.app/gitpod/issue/ENG-797/docker-containers-sometimes-fail-to-start
time.Sleep(100 * time.Millisecond)
continue
}
return nil
}
return xerrors.Errorf("exec %s: %w", runcPath, err)
}

0 comments on commit 0a5dc02

Please sign in to comment.