Skip to content

Commit

Permalink
Adds program to allow syscalls to be canceled
Browse files Browse the repository at this point in the history
  • Loading branch information
reederc42 authored Jan 16, 2025
1 parent 9ba9d1b commit ecb15b2
Show file tree
Hide file tree
Showing 12 changed files with 272 additions and 39 deletions.
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@ ARG BIN=trident_orchestrator
ARG CLI_BIN=tridentctl
ARG CHWRAP_BIN=chwrap.tar
ARG NODE_PREP_BIN=node_prep
ARG SYSWRAP_BIN=syswrap

COPY ${BIN} /trident_orchestrator
COPY ${CLI_BIN} /bin/tridentctl
COPY ${NODE_PREP_BIN} /node_prep
COPY ${SYSWRAP_BIN} /syswrap
ADD ${CHWRAP_BIN} /

ENTRYPOINT ["/bin/tridentctl"]
Expand Down
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ binaries_for_platform = $(call go_build,tridentctl,./cli,$1,$2)\
$(if $(findstring darwin,$1),,\
&& $(call go_build,trident_orchestrator,.,$1,$2)\
$(if $(findstring linux,$1),\
&& $(call go_build,syswrap,./cmd/syswrap,$1,$2) \
&& $(call chwrap_build,$1,$2) \
&& $(call node_prep_build,$1,$2) ))

Expand Down Expand Up @@ -239,6 +240,7 @@ docker_build_linux = $1 build \
--build-arg CLI_BIN=$(call binary_path,tridentctl,$2) \
--build-arg NODE_PREP_BIN=$(call binary_path,node_prep,$2) \
--build-arg CHWRAP_BIN=$(call binary_path,chwrap.tar,$2) \
--build-arg SYSWRAP_BIN=${call binary_path,syswrap,$2} \
--tag $3 \
--rm \
$(if $(findstring $(DOCKER_BUILDX_BUILD_CLI),$1),--builder trident-builder) \
Expand Down
76 changes: 76 additions & 0 deletions cmd/syswrap/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package main

import (
"encoding/json"
"fmt"
"os"
"time"

"github.com/netapp/trident/internal/syswrap/unix"
)

var syscalls = map[string]func([]string) (interface{}, error){
"statfs": unix.Statfs,
"exists": unix.Exists,
}

type result struct {
output interface{}
err error
}

func main() {
timeout, syscall, args, err := parseArgs(os.Args)
if err != nil {
exit(err)
}

select {
case <-time.After(timeout):
exit(fmt.Errorf("timed out waiting for %s", syscall))
case res := <-func() chan result {
r := make(chan result)
go func() {
s, ok := syscalls[syscall]
if !ok {
r <- result{
err: fmt.Errorf("unknown syscall: %s", syscall),
}
}
i, e := s(args)
r <- result{
output: i,
err: e,
}
}()
return r
}():
if res.err != nil {
exit(res.err)
}
_ = json.NewEncoder(os.Stdout).Encode(res.output)
}
}

func exit(err error) {
_, _ = fmt.Fprintf(os.Stderr, "error: %v", err)
os.Exit(1)
}

func parseArgs(args []string) (timeout time.Duration, syscall string, syscallArgs []string, err error) {
if len(args) < 3 {
err = fmt.Errorf("expected at least 3 arguments")
return
}
timeout, err = time.ParseDuration(args[1])
if err != nil {
return
}

syscall = args[2]

if len(args) > 3 {
syscallArgs = args[3:]
}
return
}
4 changes: 3 additions & 1 deletion frontend/csi/node_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (

tridentconfig "github.com/netapp/trident/config"
"github.com/netapp/trident/internal/fiji"
"github.com/netapp/trident/internal/syswrap"
. "github.com/netapp/trident/logging"
"github.com/netapp/trident/pkg/collection"
"github.com/netapp/trident/pkg/convert"
Expand Down Expand Up @@ -54,6 +55,7 @@ const (
maximumNodeReconciliationJitter = 5000 * time.Millisecond
nvmeMaxFlushWaitDuration = 6 * time.Minute
csiNodeLockTimeout = 60 * time.Second
fsUnavailableTimeout = 5 * time.Second
)

var (
Expand Down Expand Up @@ -350,7 +352,7 @@ func (p *Plugin) NodeGetVolumeStats(
}

// Ensure volume is published at path
exists, err := p.osutils.PathExists(req.GetVolumePath())
exists, err := syswrap.Exists(ctx, req.GetVolumePath(), fsUnavailableTimeout)
if !exists || err != nil {
return nil, status.Error(codes.NotFound,
fmt.Sprintf("could not find volume mount at path: %s; %v", req.GetVolumePath(), err))
Expand Down
33 changes: 33 additions & 0 deletions internal/syswrap/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# syswrap

`syswrap` is a small program to wrap system calls with timeouts. In Go, syscalls cannot be cleaned up if they are
blocked, for example if `lstat` is called on an inaccessible NFS mount, but because Go can create additional goroutines
the calling application will not block. This can cause resource exhaustion if syscalls are not bounded.

Linux will clean up syscalls if the owning process ends, so `syswrap` exists to allow Trident to create a new process
that owns a potentially blocking syscall.

The Trident-accessible interface is in this package.

## Usage

`syswrap <timeout> <syscall> <syscall args...>`

`<timeout>` is in Go format, i.e. `30s`

`<syscall>` is the name of the call, see `cmd/syswrap/main.go`

`<syscall args...>` are string representations of any arguments required by the call.

## Adding Syscalls

See `syswrap.Exists` for a cross-platform example, and `syswrap.Statfs` for a Linux-only example.

There are 3 steps to adding a new syscall:

1. Add func to `internal/syswrap` package. This func calls the syswrap binary, and it should also call the syscall
itself if the syswrap binary is not found.
2. Add func to `internal/syswrap/unix` package. The func must have the signature
`func(args []string) (output interface{}, err error)`, and parse its args then call the syscall. Any fields in output
that need to be used by Trident must be exported.
3. Add func name to `cmd/syswrap/main.syscalls` map.
14 changes: 14 additions & 0 deletions internal/syswrap/syswrap.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
//go:build windows || darwin

package syswrap

import (
"context"
"os"
"time"
)

func Exists(_ context.Context, path string, _ time.Duration) (bool, error) {
_, err := os.Stat(path)
return err == nil, nil
}
55 changes: 55 additions & 0 deletions internal/syswrap/syswrap_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Package syswrap wraps syscalls that need to be canceled
package syswrap

import (
"context"
"encoding/json"
"errors"
"os"
"time"

"golang.org/x/sys/unix"

"github.com/netapp/trident/utils/exec"
)

const syswrapBin = "/syswrap"

func Statfs(ctx context.Context, path string, timeout time.Duration) (unix.Statfs_t, error) {
buf, err := exec.NewCommand().Execute(ctx, syswrapBin, timeout.String(), "statfs", path)
if err != nil {
// If syswrap is unavailable fall back to blocking call. This may hang if NFS backend is unreachable
var pe *os.PathError
ok := errors.As(err, &pe)
if !ok {
return unix.Statfs_t{}, err
}

var fsStat unix.Statfs_t
err = unix.Statfs(path, &fsStat)
return fsStat, err
}

var b unix.Statfs_t
err = json.Unmarshal(buf, &b)
return b, err
}

func Exists(ctx context.Context, path string, timeout time.Duration) (bool, error) {
buf, err := exec.NewCommand().Execute(ctx, syswrapBin, timeout.String(), "exists", path)
if err != nil {
// If syswrap is unavailable fall back to blocking call. This may hang if NFS backend is unreachable
var pe *os.PathError
ok := errors.As(err, &pe)
if !ok {
return false, err
}

_, err = os.Stat(path)
return err == nil, err
}

var b bool
err = json.Unmarshal(buf, &b)
return b, err
}
27 changes: 27 additions & 0 deletions internal/syswrap/syswrap_linux_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package syswrap

import (
"context"
"errors"
"os"
"testing"

"github.com/stretchr/testify/assert"

"github.com/netapp/trident/utils/exec"
)

// errors.Is should work to detect PathError from Execute, but it currently does not. If this test starts to fail
// then the errors.As calls in this package should be converted to errors.Is.
func TestSyswrapUnavailableRequiresAs(t *testing.T) {
_, err := os.Stat(syswrapBin)
assert.Error(t, err)
wd, err := os.Getwd()
assert.NoError(t, err)
_, err = exec.NewCommand().Execute(context.Background(), syswrapBin, "1s", "statfs", wd)
assert.Error(t, err)
assert.False(t, errors.Is(err, &os.PathError{}))

var pe *os.PathError
assert.True(t, errors.As(err, &pe))
}
27 changes: 27 additions & 0 deletions internal/syswrap/unix/syscall.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Package unix parses string arguments and calls the system call
package unix

import (
"fmt"
"os"

"golang.org/x/sys/unix"
)

func Statfs(args []string) (interface{}, error) {
if len(args) != 1 {
return nil, fmt.Errorf("expected 1 argument")
}
var buf unix.Statfs_t

Check failure on line 15 in internal/syswrap/unix/syscall.go

View workflow job for this annotation

GitHub Actions / unit tests windows-latest

undefined: unix.Statfs_t
err := unix.Statfs(args[0], &buf)

Check failure on line 16 in internal/syswrap/unix/syscall.go

View workflow job for this annotation

GitHub Actions / unit tests windows-latest

undefined: unix.Statfs
return &buf, err
}

func Exists(args []string) (interface{}, error) {
if len(args) != 1 {
return nil, fmt.Errorf("expected 1 argument")
}
_, err := os.Stat(args[0])
exists := err == nil
return &exists, err
}
15 changes: 15 additions & 0 deletions mocks/mock_utils/mock_osutils/mock_osutils.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 5 additions & 34 deletions utils/filesystem/filesystem_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,12 @@ import (
"fmt"
"time"

"golang.org/x/sys/unix"

"github.com/netapp/trident/internal/syswrap"
. "github.com/netapp/trident/logging"
"github.com/netapp/trident/utils/errors"
"github.com/netapp/trident/utils/models"
)

type statFSResult struct {
Output unix.Statfs_t
Error error
}

// GetFilesystemStats returns the size of the filesystem for the given path.
// The caller of the func is responsible for verifying the mountPoint existence and readiness.
func (f *FSClient) GetFilesystemStats(
Expand All @@ -29,35 +23,12 @@ func (f *FSClient) GetFilesystemStats(
Logc(ctx).Debug(">>>> filesystem_linux.GetFilesystemStats")
defer Logc(ctx).Debug("<<<< filesystem_linux.GetFilesystemStats")

timedOut := false
timeout := 30 * time.Second
done := make(chan statFSResult, 1)
var result statFSResult

go func() {
// Warning: syscall.Statfs_t uses types that are OS and arch dependent. The following code has been
// confirmed to work with Linux/amd64 and Darwin/amd64.
var buf unix.Statfs_t
err := unix.Statfs(path, &buf)
done <- statFSResult{Output: buf, Error: err}
}()

select {
case <-time.After(timeout):
timedOut = true
case result = <-done:
break
}

if result.Error != nil {
Logc(ctx).WithField("path", path).Errorf("Failed to statfs: %s", result.Error)
return 0, 0, 0, 0, 0, 0, fmt.Errorf("couldn't get filesystem stats %s: %s", path, result.Error)
} else if timedOut {
Logc(ctx).WithField("path", path).Errorf("Failed to statfs due to timeout")
return 0, 0, 0, 0, 0, 0, fmt.Errorf("couldn't get filesystem stats %s: timeout", path)
buf, err := syswrap.Statfs(ctx, path, 30*time.Second)
if err != nil {
Logc(ctx).WithField("path", path).Errorf("Failed to statfs: %s", err)
return 0, 0, 0, 0, 0, 0, fmt.Errorf("couldn't get filesystem stats %s: %s", path, err)
}

buf := result.Output
//goland:noinspection GoRedundantConversion
size := int64(buf.Blocks) * int64(buf.Bsize)

Expand Down
Loading

0 comments on commit ecb15b2

Please sign in to comment.