Skip to content

Commit

Permalink
Merge pull request #387 from solarwinds/feature/NH-57813
Browse files Browse the repository at this point in the history
NH-57813: Updating image - wrapper around otel collector that try to detect checkpoint corruption
  • Loading branch information
gantrior authored Oct 9, 2023
2 parents c8efc1d + 514d0ec commit 4202e99
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 4 deletions.
9 changes: 7 additions & 2 deletions build/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ FROM debian:11.7@sha256:432f545c6ba13b79e2681f4cc4858788b0ab099fc1cca799cc0fae46
RUN apt update
RUN apt install -y systemd

# Build stage "wrapper": compiles the Go program located in /src/src/wrapper
# (relative to the "base" stage's filesystem) into the single binary /bin/wrapper.
#   CGO_ENABLED=0 and -extldflags "-static" request a statically linked binary,
#   -a forces a rebuild of all packages, -tags netgo selects the pure-Go resolver,
#   -w omits DWARF debug information from the output.
FROM base as wrapper
WORKDIR /src/src/wrapper
RUN CGO_ENABLED=0 GOEXPERIMENT=boringcrypto go build -a -tags netgo -ldflags '-w -extldflags "-static"' -o /bin/wrapper && chmod +x /bin/wrapper

FROM scratch as journalbinaries

# dynamically linked libraries that are required for journalctl and the journalctl binary itself
Expand Down Expand Up @@ -61,8 +65,9 @@ USER ${USER_UID}

COPY --from=prep /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
COPY --from=builder /src/swi-k8s-opentelemetry-collector /swi-otelcol
COPY --from=wrapper /bin/wrapper /wrapper
COPY --from=journalbinaries / /

ENTRYPOINT ["/swi-otelcol"]
CMD ["--config=/opt/default-config.yaml"]
# The wrapper is the container entrypoint. CMD supplies the collector command line;
# the wrapper starts it as a child process (it executes its own arguments) and
# watches the child's stderr for the checkpoint-corruption panic.
ENTRYPOINT ["/wrapper"]
CMD ["/swi-otelcol", "--config=/opt/default-config.yaml"]

14 changes: 12 additions & 2 deletions build/docker/Dockerfile.Windows
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,20 @@ ARG GOEXPERIMENT=boringcrypto

RUN /go/bin/builder --config ./swi-k8s-opentelemetry-collector.yaml --output-path ./

# Build stage "wrapper": compiles the Go program located in /src/src/wrapper
# into /wrapper.exe.
# NOTE(review): GOOS is not set in this stage; presumably the "base" stage already
# targets Windows - confirm.
FROM base as wrapper
WORKDIR /src/src/wrapper

# ARG values are visible as environment variables to the RUN instruction below.
ARG CGO_ENABLED=0
ARG GOEXPERIMENT=boringcrypto

# -a forces a rebuild of all packages.
RUN go build -a -o /wrapper.exe

FROM mcr.microsoft.com/windows/nanoserver:ltsc2022

COPY --from=builder /src/swi-k8s-opentelemetry-collector /swi-otelcol.exe
ENTRYPOINT ["swi-otelcol.exe"]
CMD ["--config=c:/config/default-config.yaml"]
COPY --from=wrapper /wrapper.exe /wrapper.exe

# The wrapper is the container entrypoint. CMD supplies the collector command line;
# the wrapper starts it as a child process (it executes its own arguments).
# NOTE(review): the CMD this replaces used --config=c:/config/default-config.yaml;
# confirm that /opt/default-config.yaml is the intended config path on Windows.
ENTRYPOINT ["wrapper.exe"]
CMD ["swi-otelcol.exe", "--config=/opt/default-config.yaml"]


3 changes: 3 additions & 0 deletions src/wrapper/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
module wrapper

go 1.20
85 changes: 85 additions & 0 deletions src/wrapper/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
The goal of this wrapper is to detect a panic that indicates corruption of the checkpoints folder
(which can happen during a kernel crash or system restart). When such a panic is detected, the wrapper removes the contents of the checkpoint folder.
*/

package main

import (
"bufio"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
)

const (
	// panicMessage is the stderr fragment that identifies the panic raised when
	// the checkpoint storage is corrupted.
	panicMessage = "panic: assertion failed: Page expected to be"
)

// main runs the command given in os.Args[1:] as a child process, passing its
// stdout through unchanged and forwarding its stderr line by line while
// watching for panicMessage.
//
// Once the child has exited, and only if the panic was seen, everything inside
// the directory named by the CHECKPOINT_DIR environment variable is deleted
// (the directory itself is kept). The wrapper then exits with the child's exit
// code so that a crash of the child is visible to whatever supervises the
// wrapper.
func main() {
	checkpointDir := os.Getenv("CHECKPOINT_DIR")

	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "Error: Missing command arguments")
		os.Exit(1)
	}

	cmd := exec.Command(os.Args[1], os.Args[2:]...)
	cmd.Stdout = os.Stdout // Redirect stdout of cmd to stdout of wrapper

	r, w := io.Pipe()
	cmd.Stderr = w // Redirect stderr of cmd to the writer end of the pipe

	var (
		wg        sync.WaitGroup
		corrupted bool // written by the monitor goroutine, read after wg.Wait()
	)
	wg.Add(1)

	// Goroutine that forwards the child's stderr and looks for the panic message.
	go func() {
		defer wg.Done()
		corrupted = forwardAndDetect(r, os.Stderr, panicMessage)
	}()

	if err := cmd.Start(); err != nil {
		fmt.Fprintln(os.Stderr, "Error:", err)
		os.Exit(1)
	}

	// Wait for the command to exit; the error is kept so the exit status can
	// be propagated after cleanup.
	waitErr := cmd.Wait()
	if waitErr != nil {
		fmt.Fprintln(os.Stderr, "Error:", waitErr)
	}

	// Close the writer end of the pipe to signal the end of data, then wait
	// until every remaining stderr line has been forwarded and inspected.
	w.Close()
	wg.Wait()

	// Cleanup happens only after the child is gone, so no file in the
	// checkpoint directory is still held open by it.
	if corrupted {
		fmt.Fprintln(os.Stderr, "Specific panic detected, deleting all files in checkpoint folder...")
		if checkpointDir != "" {
			if err := removeDirContents(checkpointDir); err != nil {
				fmt.Fprintln(os.Stderr, "Error:", err)
			}
		}
	}

	if waitErr != nil {
		code := 1
		// ExitCode is -1 when the child was terminated by a signal; keep 1 then.
		if cmd.ProcessState != nil && cmd.ProcessState.ExitCode() > 0 {
			code = cmd.ProcessState.ExitCode()
		}
		os.Exit(code)
	}
}

// forwardAndDetect copies src to dst line by line (each line is written with a
// trailing newline, a trailing "\r" is dropped) and reports whether any line
// contained marker. It reads until src returns an error or io.EOF.
//
// Lines of any length are handled, so the reader never stops draining src
// because of a single over-long line.
func forwardAndDetect(src io.Reader, dst io.Writer, marker string) bool {
	found := false
	reader := bufio.NewReader(src)
	for {
		line, err := reader.ReadString('\n')
		if len(line) > 0 {
			line = strings.TrimSuffix(strings.TrimSuffix(line, "\n"), "\r")
			fmt.Fprintln(dst, line)
			if strings.Contains(line, marker) {
				found = true
			}
		}
		if err != nil {
			if err != io.EOF {
				fmt.Fprintln(dst, "Error:", err)
			}
			return found
		}
	}
}

// removeDirContents deletes every entry inside dir, recursively, while keeping
// dir itself. It keeps going after a failed removal and returns the first
// error encountered (or the error from listing dir).
func removeDirContents(dir string) error {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return err
	}
	var firstErr error
	for _, entry := range entries {
		if rmErr := os.RemoveAll(filepath.Join(dir, entry.Name())); rmErr != nil && firstErr == nil {
			firstErr = rmErr
		}
	}
	return firstErr
}

0 comments on commit 4202e99

Please sign in to comment.