review comments + log cleanup
geropl committed Nov 12, 2024
1 parent 791efa6 commit 88c40a7
Showing 14 changed files with 31 additions and 35 deletions.
3 changes: 2 additions & 1 deletion components/ws-daemon/pkg/cgroup/cgroup.go
@@ -6,6 +6,7 @@ package cgroup

import (
"context"
"errors"

"github.com/gitpod-io/gitpod/common-go/cgroups"
"github.com/gitpod-io/gitpod/common-go/log"
@@ -81,7 +82,7 @@ func (host *PluginHost) WorkspaceAdded(ctx context.Context, ws *dispatch.Workspa

cgroupPath, err := disp.Runtime.ContainerCGroupPath(ctx, ws.ContainerID)
if err != nil {
if err == context.Canceled {
if errors.Is(err, context.Canceled) {
return nil
}
return xerrors.Errorf("cannot get cgroup path for container %s: %w", ws.ContainerID, err)
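For context on the errors.Is changes in this commit: once a sentinel error has been wrapped (for example with %w), a direct == comparison no longer matches it, while errors.Is walks the wrap chain. A minimal sketch, not Gitpod code, with an illustrative container ID:

```go
package main

import (
	"context"
	"errors"
	"fmt"
)

func main() {
	// wrap context.Canceled the way a runtime call might before returning it
	err := fmt.Errorf("cannot get cgroup path for container %s: %w", "abc123", context.Canceled)

	fmt.Println(err == context.Canceled)          // false: the sentinel is wrapped
	fmt.Println(errors.Is(err, context.Canceled)) // true: errors.Is unwraps and matches
}
```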
4 changes: 2 additions & 2 deletions components/ws-daemon/pkg/container/containerd.go
@@ -437,8 +437,8 @@ func (s *Containerd) WaitForContainerStop(ctx context.Context, workspaceInstance
func (s *Containerd) DisposeContainer(ctx context.Context, workspaceInstanceID string) {
log := log.WithContext(ctx)

log.Debug("CONTAINERD: DISPOSING CONTAINER")
defer log.Debug("CONTAINERD: DISPOSING CONTAINER DONE")
log.Debug("containerd: disposing container")
defer log.Debug("containerd: disposing container done")

s.cond.L.Lock()
defer s.cond.L.Unlock()
1 change: 1 addition & 0 deletions components/ws-daemon/pkg/content/hooks.go
@@ -176,6 +176,7 @@ func hookWipingTeardown() session.WorkspaceLivecycleHook {

if !ws.DoWipe {
// this is the "default" case for 99% of all workspaces
// TODO(gpl): We should probably make this the default for all workspaces - but not with this PR
return nil
}

4 changes: 2 additions & 2 deletions components/ws-daemon/pkg/controller/workspace_controller.go
@@ -290,13 +290,13 @@ func (wsc *WorkspaceController) doWipeWorkspace(ctx context.Context, ws *workspa
return ctrl.Result{Requeue: true, RequeueAfter: 100 * time.Millisecond}, nil
}

setStateWipedCondition := func(s bool) {
setStateWipedCondition := func(success bool) {
err := retry.RetryOnConflict(retryParams, func() error {
if err := wsc.Get(ctx, req.NamespacedName, ws); err != nil {
return err
}

if s {
if success {
ws.Status.SetCondition(workspacev1.NewWorkspaceConditionStateWiped("", metav1.ConditionTrue))
} else {
ws.Status.SetCondition(workspacev1.NewWorkspaceConditionStateWiped("", metav1.ConditionFalse))
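The setStateWipedCondition closure above follows client-go's conflict-retry pattern: re-read the object on every attempt, mutate the fresh copy, and let the library re-run the closure on a resourceVersion conflict. A rough sketch under those assumptions, with hypothetical getLatest and updateStatus helpers standing in for the controller-runtime client calls:

```go
package wipe

import "k8s.io/client-go/util/retry"

// workspaceStatus is an illustrative stand-in for the workspacev1 status type.
type workspaceStatus struct {
	StateWiped bool
}

// setStateWipedCondition retries the whole read-modify-write on conflicts.
func setStateWipedCondition(
	getLatest func() (*workspaceStatus, error), // hypothetical: re-fetch the object
	updateStatus func(*workspaceStatus) error, // hypothetical: persist the status
	success bool,
) error {
	return retry.RetryOnConflict(retry.DefaultRetry, func() error {
		ws, err := getLatest() // always mutate the freshest copy
		if err != nil {
			return err
		}
		ws.StateWiped = success
		return updateStatus(ws) // a conflict error makes RetryOnConflict try again
	})
}
```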
8 changes: 3 additions & 5 deletions components/ws-daemon/pkg/controller/workspace_operations.go
@@ -320,9 +320,6 @@ func (wso *DefaultWorkspaceOperations) WipeWorkspace(ctx context.Context, instan
wso.dispatch.DisposeWorkspace(ctx, instanceID)

// remove workspace daemon directory in the node
log.Debug("DELETING WORKSPACE DAEMON DIR")
defer log.Debug("DELETING WORKSPACE DAEMON DIR DONE")

removedChan := make(chan struct{}, 1)
go func() {
defer close(removedChan)
@@ -332,9 +329,10 @@ func (wso *DefaultWorkspaceOperations) WipeWorkspace(ctx context.Context, instan
}
}()

timeoutT := time.NewTicker(10 * time.Second)
timeout := time.NewTicker(10 * time.Second)
defer timeout.Stop()
select {
case <-timeoutT.C:
case <-timeout.C:
case <-removedChan:
log.Debug("successfully removed workspace daemon directory")
}
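The hunk above also gives the timeout ticker a deferred Stop. A minimal sketch of the bounded-wait pattern, assuming a hypothetical remove callback in place of the real directory removal (a one-shot time.Timer works just as well here as a ticker):

```go
package main

import (
	"log"
	"time"
)

func waitForRemoval(remove func() error) {
	done := make(chan struct{})
	go func() {
		defer close(done)
		if err := remove(); err != nil {
			log.Printf("cannot remove workspace daemon directory: %v", err)
		}
	}()

	timeout := time.NewTimer(10 * time.Second)
	defer timeout.Stop() // release the timer even if removal finishes first

	select {
	case <-timeout.C:
		log.Print("timed out waiting for workspace daemon directory removal")
	case <-done:
		log.Print("successfully removed workspace daemon directory")
	}
}

func main() {
	waitForRemoval(func() error { time.Sleep(100 * time.Millisecond); return nil })
}
```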
2 changes: 1 addition & 1 deletion components/ws-daemon/pkg/cpulimit/dispatch.go
@@ -181,7 +181,7 @@ func (d *DispatchListener) WorkspaceAdded(ctx context.Context, ws *dispatch.Work

cgroupPath, err := disp.Runtime.ContainerCGroupPath(ctx, ws.ContainerID)
if err != nil {
if dispatch.IsCancelled(ctx) {
if errors.Is(err, context.Canceled) {
return nil
}
return xerrors.Errorf("cannot start governor: %w", err)
3 changes: 2 additions & 1 deletion components/ws-daemon/pkg/daemon/markunmount.go
@@ -8,6 +8,7 @@ import (
"bufio"
"bytes"
"context"
"errors"
"io/ioutil"
"path/filepath"
"strings"
@@ -127,7 +128,7 @@ func (c *MarkUnmountFallback) WorkspaceUpdated(ctx context.Context, ws *dispatch
}

err := unmountMark(ws.InstanceID)
if err != nil && !dispatch.IsCancelled(ctx) {
if err != nil && errors.Is(err, context.Canceled) {
log.WithFields(ws.OWI()).WithError(err).Error("cannot unmount mark mount from within ws-daemon")
c.activityCounter.WithLabelValues("false").Inc()
} else {
1 change: 1 addition & 0 deletions components/ws-daemon/pkg/diskguard/guard.go
@@ -71,6 +71,7 @@ type Guard struct {
// Start starts the disk guard
func (g *Guard) Start() {
t := time.NewTicker(g.Interval)
defer t.Stop()
for {
bvail, err := getAvailableBytes(g.Path)
if err != nil {
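The added defer t.Stop() above ensures the ticker is released if the loop ever exits. A small sketch of the periodic-check shape, with getAvailableBytes as a hypothetical stand-in for the real disk query:

```go
package diskguard

import (
	"log"
	"time"
)

func watch(path string, interval time.Duration, getAvailableBytes func(string) (uint64, error)) {
	t := time.NewTicker(interval)
	defer t.Stop() // without this, the ticker keeps running after the loop returns
	for range t.C {
		avail, err := getAvailableBytes(path)
		if err != nil {
			log.Printf("cannot check available disk space on %s: %v", path, err)
			continue
		}
		log.Printf("%s: %d bytes available", path, avail)
	}
}
```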
16 changes: 5 additions & 11 deletions components/ws-daemon/pkg/dispatch/dispatch.go
@@ -115,10 +115,6 @@ func GetDispatchWaitGroup(ctx context.Context) *sync.WaitGroup {
return ctx.Value(contextDispatchWaitGroup).(*sync.WaitGroup)
}

func IsCancelled(ctx context.Context) bool {
return context.Cause(ctx) != nil
}

// Start starts the dispatch
func (d *Dispatch) Start() error {
ifac := informers.NewSharedInformerFactoryWithOptions(d.Kubernetes, podInformerResyncInterval, informers.WithNamespace(d.KubernetesNamespace))
@@ -195,8 +191,8 @@ func (d *Dispatch) DisposeWorkspace(ctx context.Context, instanceID string) {
d.mu.Lock()
defer d.mu.Unlock()

log.WithField("instanceID", instanceID).Debug("WS DISPOSE")
defer log.WithField("instanceID", instanceID).Debug("WS DISPOSE DONE")
log.WithField("instanceID", instanceID).Debug("disposing workspace")
defer log.WithField("instanceID", instanceID).Debug("disposing workspace done")

// If we have that instanceID present, cancel its context
state, present := d.ctxs[instanceID]
@@ -237,10 +233,9 @@ func (d *Dispatch) handlePodUpdate(oldPod, newPod *corev1.Pod) {
}
disposedKey := disposedKey(workspaceInstanceID, newPod)
if _, alreadyDisposed := d.disposedCtxs[disposedKey]; alreadyDisposed {
log.WithField("disposedKey", disposedKey).Debug("DROPPING POD UPDATE FOR DISPOSED POD")
log.WithField("disposedKey", disposedKey).Debug("dropping pod update for disposed pod")
return
}
log.WithField("instanceID", workspaceInstanceID).Debugf("POD UPDATE: %s", workspaceInstanceID)

d.mu.Lock()
defer d.mu.Unlock()
@@ -337,15 +332,15 @@ func (d *Dispatch) handlePodUpdate(oldPod, newPod *corev1.Pod) {
}
}()
}
log.WithField("instanceID", workspaceInstanceID).Debugf("POD UPDATE DONE: %s", workspaceInstanceID)
}

func (d *Dispatch) handlePodDeleted(pod *corev1.Pod) {
instanceID, ok := pod.Labels[wsk8s.WorkspaceIDLabel]
if !ok {
return
}
log.WithField("instanceID", instanceID).Debugf("POD DELETED: %s", instanceID)
log.WithField("instanceID", instanceID).Debug("pod deleted")
defer log.WithField("instanceID", instanceID).Debug("pod deleted done")

d.mu.Lock()
defer d.mu.Unlock()
@@ -361,5 +356,4 @@ func (d *Dispatch) handlePodDeleted(pod *corev1.Pod) {

delete(d.ctxs, instanceID)

log.WithField("instanceID", instanceID).Debugf("POD DELETED DONE: %s", instanceID)
}
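The removed IsCancelled helper and the errors.Is checks that replace it at the call sites answer different questions: the former asks whether the context itself was cancelled, the latter whether the error that came back was caused by cancellation (even if wrapped). A small sketch of the distinction, not Gitpod code:

```go
package main

import (
	"context"
	"errors"
	"fmt"
)

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	// What the removed helper checked: context.Cause is non-nil once the
	// context has been cancelled, regardless of what any call returned.
	fmt.Println(context.Cause(ctx) != nil) // true

	// What the call sites check now: the returned error itself, which also
	// matches when the cancellation error has been wrapped along the way.
	err := fmt.Errorf("cannot get container pid: %w", ctx.Err())
	fmt.Println(errors.Is(err, context.Canceled)) // true
}
```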
8 changes: 3 additions & 5 deletions components/ws-daemon/pkg/iws/iws.go
@@ -961,9 +961,6 @@ func (wbs *InWorkspaceServiceServer) Teardown(ctx context.Context, req *api.Tear
owi := wbs.Session.OWI()
log := log.WithFields(owi)

log.Debug("TEARDOWN")
defer log.Debug("TEARDOWN DONE")

var (
success = true
err error
@@ -981,8 +978,9 @@ func (wbs *InWorkspaceServiceServer) Teardown(ctx context.Context, req *api.Tear
// WipingTeardown tears down every state we created using IWS
func (wbs *InWorkspaceServiceServer) WipingTeardown(ctx context.Context, req *api.WipingTeardownRequest) (*api.WipingTeardownResponse, error) {
log := log.WithFields(wbs.Session.OWI())
log.WithField("doWipe", req.DoWipe).Debug("WIPING TEARDOWN")
defer log.WithField("doWipe", req.DoWipe).Debug("WIPING TEARDOWN DONE")
log.WithField("doWipe", req.DoWipe).Debug("iws.WipingTeardown")
defer log.WithField("doWipe", req.DoWipe).Debug("iws.WipingTeardown done")

if !req.DoWipe {
return &api.WipingTeardownResponse{Success: true}, nil
}
5 changes: 3 additions & 2 deletions components/ws-daemon/pkg/netlimit/netlimit.go
@@ -6,6 +6,7 @@ package netlimit

import (
"context"
"errors"
"fmt"
"os"
"os/exec"
@@ -130,7 +131,7 @@ func (c *ConnLimiter) limitWorkspace(ctx context.Context, ws *dispatch.Workspace

pid, err := disp.Runtime.ContainerPID(ctx, ws.ContainerID)
if err != nil {
if dispatch.IsCancelled(ctx) {
if errors.Is(err, context.Canceled) {
return nil
}
return fmt.Errorf("could not get pid for container %s of workspace %s", ws.ContainerID, ws.WorkspaceID)
@@ -144,7 +145,7 @@ func (c *ConnLimiter) limitWorkspace(ctx context.Context, ws *dispatch.Workspace
}
}, nsinsider.EnterMountNS(false), nsinsider.EnterNetNS(true))
if err != nil {
if dispatch.IsCancelled(ctx) {
if errors.Is(err, context.Canceled) {
return nil
}
log.WithError(err).WithFields(ws.OWI()).Error("cannot enable connection limiting")
1 change: 1 addition & 0 deletions components/ws-daemon/pkg/nsinsider/nsinsider.go
@@ -110,6 +110,7 @@ func Nsinsider(instanceID string, targetPid int, mod func(*exec.Cmd), opts ...ns
_, _ = io.Copy(os.Stderr, &cmdErr)
log.FromBuffer(&cmdOut, log.WithFields(log.OWI("", "", instanceID)))
if err != nil {
// writing stderr to the error so clients can pattern match on specific errors
return fmt.Errorf("run nsinsider (%v) failed: %q \\ %q\n%v",
cmd.Args,
cmdOut.String(),
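The new comment above explains why the command output is folded into the returned error. A hedged sketch of that pattern, with an illustrative runNsinsider helper and a caller that matches on a known message:

```go
package main

import (
	"errors"
	"fmt"
	"strings"
)

// runNsinsider is illustrative only; it mirrors how the real helper folds
// stdout/stderr into the error it returns.
func runNsinsider(args []string, stdout, stderr string, runErr error) error {
	if runErr != nil {
		// writing stderr to the error so clients can pattern match on specific errors
		return fmt.Errorf("run nsinsider (%v) failed: %q \\ %q\n%v", args, stdout, stderr, runErr)
	}
	return nil
}

func main() {
	err := runNsinsider([]string{"move-mount"}, "", "no such file or directory", errors.New("exit status 1"))
	if err != nil && strings.Contains(err.Error(), "no such file or directory") {
		fmt.Println("target already gone, ignoring")
	}
}
```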
1 change: 0 additions & 1 deletion components/ws-manager-mk2/controllers/status.go
@@ -72,7 +72,6 @@ func (r *WorkspaceReconciler) updateWorkspaceStatus(ctx context.Context, workspa
workspace.Status.Phase = workspacev1.WorkspacePhaseStopped
}

log.WithValues("podDeletionTime", workspace.Status.PodDeletionTime).Info("PodDeletionTimeValue")
if workspace.Status.Phase == workspacev1.WorkspacePhaseStopped && workspace.Status.PodDeletionTime == nil {
// Set the timestamp when we first saw the pod as deleted.
// This is used for the delaying eventual pod restarts
9 changes: 5 additions & 4 deletions components/ws-manager-mk2/controllers/workspace_controller.go
@@ -224,22 +224,23 @@ func (r *WorkspaceReconciler) actOnStatus(ctx context.Context, workspace *worksp
workspace.Status.SetCondition(workspacev1.NewWorkspaceConditionPodRejected(fmt.Sprintf("Pod reached maximum recreations %d, failing", workspace.Status.PodRecreated), metav1.ConditionFalse))
return ctrl.Result{Requeue: true}, nil // requeue so we end up in the "Stopped" case below
}
log.WithValues("PodStarts", workspace.Status.PodStarts, "PodRecreated", workspace.Status.PodRecreated, "Phase", workspace.Status.Phase).Info("trigger pod recreation")
log = log.WithValues("PodStarts", workspace.Status.PodStarts, "PodRecreated", workspace.Status.PodRecreated, "Phase", workspace.Status.Phase)

// Make sure to wait for "recreationTimeout" before creating the pod again
if workspace.Status.PodDeletionTime == nil {
log.Info("want to wait for pod recreation timeout, but podDeletionTime not set (yet)")
log.Info("pod recreation: waiting for pod deletion time to be populated...")
return ctrl.Result{Requeue: true, RequeueAfter: 5 * time.Second}, nil
}

recreationTimeout := r.podRecreationTimeout()
podDeletionTime := workspace.Status.PodDeletionTime.Time
waitTime := time.Until(podDeletionTime.Add(recreationTimeout))
log = log.WithValues("waitTime", waitTime.String(), "recreationTimeout", recreationTimeout.String(), "podDeletionTime", podDeletionTime.String())
if waitTime > 0 {
log.WithValues("waitTime", waitTime).Info("waiting for pod recreation timeout")
log.Info("pod recreation: waiting for timeout...")
return ctrl.Result{Requeue: true, RequeueAfter: waitTime}, nil
}
log.WithValues("waitedTime", waitTime.Abs().String()).Info("waited for pod recreation timeout")
log.Info("trigger pod recreation")

// Must persist the modification pod starts, and ensure we retry on conflict.
// If we fail to persist this value, it's possible that the Pod gets recreated endlessly
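The hunk above mostly reshuffles log fields, but the requeue arithmetic it logs is worth spelling out. A sketch as a standalone helper, assuming controller-runtime's ctrl.Result and illustrative names:

```go
package recreation

import (
	"time"

	ctrl "sigs.k8s.io/controller-runtime"
)

// requeueForRecreation reports whether the recreation timeout, measured from
// the pod's deletion time, has elapsed; if not, it asks to be reconciled
// again once it has.
func requeueForRecreation(podDeletionTime time.Time, recreationTimeout time.Duration) (ctrl.Result, bool) {
	waitTime := time.Until(podDeletionTime.Add(recreationTimeout))
	if waitTime > 0 {
		return ctrl.Result{Requeue: true, RequeueAfter: waitTime}, false
	}
	// timeout elapsed: the caller may trigger pod recreation now
	return ctrl.Result{}, true
}
```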
