Skip to content

Commit

Permalink
Fix race condition in PVC deletion
Browse files Browse the repository at this point in the history
fixes [#7138][#7138]. In `coschedule:pipelineruns` mode, the `pvcs` created by `VolumeClaimTemplate` should be automatically deleted when the owning `pipelinerun` is completed.
To delete a `pvc` used by a pod (pipelinerun), the [kubernetes.io/pvc-protection][finalizer] finalizer must be removed.

Prior to this commit, the Tekton reconciler attempted to remove the `kubernetes.io/pvc-protection` finalizer first and then deletes the created pvcs. This results
in race condition since `PVCProtectionController` tries to add back the finalizer if the `pvc` is in `bounded` status . If the add back action happens before the PVC deletion call is completed,
the `PVC` will be left in `terminating` status.

This commit changes the reconciler to delete the `PVC` first and then removes the `finalizer` to fix the race condition. This commit also adds integration test to
validate `pvc` status when a `pipelinerun` is completed.

/kind bug

[#7138]: #7138
[finalizer]: https://kubernetes.io/docs/concepts/storage/persistent-volumes/#storage-object-in-use-protection
  • Loading branch information
QuanZhang-William committed Sep 25, 2023
1 parent 44e1707 commit 2b52afc
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 6 deletions.
12 changes: 6 additions & 6 deletions pkg/reconciler/volumeclaim/pvchandler.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,18 +113,18 @@ func (c *defaultPVCHandler) PurgeFinalizerAndDeletePVCForWorkspace(ctx context.C
return fmt.Errorf("failed to marshal jsonpatch: %w", err)
}

// patch the existing PVC to update the finalizers
_, err = c.clientset.CoreV1().PersistentVolumeClaims(namespace).Patch(ctx, pvcName, types.JSONPatchType, removeFinalizerBytes, metav1.PatchOptions{})
if err != nil {
return fmt.Errorf("failed to patch the PVC %s: %w", pvcName, err)
}

// delete PVC
err = c.clientset.CoreV1().PersistentVolumeClaims(namespace).Delete(ctx, pvcName, metav1.DeleteOptions{})
if err != nil {
return fmt.Errorf("failed to delete the PVC %s: %w", pvcName, err)
}

// remove finalizer
_, err = c.clientset.CoreV1().PersistentVolumeClaims(namespace).Patch(ctx, pvcName, types.JSONPatchType, removeFinalizerBytes, metav1.PatchOptions{})
if err != nil {
return fmt.Errorf("failed to patch the PVC %s: %w", pvcName, err)
}

return nil
}

Expand Down
6 changes: 6 additions & 0 deletions test/affinity_assistant_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,12 @@ spec:
t.Errorf("Error waiting for PipelineRun to finish: %s", err)
}

// wait and check PVCs are deleted
t.Logf("Waiting for PVC in namespace %s to delete", namespace)
if err := WaitForPVCIsDeleted(ctx, c, timeout, prName, namespace, "PVCDeleted"); err != nil {
t.Errorf("Error waiting for PVC to be deleted: %s", err)
}

// validate PipelineRun pods sharing the same PVC are scheduled to the same node
podNames := []string{fmt.Sprintf("%v-foo-pod", prName), fmt.Sprintf("%v-bar-pod", prName), fmt.Sprintf("%v-double-ws-pod", prName), fmt.Sprintf("%v-no-ws-pod", prName)}
validatePodAffinity(t, ctx, podNames, namespace, c.KubeClient)
Expand Down
25 changes: 25 additions & 0 deletions test/wait.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,31 @@ func WaitForPodState(ctx context.Context, c *clients, name string, namespace str
})
}

// WaitForPVCIsDeleted polls the number of the PVC in the namespace from client every
// interval until all the PVCs in the namespace are deleted. It returns an
// error or timeout. desc will be used to name the metric that is emitted to
// track how long it took to delete all the PVCs in the namespace.
func WaitForPVCIsDeleted(ctx context.Context, c *clients, polltimeout time.Duration, name, namespace, desc string) error {
metricName := fmt.Sprintf("WaitForPVCIsDeleted/%s/%s", name, desc)
_, span := trace.StartSpan(context.Background(), metricName)
defer span.End()

ctx, cancel := context.WithTimeout(ctx, polltimeout)
defer cancel()

return pollImmediateWithContext(ctx, func() (bool, error) {
pvcList, err := c.KubeClient.CoreV1().PersistentVolumeClaims(namespace).List(ctx, metav1.ListOptions{})
if err != nil {
return true, err
}
if len(pvcList.Items) > 0 {
return false, nil
}

return true, nil
})
}

// WaitForPipelineRunState polls the status of the PipelineRun called name from client every
// interval until inState returns `true` indicating it is done, returns an
// error or timeout. desc will be used to name the metric that is emitted to
Expand Down

0 comments on commit 2b52afc

Please sign in to comment.