Skip to content

Commit

Permalink
✨ Add MachineDrainRule "WaitCompleted"
Browse files Browse the repository at this point in the history
Signed-off-by: Vince Prignano <[email protected]>
  • Loading branch information
vincepri committed Jan 9, 2025
1 parent 2f5d70a commit 8826d5b
Show file tree
Hide file tree
Showing 7 changed files with 217 additions and 65 deletions.
10 changes: 7 additions & 3 deletions api/v1beta1/machinedrainrules_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ import (

const (
// PodDrainLabel is the label that can be set on Pods in workload clusters to ensure a Pod is not drained.
// The only valid value is "skip".
// The only valid values are "skip" and "wait-completed".
// This label takes precedence over MachineDrainRules defined in the management cluster.
PodDrainLabel = "cluster.x-k8s.io/drain"
)

// MachineDrainRuleDrainBehavior defines the drain behavior. Can be either "Drain" or "Skip".
// +kubebuilder:validation:Enum=Drain;Skip
// MachineDrainRuleDrainBehavior defines the drain behavior. Can be either "Drain", "Skip", or "WaitCompleted".
// +kubebuilder:validation:Enum=Drain;Skip;WaitCompleted
type MachineDrainRuleDrainBehavior string

const (
Expand All @@ -37,6 +37,10 @@ const (

// MachineDrainRuleDrainBehaviorSkip means the drain for a Pod should be skipped.
MachineDrainRuleDrainBehaviorSkip MachineDrainRuleDrainBehavior = "Skip"

// MachineDrainRuleDrainBehaviorWaitCompleted means the Pod should not be evicted,
// but overall drain should wait until the Pod completes.
MachineDrainRuleDrainBehaviorWaitCompleted MachineDrainRuleDrainBehavior = "WaitCompleted"
)

// MachineDrainRuleSpec defines the spec of a MachineDrainRule.
Expand Down
1 change: 1 addition & 0 deletions config/crd/bases/cluster.x-k8s.io_machinedrainrules.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

107 changes: 55 additions & 52 deletions docs/proposals/20240930-machine-drain-rules.md

Large diffs are not rendered by default.

23 changes: 19 additions & 4 deletions internal/controllers/machine/drain/drain.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ func (d *Helper) GetPodsForEviction(ctx context.Context, cluster *clusterv1.Clus
podNamespaces[ns.Name] = &ns
}

// Note: As soon as a filter decides that a Pod should be skipped (i.e. DrainBehavior == "Skip")
// Note: As soon as a filter decides that a Pod should be skipped (i.e. DrainBehavior == "Skip" or "WaitCompleted")
// other filters won't be evaluated and the Pod will be skipped.
list := filterPods(ctx, allPods, []PodFilter{
// Phase 1: Basic filtering (aligned to kubectl drain)
Expand All @@ -150,7 +150,7 @@ func (d *Helper) GetPodsForEviction(ctx context.Context, cluster *clusterv1.Clus

// Phase 2: Filtering based on drain label & MachineDrainRules

// Skip Pods with label cluster.x-k8s.io/drain == "skip"
// Skip Pods with label cluster.x-k8s.io/drain == "skip" or "wait-completed"
d.drainLabelFilter,

// Use drain behavior and order from first matching MachineDrainRule
Expand Down Expand Up @@ -225,7 +225,8 @@ func filterPods(ctx context.Context, allPods []*corev1.Pod, filters []PodFilter)
var deleteWarnings []string
for _, filter := range filters {
status = filter(ctx, pod)
if status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorSkip {
if status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorSkip ||
status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted {
// short-circuit as soon as pod is filtered out
// at that point, there is no reason to run pod
// through any additional filters
Expand Down Expand Up @@ -281,6 +282,7 @@ func (d *Helper) EvictPods(ctx context.Context, podDeleteList *PodDeleteList) Ev
var podsToTriggerEvictionLater []PodDelete
var podsWithDeletionTimestamp []PodDelete
var podsToBeIgnored []PodDelete
var podsToWait []PodDelete
for _, pod := range podDeleteList.items {
switch {
case pod.Status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorDrain && pod.Pod.DeletionTimestamp.IsZero():
Expand All @@ -289,6 +291,8 @@ func (d *Helper) EvictPods(ctx context.Context, podDeleteList *PodDeleteList) Ev
} else {
podsToTriggerEvictionLater = append(podsToTriggerEvictionLater, pod)
}
case pod.Status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted:
podsToWait = append(podsToWait, pod)
case pod.Status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorDrain:
podsWithDeletionTimestamp = append(podsWithDeletionTimestamp, pod)
default:
Expand All @@ -300,6 +304,7 @@ func (d *Helper) EvictPods(ctx context.Context, podDeleteList *PodDeleteList) Ev
"podsToTriggerEvictionNow", podDeleteListToString(podsToTriggerEvictionNow, 5),
"podsToTriggerEvictionLater", podDeleteListToString(podsToTriggerEvictionLater, 5),
"podsWithDeletionTimestamp", podDeleteListToString(podsWithDeletionTimestamp, 5),
"podsToWaitCompleted", podDeleteListToString(podsToWait, 5),
)

// Trigger evictions for at most 10s. We'll continue on the next reconcile if we hit the timeout.
Expand Down Expand Up @@ -394,6 +399,10 @@ evictionLoop:
res.PodsToTriggerEvictionLater = append(res.PodsToTriggerEvictionLater, pd.Pod)
}

for _, pd := range podsToWait {
res.PodsToWaitCompleted = append(res.PodsToWaitCompleted, pd.Pod)
}

return res
}

Expand Down Expand Up @@ -431,13 +440,15 @@ type EvictionResult struct {
PodsDeletionTimestampSet []*corev1.Pod
PodsFailedEviction map[string][]*corev1.Pod
PodsToTriggerEvictionLater []*corev1.Pod
PodsToWaitCompleted []*corev1.Pod
PodsNotFound []*corev1.Pod
PodsIgnored []*corev1.Pod
}

// DrainCompleted returns if a Node is entirely drained, i.e. if all relevant Pods have gone away.
func (r EvictionResult) DrainCompleted() bool {
return len(r.PodsDeletionTimestampSet) == 0 && len(r.PodsFailedEviction) == 0 && len(r.PodsToTriggerEvictionLater) == 0
return len(r.PodsDeletionTimestampSet) == 0 && len(r.PodsFailedEviction) == 0 &&
len(r.PodsToTriggerEvictionLater) == 0 && len(r.PodsToWaitCompleted) == 0
}

// ConditionMessage returns a condition message for the case where a drain is not completed.
Expand Down Expand Up @@ -498,6 +509,10 @@ func (r EvictionResult) ConditionMessage(nodeDrainStartTime *metav1.Time) string
conditionMessage = fmt.Sprintf("%s\nAfter above Pods have been removed from the Node, the following Pods will be evicted: %s",
conditionMessage, PodListToString(r.PodsToTriggerEvictionLater, 3))
}
if len(r.PodsToWaitCompleted) > 0 {
conditionMessage = fmt.Sprintf("%s\nWaiting for the following Pods to complete without eviction: %s",
conditionMessage, PodListToString(r.PodsToWaitCompleted, 3))
}
return conditionMessage
}

Expand Down
113 changes: 111 additions & 2 deletions internal/controllers/machine/drain/drain_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,32 @@ func TestGetPodsForEviction(t *testing.T) {
},
},
}
mdrBehaviorWaitCompleted := &clusterv1.MachineDrainRule{
ObjectMeta: metav1.ObjectMeta{
Name: "mdr-behavior-wait-completed",
Namespace: "test-namespace",
},
Spec: clusterv1.MachineDrainRuleSpec{
Drain: clusterv1.MachineDrainRuleDrainConfig{
Behavior: clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted,
},
Machines: nil, // Match all machines
Pods: []clusterv1.MachineDrainRulePodSelector{
{
Selector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"app": "behavior-wait-completed",
},
},
NamespaceSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"kubernetes.io/metadata.name": "test-namespace",
},
},
},
},
},
}
mdrBehaviorUnknown := &clusterv1.MachineDrainRule{
ObjectMeta: metav1.ObjectMeta{
Name: "mdr-behavior-unknown",
Expand Down Expand Up @@ -637,6 +663,15 @@ func TestGetPodsForEviction(t *testing.T) {
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-3-skip-pod-with-drain-label-wait-completed",
Namespace: metav1.NamespaceDefault,
Labels: map[string]string{
clusterv1.PodDrainLabel: "wait-completed",
},
},
},
},
wantPodDeleteList: PodDeleteList{items: []PodDelete{
{
Expand All @@ -663,6 +698,18 @@ func TestGetPodsForEviction(t *testing.T) {
Reason: PodDeleteStatusTypeSkip,
},
},
{
Pod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-3-skip-pod-with-drain-label-wait-completed",
Namespace: metav1.NamespaceDefault,
},
},
Status: PodDeleteStatus{
DrainBehavior: clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted,
Reason: PodDeleteStatusTypeWaitCompleted,
},
},
}},
},
{
Expand Down Expand Up @@ -714,8 +761,17 @@ func TestGetPodsForEviction(t *testing.T) {
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-3-behavior-wait-completed",
Namespace: "test-namespace", // matches the Namespace of the selector in mdrBehaviorWaitCompleted.
Labels: map[string]string{
"app": "behavior-wait-completed", // matches mdrBehaviorWaitCompleted.
},
},
},
},
machineDrainRules: []*clusterv1.MachineDrainRule{mdrBehaviorDrain, mdrBehaviorSkip, mdrBehaviorUnknown},
machineDrainRules: []*clusterv1.MachineDrainRule{mdrBehaviorDrain, mdrBehaviorSkip, mdrBehaviorUnknown, mdrBehaviorWaitCompleted},
wantPodDeleteList: PodDeleteList{items: []PodDelete{
{
Pod: &corev1.Pod{
Expand Down Expand Up @@ -744,6 +800,18 @@ func TestGetPodsForEviction(t *testing.T) {
Reason: PodDeleteStatusTypeSkip,
},
},
{
Pod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-3-behavior-wait-completed",
Namespace: "test-namespace",
},
},
Status: PodDeleteStatus{
DrainBehavior: clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted,
Reason: PodDeleteStatusTypeWaitCompleted,
},
},
}},
},
}
Expand Down Expand Up @@ -918,6 +986,19 @@ func Test_getMatchingMachineDrainRules(t *testing.T) {
Pods: nil, // Match all Pods
},
}
matchingMDRBehaviorWaitCompleted := &clusterv1.MachineDrainRule{
ObjectMeta: metav1.ObjectMeta{
Name: "mdr-behavior-wait-completed",
Namespace: "test-namespace",
},
Spec: clusterv1.MachineDrainRuleSpec{
Drain: clusterv1.MachineDrainRuleDrainConfig{
Behavior: clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted,
},
Machines: nil, // Match all machines
Pods: nil, // Match all Pods
},
}
matchingMDRBehaviorUnknown := &clusterv1.MachineDrainRule{
ObjectMeta: metav1.ObjectMeta{
Name: "mdr-behavior-unknown",
Expand Down Expand Up @@ -986,6 +1067,7 @@ func Test_getMatchingMachineDrainRules(t *testing.T) {
matchingMDRBehaviorDrainB,
matchingMDRBehaviorDrainA,
matchingMDRBehaviorSkip,
matchingMDRBehaviorWaitCompleted,
matchingMDRBehaviorUnknown,
notMatchingMDRDifferentNamespace,
notMatchingMDRNotMatchingSelector,
Expand All @@ -995,6 +1077,7 @@ func Test_getMatchingMachineDrainRules(t *testing.T) {
matchingMDRBehaviorDrainB,
matchingMDRBehaviorSkip,
matchingMDRBehaviorUnknown,
matchingMDRBehaviorWaitCompleted,
},
},
}
Expand Down Expand Up @@ -1291,6 +1374,17 @@ func TestEvictPods(t *testing.T) {
Reason: PodDeleteStatusTypeOkay,
},
},
{
Pod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-9-wait-completed",
},
},
Status: PodDeleteStatus{
DrainBehavior: clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted, // Will be skipped because DrainBehavior is set to WaitCompleted
Reason: PodDeleteStatusTypeWaitCompleted,
},
},
}},
wantEvictionResult: EvictionResult{
PodsIgnored: []*corev1.Pod{
Expand Down Expand Up @@ -1354,6 +1448,13 @@ func TestEvictPods(t *testing.T) {
},
},
},
PodsToWaitCompleted: []*corev1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-9-wait-completed",
},
},
},
},
},
}
Expand Down Expand Up @@ -1651,6 +1752,13 @@ After above Pods have been removed from the Node, the following Pods will be evi
},
},
},
PodsToWaitCompleted: []*corev1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-16-wait-completed",
},
},
},
},
wantConditionMessage: `Drain not completed yet (started at 2024-10-09T16:13:59Z):
* Pods pod-2-deletionTimestamp-set-1, pod-2-deletionTimestamp-set-2, pod-2-deletionTimestamp-set-3, ... (4 more): deletionTimestamp set, but still not removed from the Node
Expand All @@ -1660,7 +1768,8 @@ After above Pods have been removed from the Node, the following Pods will be evi
* Pod pod-8-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 3
* Pod pod-9-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 4
* 1 Pod with other issues
After above Pods have been removed from the Node, the following Pods will be evicted: pod-11-eviction-later, pod-12-eviction-later, pod-13-eviction-later, ... (2 more)`,
After above Pods have been removed from the Node, the following Pods will be evicted: pod-11-eviction-later, pod-12-eviction-later, pod-13-eviction-later, ... (2 more)
Waiting for the following Pods to complete without eviction: pod-16-wait-completed`,
},
{
name: "Compute long condition message correctly with more skipped errors",
Expand Down
26 changes: 23 additions & 3 deletions internal/controllers/machine/drain/filters.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ type PodDeleteList struct {
func (l *PodDeleteList) Pods() []*corev1.Pod {
pods := []*corev1.Pod{}
for _, i := range l.items {
if i.Status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorDrain {
if i.Status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorDrain ||
i.Status.DrainBehavior == clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted {
pods = append(pods, i.Pod)
}
}
Expand Down Expand Up @@ -124,6 +125,8 @@ const (
PodDeleteStatusTypeOkay = "Okay"
// PodDeleteStatusTypeSkip is "Skip".
PodDeleteStatusTypeSkip = "Skip"
// PodDeleteStatusTypeWaitCompleted is "WaitCompleted".
PodDeleteStatusTypeWaitCompleted = "WaitCompleted"
// PodDeleteStatusTypeWarning is "Warning".
PodDeleteStatusTypeWarning = "Warning"
// PodDeleteStatusTypeError is "Error".
Expand Down Expand Up @@ -156,6 +159,14 @@ func MakePodDeleteStatusSkip() PodDeleteStatus {
}
}

// MakePodDeleteStatusWaitCompleted is a helper method to return the corresponding PodDeleteStatus.
func MakePodDeleteStatusWaitCompleted() PodDeleteStatus {
return PodDeleteStatus{
DrainBehavior: clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted,
Reason: PodDeleteStatusTypeWaitCompleted,
}
}

// MakePodDeleteStatusWithWarning is a helper method to return the corresponding PodDeleteStatus.
func MakePodDeleteStatusWithWarning(behavior clusterv1.MachineDrainRuleDrainBehavior, message string) PodDeleteStatus {
var order *int32
Expand Down Expand Up @@ -272,9 +283,15 @@ func (d *Helper) skipDeletedFilter(ctx context.Context, pod *corev1.Pod) PodDele
func (d *Helper) drainLabelFilter(ctx context.Context, pod *corev1.Pod) PodDeleteStatus {
if labelValue, found := pod.ObjectMeta.Labels[clusterv1.PodDrainLabel]; found && strings.EqualFold(labelValue, string(clusterv1.MachineDrainRuleDrainBehaviorSkip)) {
log := ctrl.LoggerFrom(ctx, "Pod", klog.KObj(pod))
log.V(4).Info(fmt.Sprintf("Skip evicting Pod, because Pod has %s label", clusterv1.PodDrainLabel))
log.V(4).Info(fmt.Sprintf("Skip evicting Pod, because Pod has %s label with %s value", clusterv1.PodDrainLabel, labelValue))
return MakePodDeleteStatusSkip()
}
if labelValue, found := pod.ObjectMeta.Labels[clusterv1.PodDrainLabel]; found &&
strings.EqualFold(strings.Replace(labelValue, "-", "", 1), string(clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted)) {
log := ctrl.LoggerFrom(ctx, "Pod", klog.KObj(pod))
log.V(4).Info(fmt.Sprintf("Skip evicting Pod, because Pod has %s label with %s value", clusterv1.PodDrainLabel, labelValue))
return MakePodDeleteStatusWaitCompleted()
}
return MakePodDeleteStatusOkay()
}

Expand All @@ -293,13 +310,16 @@ func (d *Helper) machineDrainRulesFilter(machineDrainRules []*clusterv1.MachineD
}

// If the pod selector matches, use the drain behavior from the MachineDrainRule.
log := ctrl.LoggerFrom(ctx, "Pod", klog.KObj(pod))
switch mdr.Spec.Drain.Behavior {
case clusterv1.MachineDrainRuleDrainBehaviorDrain:
return MakePodDeleteStatusOkayWithOrder(mdr.Spec.Drain.Order)
case clusterv1.MachineDrainRuleDrainBehaviorSkip:
log := ctrl.LoggerFrom(ctx, "Pod", klog.KObj(pod))
log.V(4).Info(fmt.Sprintf("Skip evicting Pod, because MachineDrainRule %s with behavior %s applies to the Pod", mdr.Name, clusterv1.MachineDrainRuleDrainBehaviorSkip))
return MakePodDeleteStatusSkip()
case clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted:
log.V(4).Info(fmt.Sprintf("Skip evicting Pod, because MachineDrainRule %s with behavior %s applies to the Pod", mdr.Name, clusterv1.MachineDrainRuleDrainBehaviorWaitCompleted))
return MakePodDeleteStatusWaitCompleted()
default:
return MakePodDeleteStatusWithError(
fmt.Sprintf("MachineDrainRule %q has unknown spec.drain.behavior: %q",
Expand Down
Loading

0 comments on commit 8826d5b

Please sign in to comment.