Merge pull request #185 from spectrocloud/PCP-3333-newfilter
PCP-3333-newfilter: add AdditionalFilters to filter out pods that have UnreachableToleration
sadysnaat authored Aug 9, 2024
2 parents 7e65301 + f6282f9 commit cb6f8a6
Showing 2 changed files with 30 additions and 1 deletion.
internal/controllers/machine/machine_controller.go: 20 additions, 1 deletion
@@ -57,7 +57,8 @@ import (
 
 const (
     // controllerName defines the controller used when creating clients.
-    controllerName = "machine-controller"
+    controllerName     = "machine-controller"
+    nodeUnreachableKey = "node.kubernetes.io/unschedulable"
 )
 
 var (
@@ -66,6 +67,11 @@
     errNoControlPlaneNodes = errors.New("no control plane members")
     errClusterIsBeingDeleted = errors.New("cluster is being deleted")
     errControlPlaneIsBeingDeleted = errors.New("control plane is being deleted")
+    unreachableToleration = corev1.Toleration{
+        Key:      nodeUnreachableKey,
+        Effect:   corev1.TaintEffectNoSchedule,
+        Operator: corev1.TolerationOpExists,
+    }
 )
 
 // +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch
@@ -618,6 +624,9 @@ func (r *Reconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster,
         ErrOut: writer{func(msg string, keysAndValues ...interface{}) {
             log.Error(nil, msg, keysAndValues...)
         }},
+        AdditionalFilters: []kubedrain.PodFilter{
+            skipUnreachableTolerationPods,
+        },
         // SPECTRO: Even if the node is reachable, we wait 30 minutes for drain completion else move ahead
         SkipWaitForDeleteTimeoutSeconds: 60 * 30, // 30 minutes
     }
@@ -643,6 +652,16 @@ func (r *Reconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster,
     return ctrl.Result{}, nil
 }
 
+func skipUnreachableTolerationPods(pod corev1.Pod) kubedrain.PodDeleteStatus {
+    if pod.Spec.Tolerations == nil {
+        return kubedrain.MakePodDeleteStatusOkay()
+    }
+    if HasTolerations(&pod, &unreachableToleration) {
+        return kubedrain.MakePodDeleteStatusSkip()
+    }
+    return kubedrain.MakePodDeleteStatusOkay()
+}
+
 // shouldWaitForNodeVolumes returns true if node status still have volumes attached
 // pod deletion and volume detach happen asynchronously, so pod could be deleted before volume detached from the node
 // this could cause issue for some storage provisioner, for example, vsphere-volume this is problematic
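For context only, and not part of the commit: the standalone sketch below illustrates how a PodFilter such as skipUnreachableTolerationPods plugs into kubedrain.Helper (k8s.io/kubectl/pkg/drain) and what it decides for a pod that declares the unreachable toleration versus one that does not. The filter is re-declared locally so the example is self-contained; the fake clientset, the timeout, the pod names, and the unreachableKey constant are illustrative assumptions rather than values taken from this repository.

package main

import (
    "context"
    "fmt"
    "os"
    "time"

    corev1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/kubernetes/fake"
    kubedrain "k8s.io/kubectl/pkg/drain"
)

// Key of the taint being tolerated; mirrors nodeUnreachableKey in the diff above.
const unreachableKey = "node.kubernetes.io/unschedulable"

// skipUnreachableTolerationPods mirrors the behavior of the filter added in this
// commit: pods declaring the matching toleration are skipped (left in place) on drain.
func skipUnreachableTolerationPods(pod corev1.Pod) kubedrain.PodDeleteStatus {
    target := corev1.Toleration{
        Key:      unreachableKey,
        Effect:   corev1.TaintEffectNoSchedule,
        Operator: corev1.TolerationOpExists,
    }
    for _, t := range pod.Spec.Tolerations {
        if t.MatchToleration(&target) {
            return kubedrain.MakePodDeleteStatusSkip()
        }
    }
    return kubedrain.MakePodDeleteStatusOkay()
}

func main() {
    // Drain helper wiring analogous to drainNode above; AdditionalFilters run in
    // addition to the built-in filters when deciding which pods to evict.
    drainer := &kubedrain.Helper{
        Ctx:                 context.TODO(),
        Client:              fake.NewSimpleClientset(),
        Force:               true,
        IgnoreAllDaemonSets: true,
        Timeout:             20 * time.Second,
        Out:                 os.Stdout,
        ErrOut:              os.Stderr,
        AdditionalFilters: []kubedrain.PodFilter{
            skipUnreachableTolerationPods,
        },
        SkipWaitForDeleteTimeoutSeconds: 60 * 30,
    }
    _ = drainer

    tolerant := corev1.Pod{
        ObjectMeta: metav1.ObjectMeta{Name: "tolerant"},
        Spec: corev1.PodSpec{Tolerations: []corev1.Toleration{
            {Key: unreachableKey, Operator: corev1.TolerationOpExists, Effect: corev1.TaintEffectNoSchedule},
        }},
    }
    plain := corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "plain"}}

    fmt.Println("evict tolerant pod?", skipUnreachableTolerationPods(tolerant).Delete) // false: skipped
    fmt.Println("evict plain pod?", skipUnreachableTolerationPods(plain).Delete)       // true: drained
}
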
internal/controllers/machine/machine_helpers.go: 10 additions, 0 deletions
@@ -17,6 +17,7 @@ limitations under the License.
 package machine
 
 import (
+    corev1 "k8s.io/api/core/v1"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/labels"
 )
@@ -37,3 +38,12 @@ func HasMatchingLabels(matchSelector metav1.LabelSelector, matchLabels map[strin
     }
     return true
 }
+
+func HasTolerations(pod *corev1.Pod, toleration *corev1.Toleration) bool {
+    for _, t := range pod.Spec.Tolerations {
+        if t.MatchToleration(toleration) {
+            return true
+        }
+    }
+    return false
+}
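
One behavioral note, offered as an illustration rather than as part of the diff: corev1.Toleration.MatchToleration compares the exact key, operator, value, and effect tuple, so HasTolerations reports true only for pods that declare a toleration identical to the one passed in. A pod that tolerates everything with a bare Exists operator, or that tolerates the same key with a different effect, does not match and would still be drained. A minimal standalone sketch:

package main

import (
    "fmt"

    corev1 "k8s.io/api/core/v1"
)

func main() {
    // The toleration the machine controller looks for (same shape as unreachableToleration above).
    target := corev1.Toleration{
        Key:      "node.kubernetes.io/unschedulable",
        Effect:   corev1.TaintEffectNoSchedule,
        Operator: corev1.TolerationOpExists,
    }

    // Identical <key, operator, value, effect> tuple: matches.
    exact := corev1.Toleration{
        Key:      "node.kubernetes.io/unschedulable",
        Effect:   corev1.TaintEffectNoSchedule,
        Operator: corev1.TolerationOpExists,
    }
    // Tolerates every taint, but the tuple differs (no key, no effect): does not match.
    broad := corev1.Toleration{
        Operator: corev1.TolerationOpExists,
    }

    fmt.Println(exact.MatchToleration(&target)) // true
    fmt.Println(broad.MatchToleration(&target)) // false
}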
