Merge pull request #185 from spectrocloud/PCP-3333-newfilter
PCP-3333-newfilter: add AdditionalFilters to filter out pods that have UnreachableToleration
sadysnaat authored Aug 9, 2024
2 parents 7e65301 + f6282f9 commit cb6f8a6
Showing 2 changed files with 30 additions and 1 deletion.
internal/controllers/machine/machine_controller.go: 20 additions, 1 deletion
@@ -57,7 +57,8 @@ import (
 
 const (
     // controllerName defines the controller used when creating clients.
-    controllerName = "machine-controller"
+    controllerName     = "machine-controller"
+    nodeUnreachableKey = "node.kubernetes.io/unschedulable"
 )
 
 var (
@@ -66,6 +67,11 @@
     errNoControlPlaneNodes = errors.New("no control plane members")
     errClusterIsBeingDeleted = errors.New("cluster is being deleted")
     errControlPlaneIsBeingDeleted = errors.New("control plane is being deleted")
+    unreachableToleration = corev1.Toleration{
+        Key:      nodeUnreachableKey,
+        Effect:   corev1.TaintEffectNoSchedule,
+        Operator: corev1.TolerationOpExists,
+    }
 )
 
 // +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch
@@ -618,6 +624,9 @@ func (r *Reconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster,
         ErrOut: writer{func(msg string, keysAndValues ...interface{}) {
             log.Error(nil, msg, keysAndValues...)
         }},
+        AdditionalFilters: []kubedrain.PodFilter{
+            skipUnreachableTolerationPods,
+        },
         // SPECTRO: Even if the node is reachable, we wait 30 minutes for drain completion else move ahead
         SkipWaitForDeleteTimeoutSeconds: 60 * 30, // 30 minutes
     }
@@ -643,6 +652,16 @@ func (r *Reconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster,
     return ctrl.Result{}, nil
 }
 
+func skipUnreachableTolerationPods(pod corev1.Pod) kubedrain.PodDeleteStatus {
+    if pod.Spec.Tolerations == nil {
+        return kubedrain.MakePodDeleteStatusOkay()
+    }
+    if HasTolerations(&pod, &unreachableToleration) {
+        return kubedrain.MakePodDeleteStatusSkip()
+    }
+    return kubedrain.MakePodDeleteStatusOkay()
+}
+
 // shouldWaitForNodeVolumes returns true if node status still have volumes attached
 // pod deletion and volume detach happen asynchronously, so pod could be deleted before volume detached from the node
 // this could cause issue for some storage provisioner, for example, vsphere-volume this is problematic
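For context only, and not part of the commit: the standalone sketch below illustrates how a PodFilter such as skipUnreachableTolerationPods plugs into kubedrain.Helper (k8s.io/kubectl/pkg/drain) and what it decides for a pod that declares the unreachable toleration versus one that does not. The filter is re-declared locally so the example is self-contained; the fake clientset, the timeout, the pod names, and the unreachableKey constant are illustrative assumptions rather than values taken from this repository.

package main

import (
    "context"
    "fmt"
    "os"
    "time"

    corev1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/kubernetes/fake"
    kubedrain "k8s.io/kubectl/pkg/drain"
)

// Key of the taint being tolerated; mirrors nodeUnreachableKey in the diff above.
const unreachableKey = "node.kubernetes.io/unschedulable"

// skipUnreachableTolerationPods mirrors the behavior of the filter added in this
// commit: pods declaring the matching toleration are skipped (left in place) on drain.
func skipUnreachableTolerationPods(pod corev1.Pod) kubedrain.PodDeleteStatus {
    target := corev1.Toleration{
        Key:      unreachableKey,
        Effect:   corev1.TaintEffectNoSchedule,
        Operator: corev1.TolerationOpExists,
    }
    for _, t := range pod.Spec.Tolerations {
        if t.MatchToleration(&target) {
            return kubedrain.MakePodDeleteStatusSkip()
        }
    }
    return kubedrain.MakePodDeleteStatusOkay()
}

func main() {
    // Drain helper wiring analogous to drainNode above; AdditionalFilters run in
    // addition to the built-in filters when deciding which pods to evict.
    drainer := &kubedrain.Helper{
        Ctx:                 context.TODO(),
        Client:              fake.NewSimpleClientset(),
        Force:               true,
        IgnoreAllDaemonSets: true,
        Timeout:             20 * time.Second,
        Out:                 os.Stdout,
        ErrOut:              os.Stderr,
        AdditionalFilters: []kubedrain.PodFilter{
            skipUnreachableTolerationPods,
        },
        SkipWaitForDeleteTimeoutSeconds: 60 * 30,
    }
    _ = drainer

    tolerant := corev1.Pod{
        ObjectMeta: metav1.ObjectMeta{Name: "tolerant"},
        Spec: corev1.PodSpec{Tolerations: []corev1.Toleration{
            {Key: unreachableKey, Operator: corev1.TolerationOpExists, Effect: corev1.TaintEffectNoSchedule},
        }},
    }
    plain := corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "plain"}}

    fmt.Println("evict tolerant pod?", skipUnreachableTolerationPods(tolerant).Delete) // false: skipped
    fmt.Println("evict plain pod?", skipUnreachableTolerationPods(plain).Delete)       // true: drained
}
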
internal/controllers/machine/machine_helpers.go: 10 additions, 0 deletions
@@ -17,6 +17,7 @@ limitations under the License.
 package machine
 
 import (
+    corev1 "k8s.io/api/core/v1"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/labels"
 )
@@ -37,3 +38,12 @@ func HasMatchingLabels(matchSelector metav1.LabelSelector, matchLabels map[strin
     }
     return true
 }
+
+func HasTolerations(pod *corev1.Pod, toleration *corev1.Toleration) bool {
+    for _, t := range pod.Spec.Tolerations {
+        if t.MatchToleration(toleration) {
+            return true
+        }
+    }
+    return false
+}
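
One behavioral note, offered as an illustration rather than as part of the diff: corev1.Toleration.MatchToleration compares the exact key, operator, value, and effect tuple, so HasTolerations reports true only for pods that declare a toleration identical to the one passed in. A pod that tolerates everything with a bare Exists operator, or that tolerates the same key with a different effect, does not match and would still be drained. A minimal standalone sketch:

package main

import (
    "fmt"

    corev1 "k8s.io/api/core/v1"
)

func main() {
    // The toleration the machine controller looks for (same shape as unreachableToleration above).
    target := corev1.Toleration{
        Key:      "node.kubernetes.io/unschedulable",
        Effect:   corev1.TaintEffectNoSchedule,
        Operator: corev1.TolerationOpExists,
    }

    // Identical <key, operator, value, effect> tuple: matches.
    exact := corev1.Toleration{
        Key:      "node.kubernetes.io/unschedulable",
        Effect:   corev1.TaintEffectNoSchedule,
        Operator: corev1.TolerationOpExists,
    }
    // Tolerates every taint, but the tuple differs (no key, no effect): does not match.
    broad := corev1.Toleration{
        Operator: corev1.TolerationOpExists,
    }

    fmt.Println(exact.MatchToleration(&target)) // true
    fmt.Println(broad.MatchToleration(&target)) // false
}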
