From c43ae930db7d7e13cedfd8dd80b1ede0fd0dc4a4 Mon Sep 17 00:00:00 2001 From: Mohammed Naser Date: Thu, 10 Nov 2022 03:11:21 +0000 Subject: [PATCH] fix: move mhc to clusterclass --- magnum_cluster_api/driver.py | 6 +-- magnum_cluster_api/monitor.py | 61 ---------------------- magnum_cluster_api/objects.py | 6 --- magnum_cluster_api/resources.py | 89 +++++++++++++++------------------ 4 files changed, 43 insertions(+), 119 deletions(-) delete mode 100644 magnum_cluster_api/monitor.py diff --git a/magnum_cluster_api/driver.py b/magnum_cluster_api/driver.py index 9c6a8897..0068a425 100644 --- a/magnum_cluster_api/driver.py +++ b/magnum_cluster_api/driver.py @@ -14,9 +14,9 @@ import keystoneauth1 from magnum import objects as magnum_objects -from magnum.drivers.common import driver +from magnum.drivers.common import driver, k8s_monitor -from magnum_cluster_api import clients, monitor, objects, resources, utils +from magnum_cluster_api import clients, objects, resources, utils class BaseDriver(driver.Driver): @@ -244,7 +244,7 @@ def delete_nodegroup(self, context, cluster, nodegroup): ) def get_monitor(self, context, cluster): - return monitor.ClusterApiMonitor(context, cluster) + return k8s_monitor.K8sMonitor(context, cluster) # def rotate_ca_certificate(self, context, cluster): # raise exception.NotSupported( diff --git a/magnum_cluster_api/monitor.py b/magnum_cluster_api/monitor.py deleted file mode 100644 index ced2970d..00000000 --- a/magnum_cluster_api/monitor.py +++ /dev/null @@ -1,61 +0,0 @@ -from magnum.conductor import monitors -from magnum.objects import fields - -from magnum_cluster_api import clients, resources, utils - - -class ClusterApiMonitor(monitors.MonitorBase): - def __init__(self, context, cluster): - super(ClusterApiMonitor, self).__init__(context, cluster) - self.data = {} - self.k8s_api = clients.get_pykube_api() - - def metrics_spec(self): - raise NotImplementedError() - - def pull_data(self): - raise NotImplementedError() - - def _return_health_status(self, status, message): - self.data["health_status"] = status - self.data["health_status_reason"] = {"status": message} - - def poll_health_status(self): - """ - Poll for the health status of the cluster using the MachineHealthCheck - API using the management cluster. - """ - AUTO_HEAL_DISABLED = "The cluster does not have auto healing enabled" - NODES_UNHEALTHY = "The cluster has unhealthy nodes" - HEALTH_OK = "All nodes are healthy" - - if not utils.get_cluster_label_as_bool( - self.cluster, "auto_healing_enabled", True - ): - return self._return_health_status( - fields.ClusterHealthStatus.UNKNOWN, - AUTO_HEAL_DISABLED, - ) - - mhc = resources.MachineHealthCheck(self.k8s_api, self.cluster).get_object() - if not mhc.exists(): - return self._return_health_status( - fields.ClusterHealthStatus.UNKNOWN, - AUTO_HEAL_DISABLED, - ) - - mhc.reload() - - current_healthy = mhc.obj["status"]["currentHealthy"] - expected_machines = mhc.obj["status"]["expectedMachines"] - - if current_healthy != expected_machines: - return self._return_health_status( - fields.ClusterHealthStatus.UNHEALTHY, - NODES_UNHEALTHY, - ) - - return self._return_health_status( - fields.ClusterHealthStatus.HEALTHY, - HEALTH_OK, - ) diff --git a/magnum_cluster_api/objects.py b/magnum_cluster_api/objects.py index 89b9e9d6..113c8547 100644 --- a/magnum_cluster_api/objects.py +++ b/magnum_cluster_api/objects.py @@ -43,12 +43,6 @@ class Machine(pykube.objects.NamespacedAPIObject): kind = "Machine" -class MachineHealthCheck(pykube.objects.NamespacedAPIObject): - version = "cluster.x-k8s.io/v1beta1" - endpoint = "machinehealthchecks" - kind = "MachineHealthCheck" - - class OpenStackClusterTemplate(pykube.objects.NamespacedAPIObject): version = "infrastructure.cluster.x-k8s.io/v1alpha6" endpoint = "openstackclustertemplates" diff --git a/magnum_cluster_api/resources.py b/magnum_cluster_api/resources.py index 0f802eb9..6f2d6c83 100644 --- a/magnum_cluster_api/resources.py +++ b/magnum_cluster_api/resources.py @@ -329,49 +329,6 @@ def get_object(self) -> pykube.Secret: ) -class MachineHealthCheck(ClusterBase): - def get_object(self) -> objects.MachineHealthCheck: - return objects.MachineHealthCheck( - self.api, - { - "apiVersion": objects.MachineHealthCheck.version, - "kind": objects.MachineHealthCheck.kind, - "metadata": { - "name": utils.get_or_generate_cluster_api_name( - self.api, self.cluster - ), - "namespace": "magnum-system", - "labels": self.labels, - }, - "spec": { - "clusterName": utils.get_or_generate_cluster_api_name( - self.api, self.cluster - ), - "maxUnhealthy": "40%", - "selector": { - "matchLabels": { - "cluster.x-k8s.io/cluster-name": utils.get_or_generate_cluster_api_name( - self.api, self.cluster - ), - } - }, - "unhealthyConditions": [ - { - "type": "Ready", - "status": "False", - "timeout": "300s", - }, - { - "type": "Ready", - "status": "Unknown", - "timeout": "300s", - }, - ], - }, - }, - ) - - class KubeadmControlPlaneTemplate(Base): def get_object(self) -> objects.KubeadmControlPlaneTemplate: manifests_path = pkg_resources.resource_filename( @@ -535,6 +492,21 @@ def get_object(self) -> objects.ClusterClass: "name": CLUSTER_CLASS_NAME, }, }, + "machineHealthCheck": { + "maxUnhealthy": "33%", + "unhealthyConditions": [ + { + "type": "Ready", + "status": "False", + "timeout": "5m", + }, + { + "type": "Ready", + "status": "Unknown", + "timeout": "5m", + }, + ], + }, }, "infrastructure": { "ref": { @@ -563,6 +535,21 @@ def get_object(self) -> objects.ClusterClass: } }, }, + "machineHealthCheck": { + "maxUnhealthy": "33%", + "unhealthyConditions": [ + { + "type": "Ready", + "status": "False", + "timeout": "5m", + }, + { + "type": "Ready", + "status": "Unknown", + "timeout": "5m", + }, + ], + }, } ], }, @@ -1115,6 +1102,11 @@ def get_object(self) -> objects.Cluster: ), "controlPlane": { "replicas": self.cluster.master_count, + "machineHealthCheck": { + "enable": utils.get_cluster_label_as_bool( + self.cluster, "auto_healing_enabled", True + ) + }, }, "workers": { "machineDeployments": [ @@ -1125,6 +1117,11 @@ def get_object(self) -> objects.Cluster: "failureDomain": utils.get_cluster_label( self.cluster, "availability_zone", "" ), + "machineHealthCheck": { + "enable": utils.get_cluster_label_as_bool( + self.cluster, "auto_healing_enabled", True + ) + }, "variables": { "overrides": [ { @@ -1317,12 +1314,6 @@ def apply_cluster_from_magnum_cluster( ClusterResourceSet(api, cluster).apply() Cluster(context, api, cluster).apply() - # TODO: refactor into Cluster topology - if utils.get_cluster_label_as_bool(cluster, "auto_healing_enabled", True): - MachineHealthCheck(api, cluster).apply() - else: - MachineHealthCheck(api, cluster).delete() - def get_kubeadm_control_plane( api: pykube.HTTPClient, cluster: magnum_objects.Cluster