Skip to content

Commit

Permalink
fix(sanitychecks): Refactor checks to run as classes
Browse files Browse the repository at this point in the history
  • Loading branch information
lperdereau committed Dec 3, 2024
1 parent cf72417 commit 983e7fd
Show file tree
Hide file tree
Showing 8 changed files with 133 additions and 141 deletions.
1 change: 0 additions & 1 deletion src/pvecontrol/actions/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,5 @@ def action_sanitycheck(proxmox, args):
# VM is running in cpu = host
# VM is running in cpu = qemu64
sc = SanityCheck(proxmox)
print(args.check)
sc.run(checks=args.check)
sc.display()
2 changes: 1 addition & 1 deletion src/pvecontrol/sanitycheck/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .sanitychecks import SanityCheck, DEFAULT_CHECKS_ORDER
from .sanitychecks import SanityCheck
15 changes: 11 additions & 4 deletions src/pvecontrol/sanitycheck/checks.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from abc import ABC, abstractmethod
from enum import Enum

from pvecontrol.utils import fonts
Expand Down Expand Up @@ -30,15 +31,21 @@ def display(self, padding_max_size):
def __len__(self):
return len(self.message)

class Check:
class Check(ABC):

def __init__(self, type: CheckType, name: str, messages = None):
type = ""
name = ""

def __init__(self, proxmox, messages = None):
if messages is None:
messages = []
self.type = type
self.name = name
self.proxmox = proxmox
self.messages = messages

@abstractmethod
def run(self):
pass

def add_messages(self, messages):
if isinstance(messages, CheckMessage):
self.messages.append(messages)
Expand Down
32 changes: 4 additions & 28 deletions src/pvecontrol/sanitycheck/sanitychecks.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,19 @@
import importlib

from pvecontrol.cluster import PVECluster
from pvecontrol.sanitycheck.tests import DEFAULT_CHECKS, DEFAULT_CHECK_IDS


DEFAULT_CHECKS_ORDER = [
'nodes',
'ha_vms',
'ha_groups',
]

class SanityCheck():

def __init__(self, proxmox: PVECluster):
self._proxmox = proxmox
self._ha = self._proxmox.ha()
self._checks = []

def run(self, checks):
if not checks:
checks = DEFAULT_CHECKS_ORDER

from . import tests
pkgs = []
for pkg_name in checks:
pkg = None
try:
pkg = importlib.import_module(f"pvecontrol.sanitycheck.tests.{pkg_name}")
pkgs.append(pkg)
except ModuleNotFoundError:
print(
f"Sanity check '{pkg_name}' doesn't exists.\n"
f"Here available values are:\n{', '.join(DEFAULT_CHECKS_ORDER)}"
)
return
checks = DEFAULT_CHECK_IDS

for pkg in pkgs:
if pkg:
method = getattr(pkg, 'get_checks')
self._checks += method(self)
for id in checks:
DEFAULT_CHECKS[id](self._proxmox).run()

def _get_longest_message(self):
size = 0
Expand Down
22 changes: 9 additions & 13 deletions src/pvecontrol/sanitycheck/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
from pvecontrol.sanitycheck.checks import Check, CheckType, CheckMessage, CheckCode
from .nodes import Nodes
from .ha_groups import HaGroups
from .ha_vms import HaVms

# Check HA groups
def get_checks(sanity):
check = Check(CheckType.HA, "Check HA groups")
for group in sanity._ha['groups']:
num_nodes = len(group['nodes'].split(","))
if num_nodes < 2:
msg = f"Group {group['group']} contain only {num_nodes} node"
check.add_messages(CheckMessage(CheckCode.WARN, msg))
DEFAULT_CHECKS = {
Nodes.id: Nodes,
HaGroups.id: HaGroups,
HaVms.id: HaVms
}

if not check.messages:
msg = "HA Group checked"
check.add_messages(CheckMessage(CheckCode.OK, msg))
return check
DEFAULT_CHECK_IDS = DEFAULT_CHECKS.keys()
27 changes: 15 additions & 12 deletions src/pvecontrol/sanitycheck/tests/ha_groups.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
from pvecontrol.sanitycheck.checks import Check, CheckType, CheckMessage, CheckCode


# Check HA groups
def get_checks(sanity):
check = Check(CheckType.HA, "Check HA groups")
for group in sanity._ha['groups']:
num_nodes = len(group['nodes'].split(","))
if num_nodes < 2:
msg = f"Group {group['group']} contain only {num_nodes} node"
check.add_messages(CheckMessage(CheckCode.WARN, msg))
class HaGroups(Check):

if not check.messages:
msg = "HA Group checked"
check.add_messages(CheckMessage(CheckCode.OK, msg))
return [check]
id = "ha_groups"
type = CheckType.HA
name = "Check HA groups"

def run(self):
for group in self.proxmox.ha()['groups']:
num_nodes = len(group['nodes'].split(","))
if num_nodes < 2:
msg = f"Group {group['group']} contain only {num_nodes} node"
self.add_messages(CheckMessage(CheckCode.WARN, msg))

if not self.messages:
msg = "HA Group checked"
self.add_messages(CheckMessage(CheckCode.OK, msg))
111 changes: 57 additions & 54 deletions src/pvecontrol/sanitycheck/tests/ha_vms.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,57 +4,60 @@
from pvecontrol.sanitycheck.checks import Check, CheckType, CheckMessage, CheckCode


def _check_disk_ha_consistency(sanity, ha_vms):
messages = []
# Value are quite hard to find from ressources keys if it's a disk
regex = r"^(.*):(vm|base)-[0-9]+-(disk|cloudinit).*"
vms_not_consistent = []
for vm in ha_vms:
result = {'name': vm.name, 'node': vm.node, 'disks': []}
for k, v in vm.config.items():
if not isinstance(v, str):
continue
if regex_result := re.search(regex, v):
storage = sanity._proxmox.get_storage(regex_result.group(1))
if (
storage != None and
StorageShared[storage.shared] != StorageShared.shared
):
result['disks'].append(k)
if result['disks']:
vms_not_consistent.append(result)

for vm in vms_not_consistent:
msg = f"Node '{vm['node']}' has VM '{vm['name']}' with disk(s) '{', '.join(vm['disks'])}' not on shared storage"
messages.append(CheckMessage(CheckCode.WARN, msg))

return messages

def _check_cpu_ha_consistency(ha_vms):
messages = []
for vm in ha_vms:
if vm.config['cpu'] == 'host':
msg = f"Node '{vm.node}' has VM '{vm.name}' with cpu type host"
messages.append(CheckMessage(CheckCode.CRIT, msg))
else:
msg = f"Node '{vm.node}' has VM '{vm.name}' with cpu type {vm.config['cpu']}"
messages.append(CheckMessage(CheckCode.OK, msg))
return messages

# Check disk are shared
def get_checks(sanity):
check = Check(CheckType.HA, "Check VMs in a HA group")
ha_resources = [r for r in sanity._ha['resources'] if r['type'] in ['vm']]
ha_vms = []
for resource in ha_resources:
id = resource['sid'].split(':')[1] # "sid = vm:100"
if resource['type'] == 'vm':
ha_vms.append(sanity._proxmox.get_vm(id))

check.add_messages(_check_disk_ha_consistency(sanity, ha_vms))
check.add_messages(_check_cpu_ha_consistency(ha_vms))

if not check.messages:
msg = "HA VMS checked"
check.add_messages(CheckMessage(CheckCode.OK, msg))
return [check]
class HaVms(Check):

id = "ha_vms"
type = CheckType.HA
name = "Check VMs in a HA group"

def run(self):
ha_resources = [r for r in self.proxmox.ha()['resources'] if r['type'] in ['vm']]
ha_vms = []
for resource in ha_resources:
id = resource['sid'].split(':')[1] # "sid = vm:100"
if resource['type'] == 'vm':
ha_vms.append(self.proxmox.get_vm(id))

self.add_messages(self._check_disk_ha_consistency(ha_vms))
self.add_messages(self._check_cpu_ha_consistency(ha_vms))

if not self.messages:
msg = "HA VMS checked"
self.add_messages(CheckMessage(CheckCode.OK, msg))

def _check_disk_ha_consistency(self, ha_vms):
messages = []
# Value are quite hard to find from ressources keys if it's a disk
regex = r"^(.*):(vm|base)-[0-9]+-(disk|cloudinit).*"
vms_not_consistent = []
for vm in ha_vms:
result = {'name': vm.name, 'node': vm.node, 'disks': []}
for k, v in vm.config.items():
if not isinstance(v, str):
continue
if regex_result := re.search(regex, v):
storage = self.proxmox.get_storage(regex_result.group(1))
if (
storage != None and
StorageShared[storage.shared] != StorageShared.shared
):
result['disks'].append(k)
if result['disks']:
vms_not_consistent.append(result)

for vm in vms_not_consistent:
msg = f"Node '{vm['node']}' has VM '{vm['name']}' with disk(s) '{', '.join(vm['disks'])}' not on shared storage"
messages.append(CheckMessage(CheckCode.WARN, msg))

return messages

def _check_cpu_ha_consistency(self, ha_vms):
messages = []
for vm in ha_vms:
if vm.config['cpu'] == 'host':
msg = f"Node '{vm.node}' has VM '{vm.name}' with cpu type host"
messages.append(CheckMessage(CheckCode.CRIT, msg))
else:
msg = f"Node '{vm.node}' has VM '{vm.name}' with cpu type {vm.config['cpu']}"
messages.append(CheckMessage(CheckCode.OK, msg))
return messages
64 changes: 36 additions & 28 deletions src/pvecontrol/sanitycheck/tests/nodes.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,36 @@
from pvecontrol.sanitycheck.checks import Check, CheckType, CheckMessage, CheckCode

def _check_cpu_overcommit(maxcpu, cpufactor, allocated_cpu):
return (maxcpu * cpufactor) <= allocated_cpu

def _check_mem_overcommit(max_mem, min_mem, allocated_mem):
return (allocated_mem + min_mem) >= max_mem

# Check nodes capacity
def get_checks(sanitity):
node_config = sanitity._proxmox.config['node']
check = Check(CheckType.Node, "Check Node capacity")
for node in sanitity._proxmox.nodes:
if _check_cpu_overcommit(node.maxcpu, node_config['cpufactor'], node.allocatedcpu):
msg = "Node %s is in cpu overcommit status: %s allocated but %s available"%(node.node, node.allocatedcpu, node.maxcpu)
check.add_messages(CheckMessage(CheckCode.CRIT, msg))
else:
msg = f"Node '{node.node}' isn't in cpu overcommit"
check.add_messages(CheckMessage(CheckCode.OK, msg))

for node in sanitity._proxmox.nodes:
if _check_mem_overcommit(node.allocatedmem, node_config['memoryminimum'], node.maxmem):
msg = f"Node '{node.node}' is in mem overcommit status: {node.allocatedmem} allocated but {node.maxmem} available"
check.add_messages(CheckMessage(CheckCode.CRIT, msg))
else:
msg = f"Node '{node.node}' isn't in cpu overcommit"
check.add_messages(CheckMessage(CheckCode.OK, msg))
return [check]
from pvecontrol.sanitycheck.checks import Check, CheckCode, CheckType, CheckMessage


class Nodes(Check):

id = "nodes"
type = CheckType.Node
name = "Check Node capacity"

def run(self):
self._check_cpu_overcommit()
self._check_mem_overcommit()

def _check_mem_overcommit(self):
for node in self.proxmox.nodes:
if self._mem_is_overcommited(node.allocatedmem, self.proxmox.config['node']['memoryminimum'], node.maxmem):
msg = f"Node '{node.node}' is in mem overcommit status: {node.allocatedmem} allocated but {node.maxmem} available"
self.add_messages(CheckMessage(CheckCode.CRIT, msg))
else:
msg = f"Node '{node.node}' isn't in cpu overcommit"
self.add_messages(CheckMessage(CheckCode.OK, msg))

def _check_cpu_overcommit(self):
for node in self.proxmox.nodes:
if self._cpu_is_overcommited(node.maxcpu, self.proxmox.config['node']['cpufactor'], node.allocatedcpu):
msg = f"Node {node.node} is in cpu overcommit status: {node.allocatedcpu} allocated but {node.maxcpu} available"
self.add_messages(CheckMessage(CheckCode.CRIT, msg))
else:
msg = f"Node '{node.node}' isn't in cpu overcommit"
self.add_messages(CheckMessage(CheckCode.OK, msg))

def _cpu_is_overcommited(self, maxcpu, cpufactor, allocated_cpu):
return (maxcpu * cpufactor) <= allocated_cpu

def _mem_is_overcommited(self, max_mem, min_mem, allocated_mem):
return (allocated_mem + min_mem) >= max_mem

0 comments on commit 983e7fd

Please sign in to comment.