diff --git a/pyproject.toml b/pyproject.toml index ec80137..859ad12 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,8 +35,8 @@ target-version = ["py38"] # Linting tools configuration [tool.ruff] line-length = 99 -select = ["E", "W", "F", "C", "N", "D", "I001"] -extend-ignore = [ +lint.select = ["E", "W", "F", "C", "N", "D", "I001"] +lint.extend-ignore = [ "D203", "D204", "D213", @@ -49,9 +49,9 @@ extend-ignore = [ "D409", "D413", ] -ignore = ["E501", "D107"] +lint.ignore = ["E501", "D107"] extend-exclude = ["__pycache__", "*.egg_info"] -per-file-ignores = {"tests/*" = ["D100","D101","D102","D103","D104"]} +lint.per-file-ignores = {"tests/*" = ["D100","D101","D102","D103","D104"]} -[tool.ruff.mccabe] +[tool.ruff.lint.mccabe] max-complexity = 10 diff --git a/requirements.txt b/requirements.txt index 871fc88..9b8d6b3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ ops==2.* influxdb==5.3.1 jinja2==3.1.2 +distro pycryptodome diff --git a/src/interface_prolog_epilog.py b/src/interface_prolog_epilog.py index 3d603f4..736de51 100644 --- a/src/interface_prolog_epilog.py +++ b/src/interface_prolog_epilog.py @@ -1,4 +1,5 @@ """Slurm Prolog and Epilog interface.""" + import json import logging diff --git a/src/slurmctld_ops.py b/src/slurmctld_ops.py index b23477e..44e65d7 100644 --- a/src/slurmctld_ops.py +++ b/src/slurmctld_ops.py @@ -1,38 +1,34 @@ """This module provides the SlurmManager.""" + import logging import os import shlex import shutil import socket import subprocess - from base64 import b64decode, b64encode from pathlib import Path from typing import Optional +import charms.operator_libs_linux.v0.apt as apt +import charms.operator_libs_linux.v1.systemd as systemd import distro - from Crypto.PublicKey import RSA from jinja2 import Environment, FileSystemLoader - from ops.framework import ( Object, StoredState, ) -import charms.operator_libs_linux.v0.apt as apt -import charms.operator_libs_linux.v1.systemd as systemd - - logger = logging.getLogger() -TEMPLATE_DIR = Path(os.path.dirname(os.path.abspath(__file__))) / 'templates' +TEMPLATE_DIR = Path(os.path.dirname(os.path.abspath(__file__))) / "templates" SLURM_PPA_KEY: str = """ -----BEGIN PGP PUBLIC KEY BLOCK----- -Comment: Hostname: +Comment: Hostname: Version: Hockeypuck 2.1.1-10-gec3b0e7 xsFNBGTuZb8BEACtJ1CnZe6/hv84DceHv+a54y3Pqq0gqED0xhTKnbj/E2ByJpmT @@ -89,7 +85,7 @@ def install(self) -> None: repositories = apt.RepositoryMapping() repositories.add(self._repo()) - # Install the slurmctld, slurm-client pakages. + # Install the slurmctld, slurm-client packages. try: # Run `apt-get update` apt.update() @@ -152,20 +148,17 @@ def __init__(self, charm, component): self._stored.set_default(slurm_version_set=False) """Set the initial values for attributes in the base class.""" - self._slurm_conf_template_name = 'slurm.conf.tmpl' - self._slurm_conf_path = self._slurm_conf_dir / 'slurm.conf' + self._slurm_conf_template_name = "slurm.conf.tmpl" + self._slurm_conf_path = self._slurm_conf_dir / "slurm.conf" + self._slurmctld_log_file = self._slurm_log_dir / "slurmctld.log" - self._slurmctld_log_file = self._slurm_log_dir / 'slurmctld.log' - - self._slurmctld_pid_file = self._slurm_pid_dir / 'slurmctld.pid' + self._slurmctld_pid_file = self._slurm_pid_dir / "slurmctld.pid" # NOTE: Come back to mitigate this configless cruft self._slurmctld_parameters = ["enable_configless"] - - self._slurm_conf_template_location = \ - TEMPLATE_DIR / self._slurm_conf_template_name + self._slurm_conf_template_location = TEMPLATE_DIR / self._slurm_conf_template_name @property def hostname(self) -> str: @@ -189,7 +182,7 @@ def slurm_is_active(self) -> bool: r = subprocess.check_output(shlex.split(cmd)) r = r.decode().strip().lower() logger.debug(f"### systemctl is-active {self._slurm_systemd_service}: {r}") - return 'active' == r + return "active" == r except subprocess.CalledProcessError as e: logger.error(f"#### Error checking if slurm is active: {e}") return False @@ -263,7 +256,7 @@ def _slurm_plugstack_dir(self) -> Path: @property def _slurm_plugstack_conf(self) -> Path: """Return the full path to the root plugstack configuration file.""" - return self._slurm_conf_dir / 'plugstack.conf' + return self._slurm_conf_dir / "plugstack.conf" @property def _slurm_systemd_service(self) -> str: @@ -300,119 +293,104 @@ def _slurmd_group(self) -> str: """Return the slurmd group.""" return "root" - def create_systemd_override_for_nofile(self): """Create the override.conf file for slurm systemd service.""" - systemd_override_dir = Path( - f"/etc/systemd/system/{self._slurm_systemd_service}.d" - ) + systemd_override_dir = Path(f"/etc/systemd/system/{self._slurm_systemd_service}.d") if not systemd_override_dir.exists(): systemd_override_dir.mkdir(exist_ok=True) - systemd_override_conf = systemd_override_dir / 'override.conf' - systemd_override_conf_tmpl = TEMPLATE_DIR / 'override.conf' + systemd_override_conf = systemd_override_dir / "override.conf" + systemd_override_conf_tmpl = TEMPLATE_DIR / "override.conf" shutil.copyfile(systemd_override_conf_tmpl, systemd_override_conf) - - def slurm_config_nhc_values(self, interval=600, state='ANY,CYCLE'): + def slurm_config_nhc_values(self, interval=600, state="ANY,CYCLE"): """NHC parameters for slurm.conf.""" - return {'nhc_bin': '/usr/sbin/omni-nhc-wrapper', - 'health_check_interval': interval, - 'health_check_node_state': state} + return { + "nhc_bin": "/usr/sbin/omni-nhc-wrapper", + "health_check_interval": interval, + "health_check_node_state": state, + } def write_acct_gather_conf(self, context: dict) -> None: """Render the acct_gather.conf.""" - template_name = 'acct_gather.conf.tmpl' + template_name = "acct_gather.conf.tmpl" source = TEMPLATE_DIR / template_name - target = self._slurm_conf_dir / 'acct_gather.conf' + target = self._slurm_conf_dir / "acct_gather.conf" - if not type(context) == dict: + if not isinstance(context, dict): raise TypeError("Incorrect type for config.") if not source.exists(): - raise FileNotFoundError( - "The acct_gather template cannot be found." - ) + raise FileNotFoundError("The acct_gather template cannot be found.") - rendered_template = Environment( - loader=FileSystemLoader(TEMPLATE_DIR) - ).get_template(template_name) + rendered_template = Environment(loader=FileSystemLoader(TEMPLATE_DIR)).get_template( + template_name + ) if target.exists(): target.unlink() - target.write_text( - rendered_template.render(context) - ) + target.write_text(rendered_template.render(context)) def remove_acct_gather_conf(self) -> None: """Remove acct_gather.conf.""" - - target = self._slurm_conf_dir / 'acct_gather.conf' + target = self._slurm_conf_dir / "acct_gather.conf" if target.exists(): target.unlink() def write_slurm_config(self, context) -> None: """Render the context to a template, adding in common configs.""" - common_config = { - 'munge_socket': str(self._munge_socket), - 'mail_prog': str(self._mail_prog), - 'slurm_state_dir': str(self._slurm_state_dir), - 'slurm_spool_dir': str(self._slurm_spool_dir), - 'slurm_plugin_dir': str(self._slurm_plugin_dir), - 'slurmctld_log_file': str(self._slurmctld_log_file), - 'slurmctld_pid_file': str(self._slurmctld_pid_file), - 'jwt_rsa_key_file': str(self._jwt_rsa_key_file), - 'slurmctld_parameters': ",".join(self._slurmctld_parameters), - 'slurm_plugstack_conf': str(self._slurm_plugstack_conf), - 'slurm_user': str(self._slurm_user), - 'slurmd_user': str(self._slurmd_user), + "munge_socket": str(self._munge_socket), + "mail_prog": str(self._mail_prog), + "slurm_state_dir": str(self._slurm_state_dir), + "slurm_spool_dir": str(self._slurm_spool_dir), + "slurm_plugin_dir": str(self._slurm_plugin_dir), + "slurmctld_log_file": str(self._slurmctld_log_file), + "slurmctld_pid_file": str(self._slurmctld_pid_file), + "jwt_rsa_key_file": str(self._jwt_rsa_key_file), + "slurmctld_parameters": ",".join(self._slurmctld_parameters), + "slurm_plugstack_conf": str(self._slurm_plugstack_conf), + "slurm_user": str(self._slurm_user), + "slurmd_user": str(self._slurmd_user), } template_name = self._slurm_conf_template_name source = self._slurm_conf_template_location target = self._slurm_conf_path - if not type(context) == dict: + if not isinstance(context, dict): raise TypeError("Incorrect type for config.") if not source.exists(): - raise FileNotFoundError( - "The slurm config template cannot be found." - ) + raise FileNotFoundError("The slurm config template cannot be found.") # Preprocess merging slurmctld_parameters if they exist in the context context_slurmctld_parameters = context.get("slurmctld_parameters") if context_slurmctld_parameters: slurmctld_parameters = list( - set(common_config["slurmctld_parameters"].split(",") - + context_slurmctld_parameters.split(",")) + set( + common_config["slurmctld_parameters"].split(",") + + context_slurmctld_parameters.split(",") + ) ) - common_config["slurmctld_parameters"] = ",".join( - slurmctld_parameters - ) + common_config["slurmctld_parameters"] = ",".join(slurmctld_parameters) context.pop("slurmctld_parameters") - rendered_template = Environment( - loader=FileSystemLoader(TEMPLATE_DIR) - ).get_template(template_name) + rendered_template = Environment(loader=FileSystemLoader(TEMPLATE_DIR)).get_template( + template_name + ) if target.exists(): target.unlink() - target.write_text( - rendered_template.render( - {**context, **common_config} - ) - ) + target.write_text(rendered_template.render({**context, **common_config})) user_group = f"{self._slurm_user}:{self._slurm_group}" subprocess.call(["chown", user_group, target]) - def write_munge_key(self, munge_key): """Base64 decode and write the munge key.""" key = b64decode(munge_key.encode()) @@ -420,7 +398,6 @@ def write_munge_key(self, munge_key): def write_jwt_rsa(self, jwt_rsa): """Write the jwt_rsa key.""" - # Remove jwt_rsa if exists. if self._jwt_rsa_key_file.exists(): self._jwt_rsa_key_file.write_bytes(os.urandom(2048)) @@ -430,15 +407,17 @@ def write_jwt_rsa(self, jwt_rsa): # chown to slurm_user. self._jwt_rsa_key_file.write_text(jwt_rsa) self._jwt_rsa_key_file.chmod(0o600) - subprocess.call([ - "chown", - self._slurm_user, - str(self._jwt_rsa_key_file), - ]) + subprocess.call( + [ + "chown", + self._slurm_user, + str(self._jwt_rsa_key_file), + ] + ) def write_cgroup_conf(self, content): """Write the cgroup.conf file.""" - cgroup_conf_path = self._slurm_conf_dir / 'cgroup.conf' + cgroup_conf_path = self._slurm_conf_dir / "cgroup.conf" cgroup_conf_path.write_text(content) def get_munge_key(self) -> str: @@ -462,10 +441,9 @@ def start_munged(self): def _is_active_munged(self): munge = self._munged_systemd_service try: - status = subprocess.check_output(f"systemctl is-active {munge}", - shell=True) + status = subprocess.check_output(f"systemctl is-active {munge}", shell=True) status = status.decode().strip() - if 'active' in status: + if "active" in status: logger.debug("#### Munge daemon active") return True else: @@ -477,7 +455,6 @@ def _is_active_munged(self): def check_munged(self) -> bool: """Check if munge is working correctly.""" - # check if systemd service unit is active if not self._is_active_munged(): return False @@ -486,13 +463,12 @@ def check_munged(self) -> bool: try: logger.debug("## Testing if munge is working correctly") cmd = "munge -n" - munge = subprocess.Popen(shlex.split(cmd), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - unmunge = subprocess.Popen(["unmunge"], - stdin=munge.stdout, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + munge = subprocess.Popen( + shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + unmunge = subprocess.Popen( + ["unmunge"], stdin=munge.stdout, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) munge.stdout.close() output = unmunge.communicate()[0] if "Success" in output.decode(): @@ -515,15 +491,15 @@ def _mail_prog(self) -> Path: return Path("/usr/bin/mail.mailutils") def slurm_version(self) -> str: - """Return slurm verion.""" + """Return slurm version.""" return Slurmctld().version() def munge_version(self) -> Optional[str]: - """Return munge verion.""" + """Return munge version.""" try: munge = apt.DebianPackage.from_installed_package("munge") except apt.PackageNotFoundError: - logger.error(f"munge package not found on system") + logger.error("munge package not found on system") return None return munge.fullversion @@ -536,7 +512,7 @@ def _install_slurm_from_apt(self) -> bool: Slurmctld().install() # symlink /usr/lib64/slurm -> /usr/lib/x86_64-linux-gnu/slurm-wlm/ to - # have "standard" location accross OSes + # have "standard" location across OSes lib64_slurm = Path("/usr/lib64/slurm") if lib64_slurm.exists(): lib64_slurm.unlink() @@ -548,17 +524,15 @@ def upgrade(self) -> bool: Slurmctld().upgrade_to_latest() # symlink /usr/lib64/slurm -> /usr/lib/x86_64-linux-gnu/slurm-wlm/ to - # have "standard" location accross OSes + # have "standard" location across OSes lib64_slurm = Path("/usr/lib64/slurm") if lib64_slurm.exists(): lib64_slurm.unlink() lib64_slurm.symlink_to("/usr/lib/x86_64-linux-gnu/slurm-wlm/") return True - def _setup_plugstack_dir_and_config(self) -> None: """Create plugstack directory and config.""" - # Create the plugstack config directory. plugstack_dir = self._slurm_plugstack_dir @@ -577,8 +551,7 @@ def _setup_plugstack_dir_and_config(self) -> None: plugstack_conf.write_text(f"include {plugstack_dir}/*.conf") def _setup_paths(self): - """Create needed paths with correct permisions.""" - + """Create needed paths with correct permissions.""" user = f"{self._slurm_user}:{self._slurm_group}" all_paths = [ @@ -601,7 +574,7 @@ def restart_munged(self) -> bool: logger.debug("## Restarting munge") systemd.service_restart("munge") except Exception("Error restarting munge") as e: - logger.error(e.meessage) + logger.error(e.message) return False return self.check_munged() @@ -614,7 +587,7 @@ def restart_slurmctld(self) -> bool: logger.debug("## Restarting slurmctld") systemd.service_restart("slurmctld") except Exception("Error restarting slurmctld") as e: - logger.error(e.meessage) + logger.error(e.message) return False return True @@ -628,7 +601,7 @@ def slurm_cmd(self, command, arg_string): def generate_jwt_rsa(self) -> str: """Generate the rsa key to encode the jwt with.""" - return RSA.generate(2048).export_key('PEM').decode() + return RSA.generate(2048).export_key("PEM").decode() @property def slurm_installed(self) -> bool: @@ -645,21 +618,35 @@ def fluentbit_config_slurm(self) -> list: """Return Fluentbit configuration parameters to forward Slurm logs.""" log_file = self._slurmctld_log_file - cfg = [{"input": [("name", "tail"), - ("path", log_file.as_posix()), - ("path_key", "filename"), - ("tag", "slurmctld"), - ("parser", "slurm")]}, - {"parser": [("name", "slurm"), - ("format", "regex"), - ("regex", r"^\[(?