From 44b96dc96e90141614be4f96011412dcfc961ac8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Juli=C3=A1n=20Espina?= Date: Fri, 6 Sep 2024 16:26:09 -0600 Subject: [PATCH] feat: abstract around base package manager This should enable switching between snaps and debs in any charm using the library. It also exposes the slurmutils config models instead of having wrapper methods around it, which overall moves the burden of config APIs to slurmutils. --- dev-requirements.txt | 10 +- lib/charms/hpc_libs/v0/slurm_ops.py | 414 +++++++++++++------- tests/integration/slurm_ops/test_manager.py | 49 +-- tests/unit/test_slurm_ops.py | 316 +++++++++++---- tox.ini | 5 +- 5 files changed, 553 insertions(+), 241 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 5960b53..7d8070d 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,2 +1,10 @@ +# lib deps +slurmutils ~= 0.6.0 +python-dotenv ~= 1.0.1 +pyyaml >= 6.0.2 + +# tests deps +coverage[toml] ~= 7.6 +pyfakefs ~= 5.6.0 pytest ~= 7.2 -pytest-order ~= 1.1 \ No newline at end of file +pytest-order ~= 1.1 diff --git a/lib/charms/hpc_libs/v0/slurm_ops.py b/lib/charms/hpc_libs/v0/slurm_ops.py index b48f0a3..2da2ee5 100644 --- a/lib/charms/hpc_libs/v0/slurm_ops.py +++ b/lib/charms/hpc_libs/v0/slurm_ops.py @@ -26,14 +26,13 @@ the inheriting Slurm service manager how to manage its corresponding Slurm service on the host. ```python3 -import charms.hpc_libs.v0.slurm_ops as slurm -from charms.hpc_libs.v0.slurm_ops import SlurmManagerBase, ServiceType +from charms.hpc_libs.v0.slurm_ops import SlurmManagerBase, ServiceType, class SlurmctldManager(SlurmManagerBase): # Manage `slurmctld` service on host. def __init__(self) -> None: - super().__init__(ServiceType.SLURMCTLD) + super().__init__(ServiceType.SLURMCTLD, snap=True) class ApplicationCharm(CharmBase): @@ -49,32 +48,41 @@ def __init__(self, *args, **kwargs) -> None: ) def _on_install(self, _) -> None: - slurm.install() - self.unit.set_workload_version(slurm.version()) + self._slurm_manager.install() + self.unit.set_workload_version(self._slurm_manager.version()) self._slurm_manager.config.set({"cluster-name": "cluster"}) ``` """ __all__ = [ - "format_key", - "install", - "version", - "ConfigurationManager", + "SlurmOpsError", "ServiceType", + "SlurmOpsManager", + "ServiceManager", + "MungeKeyManager", + "MungeManager", + "SnapManager", "SlurmManagerBase", - "SlurmOpsError", + "SlurmctldManager", + "SlurmdManager", + "SlurmdbdManager", + "SlurmrestdManager", ] -import json import logging -import re +import os import socket import subprocess +from abc import ABC, abstractmethod from collections.abc import Mapping +from contextlib import contextmanager from enum import Enum -from typing import Any, Optional +from pathlib import Path +from typing import Any, Optional, Union +import dotenv import yaml +from slurmutils.editors import slurmconfig, slurmdbdconfig # The unique Charmhub library identifier, never change it LIBID = "541fd767f90b40539cf7cd6e7db8fabf" @@ -84,58 +92,12 @@ def _on_install(self, _) -> None: # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 5 +LIBPATCH = 6 # Charm library dependencies to fetch during `charmcraft pack`. -PYDEPS = ["pyyaml>=6.0.1"] +PYDEPS = ["pyyaml>=6.0.2", "python-dotenv~=1.0.1", "slurmutils~=0.6.0"] _logger = logging.getLogger(__name__) -_acronym = re.compile(r"(?<=[A-Z])(?=[A-Z][a-z])") -_kebabize = re.compile(r"(?<=[a-z0-9])(?=[A-Z])") - - -class SlurmOpsError(Exception): - """Exception raised when a slurm operation failed.""" - - @property - def message(self) -> str: - """Return message passed as argument to exception.""" - return self.args[0] - - -def format_key(key: str) -> str: - """Format Slurm configuration keys from SlurmCASe into kebab case. - - Args: - key: Slurm configuration key to convert to kebab case. - - Notes: - Slurm configuration syntax does not follow proper PascalCasing - format, so we cannot put keys directly through a kebab case converter - to get the desired format. Some additional processing is needed for - certain keys before the key can properly kebabized. - - For example, without additional preprocessing, the key `CPUs` will - become `cp-us` if put through a kebabizer with being preformatted to `Cpus`. - """ - if "CPUs" in key: - key = key.replace("CPUs", "Cpus") - key = _acronym.sub(r"-", key) - return _kebabize.sub(r"-", key).lower() - - -def install() -> None: - """Install Slurm.""" - # FIXME: Pin slurm to the stable channel - _snap("install", "slurm", "--channel", "latest/candidate", "--classic") - - -def version() -> str: - """Get the current version of Slurm installed on the system.""" - info = yaml.safe_load(_snap("info", "slurm")) - if (ver := info.get("installed")) is None: - raise SlurmOpsError("unable to retrive snap info. Ensure slurm is correctly installed") - return ver.split(maxsplit=1)[0] def _call(cmd: str, *args: str, stdin: Optional[str] = None) -> str: @@ -151,7 +113,7 @@ def _call(cmd: str, *args: str, stdin: Optional[str] = None) -> str: except subprocess.CalledProcessError as e: _logger.error(f"`{' '.join(cmd)}` failed") _logger.error(f"stderr: {e.stderr.decode()}") - raise SlurmOpsError(f"command {cmd[0]} failed. Reason:\n{e.stderr.decode()}") + raise SlurmOpsError(f"command {cmd[0]} failed. reason:\n{e.stderr.decode()}") def _snap(*args) -> str: @@ -163,17 +125,13 @@ def _snap(*args) -> str: return _call("snap", *args) -def _mungectl(*args: str, stdin: Optional[str] = None) -> str: - """Control munge via `slurm.mungectl ...`. - - Args: - *args: Arguments to pass to `mungectl`. - stdin: Input to pass to `mungectl` via stdin. +class SlurmOpsError(Exception): + """Exception raised when a slurm operation failed.""" - Raises: - subprocess.CalledProcessError: Raised if `mungectl` command fails. - """ - return _call("slurm.mungectl", *args, stdin=stdin) + @property + def message(self) -> str: + """Return message passed as argument to exception.""" + return self.args[0] class ServiceType(Enum): @@ -197,9 +155,213 @@ def config_name(self) -> str: return self.value -class ServiceManager: +class ServiceManager(ABC): + """Control a Slurm service.""" + + @abstractmethod + def __init__(self, service: ServiceType) -> None: ... + + @abstractmethod + def enable(self) -> None: + """Enable service.""" + + @abstractmethod + def disable(self) -> None: + """Disable service.""" + + @abstractmethod + def restart(self) -> None: + """Restart service.""" + + @abstractmethod + def active(self) -> bool: + """Return True if the service is active.""" + + @property + @abstractmethod + def type(self) -> ServiceType: + """Return the service type of the managed service.""" + + +class MungeKeyManager(ABC): + """Control the munge key.""" + + @abstractmethod + def get(self) -> str: + """Get the current munge key. + + Returns: + The current munge key as a base64-encoded string. + """ + + @abstractmethod + def set(self, key: str) -> None: + """Set a new munge key. + + Args: + key: A new, base64-encoded munge key. + """ + + @abstractmethod + def generate(self) -> None: + """Generate a new, cryptographically secure munge key.""" + + +class _EnvManager: + """Control configuration of environment variables used in Slurm components. + + Every configuration value is automatically uppercased and prefixed with the service name. + """ + + def __init__(self, file: Union[str, os.PathLike], prefix: str) -> None: + self._file: Path = Path(file) + self._service = prefix + + def _config_to_env_var(self, key: str) -> str: + """Get the environment variable corresponding to the configuration `key`.""" + return self._service.replace("-", "_").upper() + "_" + key + + def get(self, key: str) -> Optional[str]: + """Get specific environment variable for service.""" + return dotenv.get_key(self._file, self._config_to_env_var(key)) + + def set(self, config: Mapping[str, Any]) -> None: + """Set environment variable for service.""" + for key, value in config.items(): + dotenv.set_key(self._file, self._config_to_env_var(key), str(value)) + + def unset(self, key: str) -> None: + """Unset environment variable for service.""" + dotenv.unset_key(self._file, self._config_to_env_var(key)) + + +class SlurmOpsManager(ABC): + """Manager to control the installation, creation and configuration of Slurm-related services.""" + + @abstractmethod + def install(self) -> None: + """Install Slurm.""" + + @abstractmethod + def version(self) -> str: + """Get the current version of Slurm installed on the system.""" + + @property + @abstractmethod + def slurm_path(self) -> Path: + """Get the path to the Slurm configuration directory.""" + + @abstractmethod + def service_manager_for(self, type: ServiceType) -> ServiceManager: + """Return the `ServiceManager` for the specified `ServiceType`.""" + + @abstractmethod + def _env_manager_for(self, type: ServiceType) -> _EnvManager: + """Return the `_EnvManager` for the specified `ServiceType`.""" + + @abstractmethod + def munge_key_manager(self) -> MungeKeyManager: + """Get the `MungekeyManager` of this ops manager.""" + + +class MungeManager: + """Manage `munged` service operations.""" + + def __init__(self, ops_manager: SlurmOpsManager) -> None: + self.service = ops_manager.service_manager_for(ServiceType.MUNGED) + self.key = ops_manager.munge_key_manager() + + +class PrometheusExporterManager: + """Manage `slurm-prometheus-exporter` service operations.""" + + def __init__(self, ops_manager: SlurmOpsManager) -> None: + self.service = ops_manager.service_manager_for(ServiceType.PROMETHEUS_EXPORTER) + + +class SlurmManagerBase: + """Base manager for Slurm services.""" + + def __init__(self, service: ServiceType, snap: bool = False) -> None: + if not snap: + raise SlurmOpsError("deb packaging is currently unimplemented") + self._ops_manager = SnapManager() + self.service = self._ops_manager.service_manager_for(service) + self.munge = MungeManager(self._ops_manager) + self.exporter = PrometheusExporterManager(self._ops_manager) + self.install = self._ops_manager.install + self.version = self._ops_manager.version + + @property + def hostname(self) -> str: + """The hostname where this manager is running.""" + return socket.gethostname().split(".")[0] + + +class SlurmctldManager(SlurmManagerBase): + """Manager for the Slurmctld service.""" + + def __init__(self, *args, **kwargs) -> None: + super().__init__(service=ServiceType.SLURMCTLD, *args, **kwargs) + self._config_path = self._ops_manager.slurm_path / "slurm.conf" + + @contextmanager + def config(self) -> slurmconfig.SlurmConfig: + """Get the config manager of slurmctld.""" + with slurmconfig.edit(self._config_path) as config: + yield config + + +class SlurmdManager(SlurmManagerBase): + """Manager for the Slurmd service.""" + + def __init__(self, *args, **kwargs) -> None: + super().__init__(service=ServiceType.SLURMD, *args, **kwargs) + self._env_manager = self._ops_manager._env_manager_for(ServiceType.SLURMD) + + @property + def config_server(self) -> str: + """Get the config server address of this Slurmd node.""" + return self._env_manager.get("CONFIG_SERVER") + + @config_server.setter + def config_server(self, addr: str) -> None: + """Set the config server address of this Slurmd node.""" + self._env_manager.set({"CONFIG_SERVER": addr}) + + @config_server.deleter + def config_server(self) -> None: + """Unset the config server address of this Slurmd node.""" + self._env_manager.unset("CONFIG_SERVER") + + +class SlurmdbdManager(SlurmManagerBase): + """Manager for the Slurmdbd service.""" + + def __init__(self, *args, **kwargs) -> None: + super().__init__(service=ServiceType.SLURMDBD, *args, **kwargs) + self._config_path = self._ops_manager.slurm_path / "slurmdbd.conf" + + @contextmanager + def config(self) -> slurmdbdconfig.SlurmdbdConfig: + """Get the config manager of slurmctld.""" + with slurmdbdconfig.edit(self._config_path) as config: + yield config + + +class SlurmrestdManager(SlurmManagerBase): + """Manager for the Slurmrestd service.""" + + def __init__(self, *args, **kwargs) -> None: + super().__init__(service=ServiceType.SLURMRESTD, *args, **kwargs) + + +class _SnapServiceManager(ServiceManager): """Control a Slurm service.""" + def __init__(self, service: ServiceType) -> None: + self._service = service + def enable(self) -> None: """Enable service.""" _snap("start", "--enable", f"slurm.{self._service.value}") @@ -216,93 +378,83 @@ def active(self) -> bool: """Return True if the service is active.""" info = yaml.safe_load(_snap("info", "slurm")) if (services := info.get("services")) is None: - raise SlurmOpsError("unable to retrive snap info. Ensure slurm is correctly installed") + raise SlurmOpsError("unable to retrive snap info. ensure slurm is correctly installed") # Assume `services` contains the service, since `ServiceManager` is not exposed as a # public interface for now. # We don't do `"active" in state` because the word "active" is also part of "inactive" :) return "inactive" not in services[f"slurm.{self._service.value}"] + @property + def type(self) -> ServiceType: + """Return the service type of the managed service.""" + return self._service -class ConfigurationManager: - """Control configuration of a Slurm component.""" - - def __init__(self, name: str) -> None: - self._name = name - - def get_options(self, *keys: str) -> Mapping[str, Any]: - """Get given configurations values for Slurm component.""" - configs = {} - for key in keys: - config = self.get(key) - target = key.rsplit(".", maxsplit=1)[-1] - configs[target] = config - - return configs - - def get(self, key: Optional[str] = None) -> Any: - """Get specific configuration value for Slurm component.""" - key = f"{self._name}.{key}" if key else self._name - config = json.loads(_snap("get", "-d", "slurm", key)) - return config[key] - def set(self, config: Mapping[str, Any]) -> None: - """Set configuration for Slurm component.""" - args = [f"{self._name}.{k}={json.dumps(v)}" for k, v in config.items()] - _snap("set", "slurm", *args) +class _SnapMungeKeyManager(MungeKeyManager): + """Control the munge key using Snap.""" - def unset(self, *keys: str) -> None: - """Unset configuration for Slurm component.""" - args = [f"{self._name}.{k}" for k in keys] if len(keys) > 0 else [self._name] - _snap("unset", "slurm", *args) + def _mungectl(self, *args: str, stdin: Optional[str] = None) -> str: + """Control munge via `slurm.mungectl ...`. + Args: + *args: Arguments to pass to `mungectl`. + stdin: Input to pass to `mungectl` via stdin. -class MungeManager(ServiceManager): - """Manage `munged` service operations.""" - - def __init__(self) -> None: - service = ServiceType.MUNGED - self._service = service - self.config = ConfigurationManager(service.config_name) + Raises: + subprocess.CalledProcessError: Raised if `mungectl` command fails. + """ + return _call("slurm.mungectl", *args, stdin=stdin) - def get_key(self) -> str: + def get(self) -> str: """Get the current munge key. Returns: The current munge key as a base64-encoded string. """ - return _mungectl("key", "get") + return self._mungectl("key", "get") - def set_key(self, key: str) -> None: + def set(self, key: str) -> None: """Set a new munge key. Args: key: A new, base64-encoded munge key. """ - _mungectl("key", "set", stdin=key) + self._mungectl("key", "set", stdin=key) - def generate_key(self) -> None: + def generate(self) -> None: """Generate a new, cryptographically secure munge key.""" - _mungectl("key", "generate") + self._mungectl("key", "generate") -class PrometheusExporterManager(ServiceManager): - """Manage `slurm-prometheus-exporter` service operations.""" +class SnapManager(SlurmOpsManager): + """Slurm ops manager that uses Snap as its package manager.""" - def __init__(self) -> None: - self._service = ServiceType.PROMETHEUS_EXPORTER + def install(self) -> None: + """Install Slurm using the `slurm` snap.""" + # FIXME: Pin slurm to the stable channel + _snap("install", "slurm", "--channel", "latest/candidate", "--classic") + + def version(self) -> str: + """Get the current version of the `slurm` snap installed on the system.""" + info = yaml.safe_load(_snap("info", "slurm")) + if (ver := info.get("installed")) is None: + raise SlurmOpsError("unable to retrive snap info. ensure slurm is correctly installed") + return ver.split(maxsplit=1)[0] + @property + def slurm_path(self) -> Path: + """Get the path to the Slurm configuration directory.""" + return Path("/var/snap/slurm/common/etc/slurm") -class SlurmManagerBase(ServiceManager): - """Base manager for Slurm services.""" + def service_manager_for(self, type: ServiceType) -> ServiceManager: + """Return the `ServiceManager` for the specified `ServiceType`.""" + return _SnapServiceManager(type) - def __init__(self, service: ServiceType) -> None: - self._service = service - self.config = ConfigurationManager(service.config_name) - self.munge = MungeManager() - self.exporter = PrometheusExporterManager() + def _env_manager_for(self, type: ServiceType) -> _EnvManager: + """Return the `_EnvManager` for the specified `ServiceType`.""" + return _EnvManager(file="/var/snap/slurm/common/.env", prefix=type.value) - @property - def hostname(self) -> str: - """The hostname where this manager is running.""" - return socket.gethostname().split(".")[0] + def munge_key_manager(self) -> MungeKeyManager: + """Get the `MungekeyManager` class of this ops manager.""" + return _SnapMungeKeyManager() diff --git a/tests/integration/slurm_ops/test_manager.py b/tests/integration/slurm_ops/test_manager.py index bc77dfc..0f88c25 100644 --- a/tests/integration/slurm_ops/test_manager.py +++ b/tests/integration/slurm_ops/test_manager.py @@ -3,69 +3,70 @@ # See LICENSE file for licensing details. import base64 +from pathlib import Path import pytest -import lib.charms.hpc_libs.v0.slurm_ops as slurm -from lib.charms.hpc_libs.v0.slurm_ops import ServiceType, SlurmManagerBase +from lib.charms.hpc_libs.v0.slurm_ops import SlurmctldManager @pytest.fixture -def slurmctld() -> SlurmManagerBase: - return SlurmManagerBase(ServiceType.SLURMCTLD) +def slurmctld() -> SlurmctldManager: + return SlurmctldManager(snap=True) @pytest.mark.order(1) -def test_install(slurmctld: SlurmManagerBase) -> None: +def test_install(slurmctld: SlurmctldManager) -> None: """Install Slurm using the manager.""" - slurm.install() - slurmctld.munge.generate_key() + slurmctld.install() + slurmctld.munge.key.generate() with open("/var/snap/slurm/common/etc/munge/munge.key", "rb") as f: key: str = base64.b64encode(f.read()).decode() - assert key == slurmctld.munge.get_key() + assert key == slurmctld.munge.key.get() @pytest.mark.order(2) -def test_rotate_key(slurmctld: SlurmManagerBase) -> None: +def test_rotate_key(slurmctld: SlurmctldManager) -> None: """Test that the munge key can be rotated.""" - old_key = slurmctld.munge.get_key() - slurmctld.munge.generate_key() - new_key = slurmctld.munge.get_key() + old_key = slurmctld.munge.key.get() + slurmctld.munge.key.generate() + new_key = slurmctld.munge.key.get() assert old_key != new_key @pytest.mark.order(3) -def test_slurm_config(slurmctld: SlurmManagerBase) -> None: +def test_slurm_config(slurmctld: SlurmctldManager) -> None: """Test that the slurm config can be changed.""" - slurmctld.config.set({"slurmctld-host": "test-slurm-ops", "cluster-name": "test-cluster"}) - value = slurmctld.config.get("cluster-name") - assert value == "test-cluster" + with slurmctld.config() as config: + config.slurmctld_host = ["test-slurm-ops"] + config.cluster_name = "test-cluster" - with open("/var/snap/slurm/common/etc/slurm/slurm.conf", "r") as f: - output = f.read() + print(Path("/var/snap/slurm/common/etc/slurm/slurm.conf").read_text()) - for line in output.splitlines(): + for line in Path("/var/snap/slurm/common/etc/slurm/slurm.conf").read_text().splitlines(): entry = line.split("=") if len(entry) != 2: continue key, value = entry if key == "ClusterName": assert value == "test-cluster" + if key == "SlurmctldHost": + assert value == "test-slurm-ops" @pytest.mark.order(4) -def test_enable_service(slurmctld: SlurmManagerBase) -> None: +def test_enable_service(slurmctld: SlurmctldManager) -> None: """Test that the slurmctl daemon can be enabled.""" - slurmctld.enable() - assert slurmctld.active() + slurmctld.service.enable() + assert slurmctld.service.active() @pytest.mark.order(5) -def test_version() -> None: +def test_version(slurmctld: SlurmctldManager) -> None: """Test that the Slurm manager can report its version.""" - version = slurm.version() + version = slurmctld.version() # We are interested in knowing that this does not return a falsy value (`None`, `''`, `[]`, etc.) assert version diff --git a/tests/unit/test_slurm_ops.py b/tests/unit/test_slurm_ops.py index 6d12d77..e47e7db 100644 --- a/tests/unit/test_slurm_ops.py +++ b/tests/unit/test_slurm_ops.py @@ -6,11 +6,22 @@ import base64 import subprocess +from pathlib import Path from unittest import TestCase from unittest.mock import patch import charms.hpc_libs.v0.slurm_ops as slurm -from charms.hpc_libs.v0.slurm_ops import ServiceType, SlurmManagerBase, SlurmOpsError +import dotenv +from charms.hpc_libs.v0.slurm_ops import ( + ServiceType, + SlurmctldManager, + SlurmdbdManager, + SlurmdManager, + SlurmManagerBase, + SlurmOpsError, + SnapManager, +) +from pyfakefs.fake_filesystem_unittest import TestCase as FsTestCase MUNGEKEY = b"1234567890" MUNGEKEY_BASE64 = base64.b64encode(MUNGEKEY) @@ -60,15 +71,22 @@ @patch("charms.hpc_libs.v0.slurm_ops.subprocess.check_output") class TestSlurmOps(TestCase): + def test_error_message(self, *_) -> None: + """Test that `SlurmOpsError` stores the correct message.""" + message = "error message!" + self.assertEqual(SlurmOpsError(message).message, message) + - def test_format_key(self, _) -> None: - """Test that `kebabize` properly formats slurm keys.""" - self.assertEqual(slurm.format_key("CPUs"), "cpus") - self.assertEqual(slurm.format_key("AccountingStorageHost"), "accounting-storage-host") +@patch("charms.hpc_libs.v0.slurm_ops.subprocess.check_output") +class TestSnapPackageManager(FsTestCase): + def setUp(self): + self.manager = SnapManager() + self.setUpPyfakefs() + self.fs.create_file("/var/snap/slurm/common/.env") def test_install(self, subcmd) -> None: """Test that `slurm_ops` calls the correct install command.""" - slurm.install() + self.manager.install() args = subcmd.call_args[0][0] self.assertEqual(args[:3], ["snap", "install", "slurm"]) self.assertIn("--classic", args[3:]) # codespell:ignore @@ -76,7 +94,7 @@ def test_install(self, subcmd) -> None: def test_version(self, subcmd) -> None: """Test that `slurm_ops` gets the correct version using the correct command.""" subcmd.return_value = SLURM_INFO.encode() - version = slurm.version() + version = self.manager.version() args = subcmd.call_args[0][0] self.assertEqual(args, ["snap", "info", "slurm"]) self.assertEqual(version, "23.11.7") @@ -85,7 +103,7 @@ def test_version_not_installed(self, subcmd) -> None: """Test that `slurm_ops` throws when getting the installed version if the slurm snap is not installed.""" subcmd.return_value = SLURM_INFO_NOT_INSTALLED.encode() with self.assertRaises(slurm.SlurmOpsError): - slurm.version() + self.manager.version() args = subcmd.call_args[0][0] self.assertEqual(args, ["snap", "info", "slurm"]) @@ -93,114 +111,68 @@ def test_call_error(self, subcmd) -> None: """Test that `slurm_ops` propagates errors when a command fails.""" subcmd.side_effect = subprocess.CalledProcessError(-1, cmd=[""], stderr=b"error") with self.assertRaises(slurm.SlurmOpsError): - slurm.install() - - def test_error_message(self, *_) -> None: - """Test that `SlurmOpsError` stores the correct message.""" - message = "error message!" - self.assertEqual(SlurmOpsError(message).message, message) + self.manager.install() @patch("charms.hpc_libs.v0.slurm_ops.subprocess.check_output") class SlurmOpsBase: """Test the Slurm service operations managers.""" + def setUp(self): + self.setUpPyfakefs() + self.fs.create_file("/var/snap/slurm/common/.env") + def test_config_name(self, *_) -> None: """Test that the config name is correctly set.""" - self.assertEqual(self.manager._service.config_name, self.config_name) + self.assertEqual(self.manager.service.type.config_name, self.config_name) def test_enable(self, subcmd, *_) -> None: """Test that the manager calls the correct enable command.""" - self.manager.enable() + self.manager.service.enable() args = subcmd.call_args[0][0] self.assertEqual( - args, ["snap", "start", "--enable", f"slurm.{self.manager._service.value}"] + args, ["snap", "start", "--enable", f"slurm.{self.manager.service.type.value}"] ) def test_disable(self, subcmd, *_) -> None: """Test that the manager calls the correct disable command.""" - self.manager.disable() + self.manager.service.disable() args = subcmd.call_args[0][0] self.assertEqual( - args, ["snap", "stop", "--disable", f"slurm.{self.manager._service.value}"] + args, ["snap", "stop", "--disable", f"slurm.{self.manager.service.type.value}"] ) def test_restart(self, subcmd, *_) -> None: """Test that the manager calls the correct restart command.""" - self.manager.restart() + self.manager.service.restart() args = subcmd.call_args[0][0] - self.assertEqual(args, ["snap", "restart", f"slurm.{self.manager._service.value}"]) + self.assertEqual(args, ["snap", "restart", f"slurm.{self.manager.service.type.value}"]) - def test_active(self, subcmd, *_) -> None: + def test_active(self, subcmd) -> None: """Test that the manager can detect that a service is active.""" subcmd.return_value = SLURM_INFO.encode() - self.assertTrue(self.manager.active()) + self.assertTrue(self.manager.service.active()) def test_active_not_installed(self, subcmd, *_) -> None: """Test that the manager throws an error when calling `active` if the snap is not installed.""" subcmd.return_value = SLURM_INFO_NOT_INSTALLED.encode() with self.assertRaises(slurm.SlurmOpsError): - self.manager.active() + self.manager.service.active() args = subcmd.call_args[0][0] self.assertEqual(args, ["snap", "info", "slurm"]) - def test_get_options(self, subcmd) -> None: - """Test that the manager correctly collects all requested configuration options.""" - subcmd.return_value = '{"%(name)s.key1": "value1", "%(name)s.key2": "value2"}' % { - "name": self.config_name - } - value = self.manager.config.get_options("key1", "key2") - calls = [args[0][0] for args in subcmd.call_args_list] - self.assertEqual(calls[0], ["snap", "get", "-d", "slurm", f"{self.config_name}.key1"]) - self.assertEqual(calls[1], ["snap", "get", "-d", "slurm", f"{self.config_name}.key2"]) - self.assertEqual(value, {"key1": "value1", "key2": "value2"}) - - def test_get_config(self, subcmd, *_) -> None: - """Test that the manager calls the correct `snap get ...` command.""" - subcmd.return_value = '{"%s.key": "value"}' % self.config_name - value = self.manager.config.get("key") - args = subcmd.call_args[0][0] - self.assertEqual(args, ["snap", "get", "-d", "slurm", f"{self.config_name}.key"]) - self.assertEqual(value, "value") - - def test_get_config_all(self, subcmd) -> None: - """Test that manager calls the correct `snap get ...` with no arguments given.""" - subcmd.return_value = '{"%s": "value"}' % self.config_name - value = self.manager.config.get() - args = subcmd.call_args[0][0] - self.assertEqual(args, ["snap", "get", "-d", "slurm", self.config_name]) - self.assertEqual(value, "value") - - def test_set_config(self, subcmd, *_) -> None: - """Test that the manager calls the correct `snap set ...` command.""" - self.manager.config.set({"key": "value"}) - args = subcmd.call_args[0][0] - self.assertEqual(args, ["snap", "set", "slurm", f'{self.config_name}.key="value"']) - - def test_unset_config(self, subcmd) -> None: - """Test that the manager calls the correct `snap unset ...` command.""" - self.manager.config.unset("key") - args = subcmd.call_args[0][0] - self.assertEqual(args, ["snap", "unset", "slurm", f"{self.config_name}.key"]) - - def test_unset_config_all(self, subcmd) -> None: - """Test the manager calls the correct `snap unset ...` with no arguments given.""" - self.manager.config.unset() - args = subcmd.call_args[0][0] - self.assertEqual(args, ["snap", "unset", "slurm", self.config_name]) - def test_generate_munge_key(self, subcmd, *_) -> None: """Test that the manager calls the correct `mungectl` command.""" - self.manager.munge.generate_key() + self.manager.munge.key.generate() args = subcmd.call_args[0][0] self.assertEqual(args, ["slurm.mungectl", "key", "generate"]) def test_set_munge_key(self, subcmd, *_) -> None: """Test that the manager sets the munge key with the correct command.""" - self.manager.munge.set_key(MUNGEKEY_BASE64) + self.manager.munge.key.set(MUNGEKEY_BASE64) args = subcmd.call_args[0][0] # MUNGEKEY_BASE64 is piped to `stdin` to avoid exposure. self.assertEqual(args, ["slurm.mungectl", "key", "set"]) @@ -208,16 +180,15 @@ def test_set_munge_key(self, subcmd, *_) -> None: def test_get_munge_key(self, subcmd, *_) -> None: """Test that the manager gets the munge key with the correct command.""" subcmd.return_value = MUNGEKEY_BASE64 - key = self.manager.munge.get_key() + key = self.manager.munge.key.get() args = subcmd.call_args[0][0] self.assertEqual(args, ["slurm.mungectl", "key", "get"]) self.assertEqual(key, MUNGEKEY_BASE64) - def test_configure_munge(self, subcmd) -> None: + def test_configure_munge(self, *_) -> None: """Test that manager is able to correctly configure munge.""" - self.manager.munge.config.set({"max-thread-count": 24}) - args = subcmd.call_args[0][0] - self.assertEqual(args, ["snap", "set", "slurm", "munge.max-thread-count=24"]) + self.manager.munge.max_thread_count = 24 + self.assertEqual(self.manager.munge.max_thread_count, 24) @patch("charms.hpc_libs.v0.slurm_ops.socket.gethostname") def test_hostname(self, gethostname, *_) -> None: @@ -229,19 +200,200 @@ def test_hostname(self, gethostname, *_) -> None: parameters = [ - (SlurmManagerBase(ServiceType.SLURMCTLD), "slurm"), - (SlurmManagerBase(ServiceType.SLURMD), "slurmd"), - (SlurmManagerBase(ServiceType.SLURMDBD), "slurmdbd"), - (SlurmManagerBase(ServiceType.SLURMRESTD), "slurmrestd"), + (SlurmManagerBase(ServiceType.SLURMCTLD, snap=True), "slurm"), + (SlurmManagerBase(ServiceType.SLURMD, snap=True), "slurmd"), + (SlurmManagerBase(ServiceType.SLURMDBD, snap=True), "slurmdbd"), + (SlurmManagerBase(ServiceType.SLURMRESTD, snap=True), "slurmrestd"), ] for manager, config_name in parameters: - cls_name = f"Test{manager._service.value.capitalize()}Ops" + cls_name = f"Test{manager.service.type.value.capitalize()}Ops" globals()[cls_name] = type( cls_name, - (SlurmOpsBase, TestCase), + (SlurmOpsBase, FsTestCase), { "manager": manager, "config_name": config_name, }, ) + + +@patch("charms.hpc_libs.v0.slurm_ops.subprocess.check_output") +class TestSlurmctldConfig(FsTestCase): + """Test the Slurmctld service config manager.""" + + EXAMPLE_SLURM_CONF = """# +# `slurm.conf` file generated at 2024-01-30 17:18:36.171652 by slurmutils. +# +SlurmctldHost=juju-c9fc6f-0(10.152.28.20) +SlurmctldHost=juju-c9fc6f-1(10.152.28.100) + +ClusterName=charmed-hpc +AuthType=auth/munge +Epilog=/usr/local/slurm/epilog +Prolog=/usr/local/slurm/prolog +FirstJobId=65536 +InactiveLimit=120 +JobCompType=jobcomp/filetxt +JobCompLoc=/var/log/slurm/jobcomp +KillWait=30 +MaxJobCount=10000 +MinJobAge=3600 +PluginDir=/usr/local/lib:/usr/local/slurm/lib +ReturnToService=0 +SchedulerType=sched/backfill +SlurmctldLogFile=/var/log/slurm/slurmctld.log +SlurmdLogFile=/var/log/slurm/slurmd.log +SlurmctldPort=7002 +SlurmdPort=7003 +SlurmdSpoolDir=/var/spool/slurmd.spool +StateSaveLocation=/var/spool/slurm.state +SwitchType=switch/none +TmpFS=/tmp +WaitTime=30 + +# +# Node configurations +# +NodeName=juju-c9fc6f-2 NodeAddr=10.152.28.48 CPUs=1 RealMemory=1000 TmpDisk=10000 +NodeName=juju-c9fc6f-3 NodeAddr=10.152.28.49 CPUs=1 RealMemory=1000 TmpDisk=10000 +NodeName=juju-c9fc6f-4 NodeAddr=10.152.28.50 CPUs=1 RealMemory=1000 TmpDisk=10000 +NodeName=juju-c9fc6f-5 NodeAddr=10.152.28.51 CPUs=1 RealMemory=1000 TmpDisk=10000 + +# +# Down node configurations +# +DownNodes=juju-c9fc6f-5 State=DOWN Reason="Maintenance Mode" + +# +# Partition configurations +# +PartitionName=DEFAULT MaxTime=30 MaxNodes=10 State=UP +PartitionName=batch Nodes=juju-c9fc6f-2,juju-c9fc6f-3,juju-c9fc6f-4,juju-c9fc6f-5 MinNodes=4 MaxTime=120 AllowGroups=admin +""" + + def setUp(self): + self.manager = SlurmctldManager(snap=True) + self.config_name = "slurm" + self.setUpPyfakefs() + self.fs.create_file("/var/snap/slurm/common/.env") + self.fs.create_file( + "/var/snap/slurm/common/etc/slurm/slurm.conf", contents=self.EXAMPLE_SLURM_CONF + ) + + def test_config(self, *_) -> None: + """Test that the manager can manipulate the configuration file.""" + with self.manager.config() as config: + self.assertEqual(config.slurmd_log_file, "/var/log/slurm/slurmd.log") + self.assertEqual(config.nodes["juju-c9fc6f-2"]["NodeAddr"], "10.152.28.48") + self.assertEqual(config.down_nodes[0]["State"], "DOWN") + + config.slurmctld_port = "8081" + config.nodes["juju-c9fc6f-2"]["CPUs"] = "10" + config.nodes["juju-c9fc6f-20"] = {"CPUs": 1} + config.down_nodes.append( + {"DownNodes": ["juju-c9fc6f-3"], "State": "DOWN", "Reason": "New nodes"} + ) + del config.return_to_service + + # Exit the context to save changes to the file + + configs = Path("/var/snap/slurm/common/etc/slurm/slurm.conf").read_text().splitlines() + + self.assertIn("SlurmctldPort=8081", configs) + self.assertIn( + "NodeName=juju-c9fc6f-2 NodeAddr=10.152.28.48 CPUs=10 RealMemory=1000 TmpDisk=10000", + configs, + ) + self.assertIn("NodeName=juju-c9fc6f-20 CPUs=1", configs) + self.assertIn('DownNodes=juju-c9fc6f-3 State=DOWN Reason="New nodes"', configs) + self.assertNotIn("ReturnToService=0", configs) + + +@patch("charms.hpc_libs.v0.slurm_ops.subprocess.check_output") +class TestSlurmdbdConfig(FsTestCase): + """Test the Slurmdbd service config manager.""" + + EXAMPLE_SLURMDBD_CONF = """# +# `slurmdbd.conf` file generated at 2024-01-30 17:18:36.171652 by slurmutils. +# +ArchiveEvents=yes +ArchiveJobs=yes +ArchiveResvs=yes +ArchiveSteps=no +ArchiveTXN=no +ArchiveUsage=no +ArchiveScript=/usr/sbin/slurm.dbd.archive +AuthInfo=/var/run/munge/munge.socket.2 +AuthType=auth/munge +AuthAltTypes=auth/jwt +AuthAltParameters=jwt_key=16549684561684@ +DbdHost=slurmdbd-0 +DbdBackupHost=slurmdbd-1 +DebugLevel=info +PluginDir=/all/these/cool/plugins +PurgeEventAfter=1month +PurgeJobAfter=12month +PurgeResvAfter=1month +PurgeStepAfter=1month +PurgeSuspendAfter=1month +PurgeTXNAfter=12month +PurgeUsageAfter=24month +LogFile=/var/log/slurmdbd.log +PidFile=/var/run/slurmdbd.pid +SlurmUser=slurm +StoragePass=supersecretpasswd +StorageType=accounting_storage/mysql +StorageUser=slurm +StorageHost=127.0.0.1 +StoragePort=3306 +StorageLoc=slurm_acct_db +""" + + def setUp(self): + self.manager = SlurmdbdManager(snap=True) + self.config_name = "slurmdbd" + self.setUpPyfakefs() + self.fs.create_file("/var/snap/slurm/common/.env") + self.fs.create_file( + "/var/snap/slurm/common/etc/slurm/slurmdbd.conf", contents=self.EXAMPLE_SLURMDBD_CONF + ) + + def test_config(self, *_) -> None: + """Test that the manager can manipulate the configuration file.""" + with self.manager.config() as config: + self.assertEqual(config.auth_type, "auth/munge") + self.assertEqual(config.debug_level, "info") + + config.storage_pass = "newpass" + config.log_file = "/var/snap/slurm/common/var/log/slurmdbd.log" + del config.slurm_user + + # Exit the context to save changes to the file + + configs = Path("/var/snap/slurm/common/etc/slurm/slurmdbd.conf").read_text().splitlines() + + self.assertIn("StoragePass=newpass", configs) + self.assertIn("LogFile=/var/snap/slurm/common/var/log/slurmdbd.log", configs) + self.assertNotIn("SlurmUser=slurm", configs) + + +@patch("charms.hpc_libs.v0.slurm_ops.subprocess.check_output") +class TestSlurmdConfig(FsTestCase): + """Test the Slurmd service config manager.""" + + def setUp(self): + self.manager = SlurmdManager(snap=True) + self.setUpPyfakefs() + self.fs.create_file("/var/snap/slurm/common/.env") + + def test_config(self, *_) -> None: + """Test config operations for the slurmd manager.""" + self.manager.config_server = "localhost" + self.assertEqual(self.manager.config_server, "localhost") + self.assertEqual( + dotenv.get_key("/var/snap/slurm/common/.env", "SLURMD_CONFIG_SERVER"), "localhost" + ) + + del self.manager.config_server + self.assertIsNone(self.manager.config_server) diff --git a/tox.ini b/tox.ini index 801d23e..7dd5c44 100644 --- a/tox.ini +++ b/tox.ini @@ -42,15 +42,14 @@ commands = # if this charm owns a lib, uncomment "lib_path" variable # and uncomment the following line # codespell {[vars]lib_path} - codespell {tox_root} + codespell {tox_root} -L assertIn ruff check {[vars]all_path} black --check --diff {[vars]all_path} [testenv:unit] description = Run unit tests deps = - pytest - coverage[toml] + -r {tox_root}/dev-requirements.txt -r {tox_root}/requirements.txt commands = coverage run --source={[vars]lib_path} \