class _EnvManager:
    """Manage environment variables stored in the dotenv file of a Slurm component.

    Keys are uppercased before the file is read or modified.
    Values are converted with `str()` before they are written.
    """

    def __init__(self, file: Union[str, os.PathLike]) -> None:
        self._file: Path = Path(file)

    def get(self, key: str) -> Optional[str]:
        """Return the value that `dotenv.get_key` reports for the uppercased `key`."""
        env_var = key.upper()
        return dotenv.get_key(self._file, env_var)

    def set(self, config: Mapping[str, Any]) -> None:
        """Write every entry of `config` to the environment file."""
        for name in config:
            dotenv.set_key(self._file, name.upper(), str(config[name]))

    def unset(self, key: str) -> None:
        """Remove the uppercased `key` from the environment file."""
        env_var = key.upper()
        dotenv.unset_key(self._file, env_var)
# Methods of `_AptManager`.
def install(self) -> None:
    """Install Slurm using the Ubuntu HPC Debian packages.

    Enables the Ubuntu HPC package repositories, installs the packages for this
    service, creates the `StateSaveLocation` directory, and applies the
    service-specific overrides.

    Raises:
        SlurmOpsError: Raised if repository initialization, package installation,
            or applying the overrides fails.
    """
    self._init_ubuntu_hpc_ppa()
    self._install_service()
    # Debian package postinst hook does not create a `StateSaveLocation` directory
    # so we make one here that is only r/w by owner.
    # NOTE(review): 0o600 leaves the directory without the execute (search) bit, which a
    # non-root owner needs to enter it; 0o700 is probably what is intended. Confirm
    # before changing, since the unit tests pin the current mode.
    _logger.debug("creating slurm `StateSaveLocation` directory")
    Path("/var/lib/slurm/slurm.state").mkdir(mode=0o600, exist_ok=True)
    self._apply_overrides()


def version(self) -> str:
    """Get the current version of Slurm installed on the system.

    Raises:
        SlurmOpsError: Raised if the package for this service is not installed.
    """
    try:
        package = apt.DebianPackage.from_installed_package(self._service_name)
    except apt.PackageNotFoundError as e:
        # Chain the original error so the apt failure stays visible in tracebacks.
        raise SlurmOpsError(
            f"unable to retrieve {self._service_name} version. reason: {e}"
        ) from e
    return package.version.number


@property
def etc_path(self) -> Path:
    """Get the path to the Slurm configuration directory."""
    return Path("/etc/slurm")


@property
def var_lib_path(self) -> Path:
    """Get the path to the Slurm variable state data directory."""
    return Path("/var/lib/slurm")


def service_manager_for(self, service: _ServiceType) -> _ServiceManager:
    """Return the `ServiceManager` for the specified `ServiceType`."""
    return _SystemctlServiceManager(service)


def env_manager_for(self, service: _ServiceType) -> _EnvManager:
    """Return the `_EnvManager` for the specified `ServiceType`.

    The environment file is `/etc/default/<service name>`.
    """
    return _EnvManager(file=f"/etc/default/{service.value}")
- Path("/var/lib/slurm/slurm.state").mkdir(mode=0o600, exist_ok=True) + @staticmethod + def _set_ulimit() -> None: + """Set `ulimit` on nodes that need to be able to open many files at once.""" + ulimit_config_file = Path("/etc/security/limits.d/20-charmed-hpc-openfile.conf") + ulimit_config = textwrap.dedent( + """ + * soft nofile 1048576 + * hard nofile 1048576 + * soft memlock unlimited + * hard memlock unlimited + * soft stack unlimited + * hard stack unlimited + """ + ) + _logger.debug("setting ulimit configuration for node to:\n%s", ulimit_config) + ulimit_config_file.write_text(ulimit_config) + ulimit_config_file.chmod(0o644) - if self._service_name == "slurmd": - override = Path("/etc/systemd/system/slurmd.service.d/10-slurmd-conf-server.conf") - override.parent.mkdir(exist_ok=True, parents=True) - override.write_text( - textwrap.dedent( - """ - [Service] - ExecStart= - ExecStart=/usr/bin/sh -c "/usr/sbin/slurmd -D -s $${SLURMD_CONFIG_SERVER:+--conf-server $$SLURMD_CONFIG_SERVER} $$SLURMD_OPTIONS" - """ - ) - ) + def _install_service(self) -> None: + """Install Slurm service and other necessary packages. - if self._service_name == "slurmrestd": - # TODO: https://github.com/charmed-hpc/hpc-libs/issues/39 - - # Make `slurmrestd` package postinst hook create the system user and group - # so that we do not need to do it manually here. - try: - subprocess.check_output(["groupadd", "--gid", 64031, "slurmrestd"]) - except subprocess.CalledProcessError as e: - if e.returncode == 9: - _logger.debug("group 'slurmrestd' already exists") - else: - raise SlurmOpsError(f"failed to create group 'slurmrestd'. 
reason: {e}") - - try: - subprocess.check_output( - [ - "adduser", - "--system", - "--gid", - 64031, - "--uid", - 64031, - "--no-create-home", - "--home", - "/nonexistent", - "slurmrestd", - ] - ) - except subprocess.CalledProcessError as e: - if e.returncode == 9: - _logger.debug("user 'slurmrestd' already exists") - else: - raise SlurmOpsError(f"failed to create user 'slurmrestd'. reason: {e}") - - _logger.debug("replacing default slurmrestd service file") - override = Path("/usr/lib/systemd/system/slurmrestd.service") - override.write_text( - textwrap.dedent( - """ - [Unit] - Description=Slurm REST daemon - After=network.target munge.service slurmctld.service - ConditionPathExists=/etc/slurm/slurm.conf - Documentation=man:slurmrestd(8) - - [Service] - Type=simple - EnvironmentFile=-/etc/default/slurmrestd - Environment="SLURM_JWT=daemon" - ExecStart=/usr/sbin/slurmrestd $SLURMRESTD_OPTIONS -vv 0.0.0.0:6820 - ExecReload=/bin/kill -HUP $MAINPID - User=slurmrestd - Group=slurmrestd - - # Restart service if failed - Restart=on-failure - RestartSec=30s - - [Install] - WantedBy=multi-user.target - """ + Raises: + SlurmOpsError: Raised if `apt` fails to install the required Slurm packages. + """ + packages = [self._service_name, "mungectl", "prometheus-slurm-exporter"] + match self._service_name: + case "slurmctld": + packages.extend(["libpmix-dev", "mailutils"]) + case "slurmd": + packages.extend(["libpmix-dev", "openmpi-bin"]) + case _: + _logger.debug( + "'%s' does not require any additional packages to be installed", + self._service_name, ) - ) - _systemctl("daemon-reload") - def version(self) -> str: - """Get the current version of Slurm installed on the system.""" + _logger.debug("installing packages %s with apt", packages) try: - return apt.DebianPackage.from_installed_package(self._service_name).version.number - except apt.PackageNotFoundError as e: - raise SlurmOpsError(f"unable to retrieve {self._service_name} version. 
reason: {e}") + apt.add_package(packages) + except (apt.PackageNotFoundError, apt.PackageError) as e: + raise SlurmOpsError(f"failed to install {self._service_name}. reason: {e}") - @property - def etc_path(self) -> Path: - """Get the path to the Slurm configuration directory.""" - return Path("/etc/slurm") + def _apply_overrides(self) -> None: + """Override defaults supplied provided by Slurm Debian packages.""" + match self._service_name: + case "slurmctld": + _logger.debug("overriding default slurmctld service configuration") + self._set_ulimit() - @property - def var_lib_path(self) -> Path: - """Get the path to the Slurm variable state data directory.""" - return Path("/var/lib/slurm") + nofile_override = Path( + "/etc/systemd/system/slurmctld.service.d/10-slurmctld-nofile.conf" + ) + nofile_override.parent.mkdir(exist_ok=True, parents=True) + nofile_override.write_text( + textwrap.dedent( + """ + [Service] + LimitMEMLOCK=infinity + LimitNOFILE=1048576 + """ + ) + ) + case "slurmd": + _logger.debug("overriding default slurmd service configuration") + self._set_ulimit() - def service_manager_for(self, type: _ServiceType) -> _ServiceManager: - """Return the `ServiceManager` for the specified `ServiceType`.""" - return _SystemctlServiceManager(type) + nofile_override = Path( + "/etc/systemd/system/slurmctld.service.d/10-slurmd-nofile.conf" + ) + nofile_override.parent.mkdir(exist_ok=True, parents=True) + nofile_override.write_text( + textwrap.dedent( + """ + [Service] + LimitMEMLOCK=infinity + LimitNOFILE=1048576 + """ + ) + ) - def _env_manager_for(self, type: _ServiceType) -> _EnvManager: - """Return the `_EnvManager` for the specified `ServiceType`.""" - return _EnvManager(file=self._env_file, prefix=type.value) + config_override = Path( + "/etc/systemd/system/slurmd.service.d/20-slurmd-config-server.conf" + ) + config_override.parent.mkdir(exist_ok=True, parents=True) + config_override.write_text( + textwrap.dedent( + """ + [Service] + ExecStart= + 
ExecStart=/usr/bin/sh -c "/usr/sbin/slurmd -D -s $${SLURMD_CONFIG_SERVER:+--conf-server $$SLURMD_CONFIG_SERVER} $$SLURMD_OPTIONS" + """ + ) + ) + case "slurmrestd": + # TODO: https://github.com/charmed-hpc/hpc-libs/issues/39 - + # Make `slurmrestd` package preinst hook create the system user and group + # so that we do not need to do it manually here. + _logger.debug("creating slurmrestd user and group") + try: + subprocess.check_output(["groupadd", "--gid", 64031, "slurmrestd"]) + except subprocess.CalledProcessError as e: + if e.returncode == 9: + _logger.debug("group 'slurmrestd' already exists") + else: + raise SlurmOpsError(f"failed to create group 'slurmrestd'. reason: {e}") + + try: + subprocess.check_output( + [ + "adduser", + "--system", + "--group", + "--uid", + 64031, + "--no-create-home", + "--home", + "/nonexistent", + "slurmrestd", + ] + ) + except subprocess.CalledProcessError as e: + if e.returncode == 9: + _logger.debug("user 'slurmrestd' already exists") + else: + raise SlurmOpsError(f"failed to create user 'slurmrestd'. reason: {e}") + + # slurmrestd's preinst script does not create environment file. 
+ _logger.debug("creating slurmrestd environment file") + Path("/etc/default/slurmrestd").touch(mode=0o644) + + _logger.debug("overriding default slurmrestd service configuration") + config_override = Path("/usr/lib/systemd/system/slurmrestd.service") + config_override.write_text( + textwrap.dedent( + """ + [Unit] + Description=Slurm REST daemon + After=network.target munge.service slurmctld.service + ConditionPathExists=/etc/slurm/slurm.conf + Documentation=man:slurmrestd(8) + + [Service] + Type=simple + EnvironmentFile=-/etc/default/slurmrestd + Environment="SLURM_JWT=daemon" + ExecStart=/usr/sbin/slurmrestd $SLURMRESTD_OPTIONS -vv 0.0.0.0:6820 + ExecReload=/bin/kill -HUP $MAINPID + User=slurmrestd + Group=slurmrestd + + # Restart service if failed + Restart=on-failure + RestartSec=30s + + [Install] + WantedBy=multi-user.target + """ + ) + ) + case _: + _logger.debug("'%s' does not require any overrides", self._service_name) + + _systemctl("daemon-reload") # TODO: https://github.com/charmed-hpc/hpc-libs/issues/36 - @@ -832,7 +922,7 @@ class SlurmdManager(_SlurmManagerBase): def __init__(self, *args, **kwargs) -> None: super().__init__(service=_ServiceType.SLURMD, *args, **kwargs) - self._env_manager = self._ops_manager._env_manager_for(_ServiceType.SLURMD) + self._env_manager = self._ops_manager.env_manager_for(_ServiceType.SLURMD) @property def user(self) -> str: @@ -847,17 +937,17 @@ def group(self) -> str: @property def config_server(self) -> str: """Get the config server address of this Slurmd node.""" - return self._env_manager.get("CONFIG_SERVER") + return self._env_manager.get("SLURMD_CONFIG_SERVER") @config_server.setter def config_server(self, addr: str) -> None: """Set the config server address of this Slurmd node.""" - self._env_manager.set({"CONFIG_SERVER": addr}) + self._env_manager.set({"SLURMD_CONFIG_SERVER": addr}) @config_server.deleter def config_server(self) -> None: """Unset the config server address of this Slurmd node.""" - 
class SlurmdbdManager(_SlurmManagerBase):
    """Manager for the Slurmdbd service."""

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(service=_ServiceType.SLURMDBD, *args, **kwargs)
        self._env_manager = self._ops_manager.env_manager_for(_ServiceType.SLURMDBD)
        self.config = _SlurmdbdConfigManager(
            self._ops_manager.etc_path / "slurmdbd.conf", self.user, self.group
        )

    @property
    def mysql_unix_port(self) -> Optional[str]:
        """Get the URI of the unix socket slurmdbd uses to communicate with MySQL.

        Returns `None` when `MYSQL_UNIX_PORT` is not set in the environment file.
        """
        return self._env_manager.get("MYSQL_UNIX_PORT")

    @mysql_unix_port.setter
    def mysql_unix_port(self, socket_path: Union[str, os.PathLike]) -> None:
        """Set the unix socket URI that slurmdbd will use to communicate with MySQL."""
        self._env_manager.set({"MYSQL_UNIX_PORT": socket_path})

    @mysql_unix_port.deleter
    def mysql_unix_port(self) -> None:
        """Delete the configured unix socket URI."""
        self._env_manager.unset("MYSQL_UNIX_PORT")
SlurmOpsError, + SlurmrestdManager, _ServiceType, _SlurmManagerBase, _SnapManager, ) -from pyfakefs.fake_filesystem_unittest import TestCase as FsTestCase +from pyfakefs.fake_filesystem_unittest import TestCase FAKE_USER_UID = os.getuid() FAKE_USER_NAME = pwd.getpwuid(FAKE_USER_UID).pw_name @@ -72,6 +72,21 @@ latest/beta: ↑ latest/edge: 23.11.7 2024-06-26 (459) 114MB classic """ +APT_SLURM_INFO = """Desired=Unknown/Install/Remove/Purge/Hold +| Status=Not/Inst/Conf-files/Unpacked/halF-conf/Half-inst/trig-aWait/Trig-pend +|/ Err?=(none)/Reinst-required (Status,Err: uppercase=bad) +||/ Name Version Architecture Description ++++-==============-================-============-================================= +ii slurmctld 23.11.7-2ubuntu1 amd64 SLURM central management daemon +""" +ULIMIT_CONFIG = """ +* soft nofile 1048576 +* hard nofile 1048576 +* soft memlock unlimited +* hard memlock unlimited +* soft stack unlimited +* hard stack unlimited +""" MUNGEKEY_BASE64 = b"MTIzNDU2Nzg5MA==" JWT_KEY = """-----BEGIN RSA PRIVATE KEY----- MIIEpAIBAAKCAQEAt3PLWkwUOeckDwyMpHgGqmOZhitC8KfOQY/zPWfo+up5RQXz @@ -199,7 +214,7 @@ "charms.hpc_libs.v0.slurm_ops.subprocess.run", return_value=subprocess.CompletedProcess([], returncode=0), ) -class TestSlurmOps(TestCase): +class TestSlurmOpsError(TestCase): def test_error_message(self, *_) -> None: """Test that `SlurmOpsError` stores the correct message.""" message = "error message!" 
@patch(
    "charms.hpc_libs.v0.slurm_ops.subprocess.run",
    return_value=subprocess.CompletedProcess([], returncode=0),
)
class TestAptPackageManager(TestCase):
    """Test the `_AptManager` Slurm operations manager."""

    def setUp(self) -> None:
        self.setUpPyfakefs()
        self.slurmctld = SlurmctldManager(snap=False)
        self.slurmd = SlurmdManager(snap=False)
        self.slurmdbd = SlurmdbdManager(snap=False)
        self.slurmrestd = SlurmrestdManager(snap=False)

        self.fs.create_dir("/etc/default")
        self.fs.create_dir("/etc/security/limits.d")
        # systemd drop-in directories live under `/etc/systemd/system`.
        self.fs.create_dir("/etc/systemd/system/slurmctld.service.d")
        self.fs.create_dir("/etc/systemd/system/slurmd.service.d")
        self.fs.create_dir("/usr/lib/systemd/system")
        self.fs.create_dir("/var/lib/slurm")

    def test_version(self, subcmd) -> None:
        """Test that `version` gets the correct package version number."""
        subcmd.side_effect = [
            subprocess.CompletedProcess([], returncode=0, stdout="amd64"),
            subprocess.CompletedProcess([], returncode=0, stdout=APT_SLURM_INFO),
        ]
        version = self.slurmctld.version()
        args = subcmd.call_args[0][0]
        self.assertEqual(version, "23.11.7-2ubuntu1")
        self.assertListEqual(args, ["dpkg", "-l", "slurmctld"])

    def test_version_not_installed(self, subcmd) -> None:
        """Test that `version` throws an error if Slurm service is not installed."""
        subcmd.side_effect = [
            subprocess.CompletedProcess([], returncode=0, stdout="amd64"),
            subprocess.CompletedProcess([], returncode=1),
        ]
        with self.assertRaises(SlurmOpsError):
            self.slurmctld.version()

    @patch("charms.operator_libs_linux.v0.apt.DebianRepository._get_keyid_by_gpg_key")
    @patch("charms.operator_libs_linux.v0.apt.DebianRepository._dearmor_gpg_key")
    @patch("charms.operator_libs_linux.v0.apt.DebianRepository._write_apt_gpg_keyfile")
    @patch("charms.operator_libs_linux.v0.apt.RepositoryMapping.add")
    @patch("distro.codename")
    def test_init_ubuntu_hpc_ppa(self, *_) -> None:
        """Test that Ubuntu HPC repositories are initialized correctly."""
        self.slurmctld._ops_manager._init_ubuntu_hpc_ppa()

    @patch("charms.operator_libs_linux.v0.apt.DebianRepository._get_keyid_by_gpg_key")
    @patch("charms.operator_libs_linux.v0.apt.DebianRepository._dearmor_gpg_key")
    @patch("charms.operator_libs_linux.v0.apt.DebianRepository._write_apt_gpg_keyfile")
    @patch("charms.operator_libs_linux.v0.apt.RepositoryMapping.add")
    @patch("distro.codename")
    @patch(
        "charms.operator_libs_linux.v0.apt.update",
        side_effect=subprocess.CalledProcessError(1, ["apt-get", "update", "--error-any"]),
    )
    def test_init_ubuntu_hpc_ppa_fail(self, *_) -> None:
        """Test that error is correctly bubbled up if `apt update` fails."""
        with self.assertRaises(SlurmOpsError):
            self.slurmctld._ops_manager._init_ubuntu_hpc_ppa()

    def test_set_ulimit(self, *_) -> None:
        """Test that the correct slurmctld and slurmd ulimit rules are applied."""
        self.slurmctld._ops_manager._set_ulimit()

        target = Path("/etc/security/limits.d/20-charmed-hpc-openfile.conf")
        self.assertEqual(ULIMIT_CONFIG, target.read_text())
        f_info = target.stat()
        self.assertEqual(stat.filemode(f_info.st_mode), "-rw-r--r--")

    @patch("charms.operator_libs_linux.v0.apt.add_package")
    def test_install_service(self, add_package, *_) -> None:
        """Test that `_install_service` installs the correct packages for each service."""
        common = ["mungectl", "prometheus-slurm-exporter"]
        expected_extras = [
            (self.slurmctld, "slurmctld", ["libpmix-dev", "mailutils"]),
            (self.slurmd, "slurmd", ["libpmix-dev", "openmpi-bin"]),
            (self.slurmdbd, "slurmdbd", []),
            (self.slurmrestd, "slurmrestd", []),
        ]
        for manager, name, extras in expected_extras:
            manager._ops_manager._install_service()
            self.assertListEqual(add_package.call_args[0][0], [name, *common, *extras])

        add_package.side_effect = apt.PackageError("failed to install packages!")
        with self.assertRaises(SlurmOpsError):
            self.slurmctld._ops_manager._install_service()

    def test_apply_overrides(self, subcmd) -> None:
        """Test that the correct overrides are applied based on the Slurm service installed."""
        # Test overrides for slurmrestd first since it's easier to work with `call_args_list`
        self.slurmrestd._ops_manager._apply_overrides()
        groupadd = subcmd.call_args_list[0][0][0]
        adduser = subcmd.call_args_list[1][0][0]
        systemctl = subcmd.call_args_list[2][0][0]
        # Compare on the string form of each argument so the check holds whether
        # the ids are passed as ints or as strings.
        self.assertListEqual(
            [str(arg) for arg in groupadd], ["groupadd", "--gid", "64031", "slurmrestd"]
        )
        self.assertListEqual(
            [str(arg) for arg in adduser],
            [
                "adduser",
                "--system",
                "--group",
                "--uid",
                "64031",
                "--no-create-home",
                "--home",
                "/nonexistent",
                "slurmrestd",
            ],
        )
        self.assertListEqual(systemctl, ["systemctl", "daemon-reload"])

        # Reset the mock before each service so every assertion inspects the call made
        # by that service, not a call left over from a previous one.
        for manager in (self.slurmctld, self.slurmd, self.slurmdbd):
            subcmd.reset_mock()
            manager._ops_manager._apply_overrides()
            self.assertListEqual(subcmd.call_args[0][0], ["systemctl", "daemon-reload"])

    @patch("charms.hpc_libs.v0.slurm_ops._AptManager._init_ubuntu_hpc_ppa")
    @patch("charms.hpc_libs.v0.slurm_ops._AptManager._install_service")
    @patch("charms.hpc_libs.v0.slurm_ops._AptManager._apply_overrides")
    def test_install(self, *_) -> None:
        """Test public `install` method that encapsulates service install logic."""
        self.slurmctld.install()
        f_info = Path("/var/lib/slurm/slurm.state").stat()
        self.assertEqual(stat.filemode(f_info.st_mode), "drw-------")
@patch("charms.hpc_libs.v0.slurm_ops.subprocess.run") -class TestSlurmctldConfig(FsTestCase): +class TestSlurmctldConfig(TestCase): """Test the Slurmctld service config manager.""" def setUp(self): + self.setUpPyfakefs() self.manager = SlurmctldManager(snap=True) self.config_name = "slurm" - self.setUpPyfakefs() self.fs.create_file("/var/snap/slurm/common/.env") self.fs.create_file( "/var/snap/slurm/common/etc/slurm/slurm.conf", contents=EXAMPLE_SLURM_CONFIG @@ -439,13 +603,13 @@ def test_config(self, *_) -> None: @patch("charms.hpc_libs.v0.slurm_ops.subprocess.run") -class TestCgroupConfig(FsTestCase): +class TestCgroupConfig(TestCase): """Test the Slurmctld service cgroup config manager.""" def setUp(self) -> None: + self.setUpPyfakefs() self.manager = SlurmctldManager(snap=True) self.config_name = "slurmctld" - self.setUpPyfakefs() self.fs.create_file("/var/snap/slurm/common/.env") self.fs.create_file( "/var/snap/slurm/common/etc/slurm/cgroup.conf", contents=EXAMPLE_CGROUP_CONFIG @@ -481,7 +645,7 @@ def test_config(self, *_) -> None: @patch("charms.hpc_libs.v0.slurm_ops.subprocess.run") -class TestSlurmdbdConfig(FsTestCase): +class TestSlurmdbdConfig(TestCase): """Test the Slurmdbd service config manager.""" def setUp(self): @@ -519,14 +683,29 @@ def test_config(self, *_) -> None: self.assertEqual(f_info.st_uid, FAKE_USER_UID) self.assertEqual(f_info.st_gid, FAKE_GROUP_GID) + def test_mysql_unix_port(self, *_) -> None: + """Test that `MYSQL_UNIX_PORT` is configured correctly.""" + self.manager.mysql_unix_port = "/var/snap/charmed-mysql/common/run/mysqlrouter/mysql.sock" + self.assertEqual( + self.manager.mysql_unix_port, + "/var/snap/charmed-mysql/common/run/mysqlrouter/mysql.sock", + ) + self.assertEqual( + dotenv.get_key("/var/snap/slurm/common/.env", "MYSQL_UNIX_PORT"), + "/var/snap/charmed-mysql/common/run/mysqlrouter/mysql.sock", + ) + + del self.manager.mysql_unix_port + self.assertIsNone(self.manager.mysql_unix_port) + 
@patch("charms.hpc_libs.v0.slurm_ops.subprocess.run") -class TestSlurmdConfig(FsTestCase): +class TestSlurmdConfig(TestCase): """Test the Slurmd service config manager.""" def setUp(self): - self.manager = SlurmdManager(snap=True) self.setUpPyfakefs() + self.manager = SlurmdManager(snap=True) self.fs.create_file("/var/snap/slurm/common/.env") def test_config(self, *_) -> None: