diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a314592..c1014b1 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -23,7 +23,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: woke uses: get-woke/woke-action@v0 with: @@ -35,18 +35,29 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install dependencies run: python3 -m pip install tox - name: Run linters run: tox -e lint + type: + name: Type check with pyright + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Install dependencies + run: python3 -m pip install tox + - name: Run pyright + run: tox -e type + unit-test: name: Unit tests runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install dependencies run: python3 -m pip install tox - name: Run tests @@ -63,10 +74,11 @@ jobs: needs: - inclusive-naming-check - lint + - type - unit-test steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup operator environment uses: charmed-kubernetes/actions-operator@main with: diff --git a/.gitignore b/.gitignore index 9ceb2e5..9469ce2 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,6 @@ __pycache__/ *.py[cod] .idea .vscode/ -version + +# Disable woke checking for nhc.conf.tmpl +src/templates/nhc.conf.tmpl diff --git a/README.md b/README.md index 544759c..ee81b61 100644 --- a/README.md +++ b/README.md @@ -24,13 +24,33 @@ This operator should be used with Juju 3.x or greater. 
```shell $ juju deploy slurmctld --channel edge $ juju deploy slurmd --channel edge -$ juju deploy slurmdbd --channel edge -$ juju deploy mysql --channel 8.0/edge -$ juju deploy mysql-router slurmdbd-mysql-router --channel dpe/edge -$ juju integrate slurmctld:slurmd slurmd:slurmd -$ juju integrate slurmdbd-mysql-router:backend-database mysql:database -$ juju integrate slurmdbd:database slurmdbd-mysql-router:database -$ juju integrate slurmctld:slurmdbd slurmdbd:slurmdbd +$ juju integrate slurmctld:slurmd slurmd:slurmctld +``` + +### Operations +This charm hardens and simplifies operations by codifying common administration operations as charm actions. + +#### Partition Configuration +Specify partition parameters using the charm configuration, `partition-config`. + +##### Use the `partition-config` to set custom partition parameters. +```bash +$ juju config slurmd partition-config="State=INACTIVE" +``` + +#### Node Configuration Parameters +You can get and set the node configuration using the `node-config` action. + +##### Use the `node-config` action to get the node configuration for the unit. +```bash +$ juju run --quiet slurmd/0 node-config --format json | jq ".[].results.node.config" +"NodeName=juju-462521-4 NodeAddr=10.240.222.28 State=UNKNOWN RealMemory=64012 CPUs=12 ThreadsPerCore=2 CoresPerSocket=6 SocketsPerBoard=1" +``` + +##### Use the `node-config` action to set a custom weight value for the node. +```bash +$ juju run --quiet slurmd/0 node-config parameters="Weight=5000" --format json | jq ".[].results.node.config" +"NodeName=juju-462521-4 NodeAddr=10.240.222.28 State=UNKNOWN RealMemory=64012 CPUs=12 ThreadsPerCore=2 CoresPerSocket=6 SocketsPerBoard=1 Weight=5000" ``` ## Project & Community diff --git a/actions.yaml b/actions.yaml deleted file mode 100644 index 5be9cf4..0000000 --- a/actions.yaml +++ /dev/null @@ -1,15 +0,0 @@ -version: - description: Return version of installed software. 
-node-configured: - description: Remove a node from DownNodes when the reason is `New node`. -get-node-inventory: - description: Return node inventory. -set-node-inventory: - description: Modify node inventory. - params: - real-memory: - type: integer - description: Total amount of memory of the node, in MB. - -show-nhc-config: - description: Display the currently used `nhc.conf`. diff --git a/charmcraft.yaml b/charmcraft.yaml index 19539d7..dac7bdb 100644 --- a/charmcraft.yaml +++ b/charmcraft.yaml @@ -1,7 +1,29 @@ -# Copyright 2020 Omnivector, LLC -# See LICENSE file for licensing details. - +name: slurmd type: charm + +summary: | + Slurmd, the compute node daemon of Slurm. + +description: | + This charm provides slurmd, munged, and the bindings to other utilities + that make lifecycle operations a breeze. + + slurmd is the compute node daemon of SLURM. It monitors all tasks running + on the compute node, accepts work (tasks), launches tasks, and kills + running tasks upon request. + +links: + contact: https://matrix.to/#/#hpc:ubuntu.com + + issues: + - https://github.com/charmed-hpc/slurmd-operator/issues + + source: + - https://github.com/charmed-hpc/slurmd-operator + +assumes: + - juju + bases: - build-on: - name: ubuntu @@ -10,25 +32,71 @@ bases: - name: ubuntu channel: "22.04" architectures: [amd64] - - name: centos - channel: "7" - architectures: [amd64] parts: charm: - build-packages: [git] - charm-python-packages: [setuptools] - - # Create a version file and pack it into the charm. This is dynamically generated - # as part of the build process for a charm to ensure that the git revision of the - # charm is always recorded in this version file. 
- version-file: - plugin: nil build-packages: - - git + - wget override-build: | - VERSION=$(git -C $CRAFT_PART_SRC/../../charm/src describe --dirty --always) - echo "Setting version to $VERSION" - echo $VERSION > $CRAFT_PART_INSTALL/version - stage: - - version + wget https://github.com/mej/nhc/releases/download/1.4.3/lbnl-nhc-1.4.3.tar.gz + craftctl default + +provides: + slurmctld: + interface: slurmd + limit: 1 + +config: + options: + partition-config: + type: string + default: "" + description: > + Additional partition configuration parameters, specified as a space separated `key=value` + in a single line. Find a list of all possible partition configuration parameters + [here](https://slurm.schedmd.com/slurm.conf.html#SECTION_PARTITION-CONFIGURATION). + + + Example usage: + ```bash + $ juju config slurmd partition-config="DefaultTime=45:00 MaxTime=1:00:00" + ``` + + nhc-conf: + default: "" + type: string + description: > + Multiline string. + These lines are appended to the `nhc.conf` maintained by the charm. + + Example usage: + ```bash + $ juju config slurmd nhc-conf="$(cat extra-nhc.conf)" + ``` + +actions: + node-configured: + description: Remove a node from DownNodes when the reason is `New node`. + + node-config: + description: > + Set or return node configuration parameters. + + To get the current node configuration for this unit: + ```bash + $ juju run slurmd/0 node-config + ``` + + To set node level configuration parameters for the unit `slurmd/0`: + ```bash + $ juju run slurmd/0 node-config parameters="Weight=200 Gres=gpu:tesla:1,gpu:kepler:1,bandwidth:lustre:no_consume:4G" + ``` + + params: + parameters: + type: string + description: > + Node configuration parameter as defined [here](https://slurm.schedmd.com/slurm.conf.html#SECTION_NODE-CONFIGURATION). + + show-nhc-config: + description: Display `nhc.conf`. 
diff --git a/config.yaml b/config.yaml deleted file mode 100644 index b2ebfea..0000000 --- a/config.yaml +++ /dev/null @@ -1,40 +0,0 @@ -options: - custom-slurm-repo: - type: string - default: "" - description: > - Use a custom repository for Slurm installation. - - This can be set to the Organization's local mirror/cache of packages and - supersedes the Omnivector repositories. Alternatively, it can be used to - track a `testing` Slurm version, e.g. by setting to - `ppa:omnivector/osd-testing` (on Ubuntu), or - `https://omnivector-solutions.github.io/repo/centos7/stable/$basearch` - (on CentOS). - - Note: The configuration `custom-slurm-repo` must be set *before* - deploying the units. Changing this value after deploying the units will - not reinstall Slurm. - partition-config: - type: string - default: "" - description: > - Extra partition configuration, specified as a space separated `key=value` - in a single line. - - Example usage: - $ juju config slurmd partition-config="DefaultTime=45:00 MaxTime=1:00:00" - partition-state: - type: string - default: "UP" - description: > - State of partition or availability for use. Possible values are `UP`, - `DOWN`, `DRAIN` and `INACTIVE`. The default value is `UP`. See also the - related `Alternate` keyword. - nhc-conf: - default: "" - type: string - description: > - Custom extra configuration to use for Node Health Check. - - These lines are appended to a basic `nhc.conf` provided by the charm. diff --git a/dispatch b/dispatch index 7f58019..2b6f3b8 100755 --- a/dispatch +++ b/dispatch @@ -1,44 +1,11 @@ #!/bin/bash -# This hook installs the dependencies needed to run the charm, -# creates the dispatch executable, regenerates the symlinks for start and -# upgrade-charm, and kicks off the operator framework. - set -e -# Source the os-release information into the env -. /etc/os-release - if ! 
[[ -f '.installed' ]] then - if [[ $ID == 'centos' ]] - then - # Install dependencies and build custom python - yum -y install epel-release - yum -y install wget gcc make tar bzip2-devel zlib-devel xz-devel openssl-devel libffi-devel sqlite-devel ncurses-devel - - export PYTHON_VERSION=3.8.16 - wget https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tar.xz -P /tmp - tar xvf /tmp/Python-${PYTHON_VERSION}.tar.xz -C /tmp - cd /tmp/Python-${PYTHON_VERSION} - ./configure --enable-optimizations - make -C /tmp/Python-${PYTHON_VERSION} -j $(nproc) altinstall - cd $OLDPWD - rm -rf /tmp/Python* - - elif [[ $ID == 'ubuntu' ]] - then - # Necessary to compile and install NHC - apt-get install --assume-yes make - fi - touch .installed -fi - -# set the correct python bin path -if [[ $ID == "centos" ]] -then - PYTHON_BIN="/usr/bin/env python3.8" -else - PYTHON_BIN="/usr/bin/env python3" + # Necessary to compile and install NHC + apt-get install --assume-yes make + touch .installed fi -JUJU_DISPATCH_PATH="${JUJU_DISPATCH_PATH:-$0}" PYTHONPATH=lib:venv $PYTHON_BIN ./src/charm.py \ No newline at end of file +JUJU_DISPATCH_PATH="${JUJU_DISPATCH_PATH:-$0}" PYTHONPATH=lib:venv /usr/bin/env python3 ./src/charm.py diff --git a/lib/charms/fluentbit/v0/fluentbit.py b/lib/charms/fluentbit/v0/fluentbit.py deleted file mode 100644 index aa68450..0000000 --- a/lib/charms/fluentbit/v0/fluentbit.py +++ /dev/null @@ -1,203 +0,0 @@ -r"""Fluentbit charm libraries. - -This library contains two main classes: `FluentbitProvider` and -`FluentbitClient`. `FluentbitProvider` class is instantiated in the Fluentbit -Server charm, and receives configuration data through a relation to other -charms. `FluentbitClient` class should be instantiated in any charm that wants -to ship logs via Fluentbit. 
- -## Forwarding logs using Fluentbit - -To forward logs from your charm to a centralized place using Fluentbit, -instantiate the `FluentbitClient()` class and handle the `relation_created` -event in your main charm code. In this event, your charm must pass all the -configuration parameters necessary to configure Fluentbit: the inputs, the -parsers, and the filters. - -For example: - -```python -class MyCharm(CharmBase): - def __init__(self, *args): - super().__init__(*args) - - self._fluentbit = FluentbitClient(self, "fluentbit") - - self.framework.observe(self.on.fluentbit.relation_created, - self._fluentbit_relation_created) - - def _fluentbit_relation_created(self, event): - cfg = [{"input": [("name", "tail"), - ("path", "/var/log/foo/bar.log"), - ("path_key", "filename"), - ("tag", "foo"), - ("parser", "bar")]}, - {"parser": [("name", "bar"), - ("format", "regex"), - ("regex", "^\[(?