From bb03e5fd7d449655e8123436ea6abd7b97b570dd Mon Sep 17 00:00:00 2001
From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com>
Date: Wed, 20 Nov 2024 15:01:10 -0600
Subject: [PATCH 01/13] chore(deps): update dependency cosl to v0.0.43 (#165)

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
---
 charms/worker/k8s/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/charms/worker/k8s/requirements.txt b/charms/worker/k8s/requirements.txt
index 1fd57d98..9e532201 100644
--- a/charms/worker/k8s/requirements.txt
+++ b/charms/worker/k8s/requirements.txt
@@ -6,7 +6,7 @@ ops-interface-kube-control @ git+https://github.com/charmed-kubernetes/interface
 ops.interface_aws @ git+https://github.com/charmed-kubernetes/interface-aws-integration@main#subdirectory=ops
 ops.interface_gcp @ git+https://github.com/charmed-kubernetes/interface-gcp-integration@main#subdirectory=ops
 ops.interface_azure @ git+https://github.com/charmed-kubernetes/interface-azure-integration@main#subdirectory=ops
-cosl==0.0.42
+cosl==0.0.43
 ops==2.17.0
 pydantic==1.10.19
 PyYAML==6.0.2

From f2afacec840ff49b6ebc723c5d56e59b5cab15f0 Mon Sep 17 00:00:00 2001
From: Lucian Petrut
Date: Thu, 21 Nov 2024 00:12:26 +0200
Subject: [PATCH 02/13] Extend ceph sc tests (#174)

We'll extend the Ceph storage class test to actually create a PVC and
ensure that pods can read/write data from these volumes.

While at it, we're making some slight changes to the ceph bundle. At the
moment, it deploys a single unit with two OSDs and node-based replication
(the default). For this reason, the PGs are inactive and the rbd commands
hang (and, implicitly, so does the Ceph provisioner). We'll fix this by
using three units, each containing one OSD.

Co-authored-by: Adam Dyess
---
 tests/integration/conftest.py                |  2 +-
 tests/integration/data/test-bundle-ceph.yaml |  4 +-
 .../data/test_ceph/ceph-xfs-pvc.yaml         | 16 +++++
 .../data/test_ceph/pv-reader-pod.yaml        | 21 ++++++
 .../data/test_ceph/pv-writer-pod.yaml        | 21 ++++++
 tests/integration/helpers.py                 | 66 ++++++++++++++++++-
 tests/integration/test_ceph.py               | 40 ++++++++++-
 tox.ini                                      |  4 +-
 8 files changed, 168 insertions(+), 6 deletions(-)
 create mode 100755 tests/integration/data/test_ceph/ceph-xfs-pvc.yaml
 create mode 100755 tests/integration/data/test_ceph/pv-reader-pod.yaml
 create mode 100755 tests/integration/data/test_ceph/pv-writer-pod.yaml

diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index ffd7857a..42a37073 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -343,7 +343,7 @@ async def deploy_model(
     await the_model.wait_for_idle(
         apps=list(bundle.applications),
         status="active",
-        timeout=30 * 60,
+        timeout=60 * 60,
     )
     try:
         yield the_model
diff --git a/tests/integration/data/test-bundle-ceph.yaml b/tests/integration/data/test-bundle-ceph.yaml
index b1a26b9a..8b803e9e 100644
--- a/tests/integration/data/test-bundle-ceph.yaml
+++ b/tests/integration/data/test-bundle-ceph.yaml
@@ -33,9 +33,9 @@ applications:
     charm: ceph-osd
     channel: quincy/stable
     constraints: cores=2 mem=4G root-disk=16G
-    num_units: 1
+    num_units: 3
     storage:
-      osd-devices: 1G,2
+      osd-devices: 1G,1
       osd-journals: 1G,1
 relations:
 - [k8s, k8s-worker:cluster]
diff --git a/tests/integration/data/test_ceph/ceph-xfs-pvc.yaml b/tests/integration/data/test_ceph/ceph-xfs-pvc.yaml
new file mode 100755
index 00000000..ae5c5685
--- /dev/null
+++ b/tests/integration/data/test_ceph/ceph-xfs-pvc.yaml
@@ -0,0 +1,16 @@
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details. + +apiVersion: v1 +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: raw-block-pvc +spec: + accessModes: + - ReadWriteOnce + volumeMode: Filesystem + resources: + requests: + storage: 64Mi + storageClassName: ceph-xfs diff --git a/tests/integration/data/test_ceph/pv-reader-pod.yaml b/tests/integration/data/test_ceph/pv-reader-pod.yaml new file mode 100755 index 00000000..c87c8691 --- /dev/null +++ b/tests/integration/data/test_ceph/pv-reader-pod.yaml @@ -0,0 +1,21 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. + +apiVersion: v1 +kind: Pod +metadata: + name: pv-reader-test + namespace: default +spec: + restartPolicy: Never + volumes: + - name: pvc-test + persistentVolumeClaim: + claimName: raw-block-pvc + containers: + - name: pv-reader + image: busybox + command: ["/bin/sh", "-c", "cat /pvc/test_file"] + volumeMounts: + - name: pvc-test + mountPath: /pvc diff --git a/tests/integration/data/test_ceph/pv-writer-pod.yaml b/tests/integration/data/test_ceph/pv-writer-pod.yaml new file mode 100755 index 00000000..129849b5 --- /dev/null +++ b/tests/integration/data/test_ceph/pv-writer-pod.yaml @@ -0,0 +1,21 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. + +apiVersion: v1 +kind: Pod +metadata: + name: pv-writer-test + namespace: default +spec: + restartPolicy: Never + volumes: + - name: pvc-test + persistentVolumeClaim: + claimName: raw-block-pvc + containers: + - name: pv-writer + image: busybox + command: ["/bin/sh", "-c", "echo 'PVC test data.' > /pvc/test_file"] + volumeMounts: + - name: pvc-test + mountPath: /pvc diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py index f58037e3..c1cee35a 100644 --- a/tests/integration/helpers.py +++ b/tests/integration/helpers.py @@ -2,6 +2,8 @@ # See LICENSE file for licensing details. """Additions to tools missing from juju library.""" +# pylint: disable=too-many-arguments,too-many-positional-arguments + import ipaddress import json import logging @@ -9,8 +11,9 @@ from typing import List import yaml +from juju import unit from juju.model import Model -from tenacity import retry, stop_after_attempt, wait_fixed +from tenacity import AsyncRetrying, retry, stop_after_attempt, wait_fixed log = logging.getLogger(__name__) @@ -117,3 +120,64 @@ async def ready_nodes(k8s, expected_count): for node, ready in ready_nodes.items(): log.info("Node %s is %s..", node, "ready" if ready else "not ready") assert ready, f"Node not yet ready: {node}." + + +async def wait_pod_phase( + k8s: unit.Unit, + name: str, + phase: str, + namespace: str = "default", + retry_times: int = 30, + retry_delay_s: int = 5, +): + """Wait for the pod to reach the specified phase (e.g. Succeeded). + + Args: + k8s: k8s unit + name: the pod name + phase: expected phase + namespace: pod namespace + retry_times: the number of retries + retry_delay_s: retry interval + + """ + async for attempt in AsyncRetrying( + stop=stop_after_attempt(retry_times), wait=wait_fixed(retry_delay_s) + ): + with attempt: + cmd = " ".join( + [ + "k8s kubectl wait", + f"--namespace {namespace}", + "--for=jsonpath='{.status.phase}'=" + phase, + f"pod/{name}", + "--timeout 1s", + ] + ) + action = await k8s.run(cmd) + result = await action.wait() + assert ( + result.results["return-code"] == 0 + ), f"Failed waiting for pod to reach {phase} phase." + + +async def get_pod_logs( + k8s: unit.Unit, + name: str, + namespace: str = "default", +) -> str: + """Retrieve pod logs. 
+ + Args: + k8s: k8s unit + name: pod name + namespace: pod namespace + + Returns: + the pod logs as string. + """ + cmd = " ".join(["k8s kubectl logs", f"--namespace {namespace}", f"pod/{name}"]) + action = await k8s.run(cmd) + result = await action.wait() + assert result.results["return-code"] == 0, f"Failed to retrieve pod {name} logs." + return result.results["stdout"] diff --git a/tests/integration/test_ceph.py b/tests/integration/test_ceph.py index c526a998..0bfc02d8 100644 --- a/tests/integration/test_ceph.py +++ b/tests/integration/test_ceph.py @@ -6,9 +6,13 @@ # pylint: disable=duplicate-code """Integration tests.""" +from pathlib import Path + import pytest from juju import model, unit +from . import helpers + # This pytest mark configures the test environment to use the Canonical Kubernetes # bundle with ceph, for all the test within this module. pytestmark = [ @@ -17,11 +21,45 @@ ] +def _get_data_file_path(name) -> str: + """Retrieve the full path of the specified test data file.""" + path = Path(__file__).parent / "data" / "test_ceph" / name + return str(path) + + @pytest.mark.abort_on_fail async def test_ceph_sc(kubernetes_cluster: model.Model): - """Test that a ceph storage class is available.""" + """Test that a ceph storage class is available and validate pv attachments.""" k8s: unit.Unit = kubernetes_cluster.applications["k8s"].units[0] event = await k8s.run("k8s kubectl get sc -o=jsonpath='{.items[*].provisioner}'") result = await event.wait() stdout = result.results["stdout"] assert "rbd.csi.ceph.com" in stdout, f"No ceph provisioner found in: {stdout}" + + # Copy pod definitions. + for fname in ["ceph-xfs-pvc.yaml", "pv-writer-pod.yaml", "pv-reader-pod.yaml"]: + await k8s.scp_to(_get_data_file_path(fname), f"/tmp/{fname}") + + # Create "ceph-xfs" PVC. + event = await k8s.run("k8s kubectl apply -f /tmp/ceph-xfs-pvc.yaml") + result = await event.wait() + assert result.results["return-code"] == 0, "Failed to create pvc." + + # Create a pod that writes to the Ceph PV. + event = await k8s.run("k8s kubectl apply -f /tmp/pv-writer-pod.yaml") + result = await event.wait() + assert result.results["return-code"] == 0, "Failed to create writer pod." + + # Wait for the pod to exit successfully. + await helpers.wait_pod_phase(k8s, "pv-writer-test", "Succeeded") + + # Create a pod that reads the PV data and writes it to the log. + event = await k8s.run("k8s kubectl apply -f /tmp/pv-reader-pod.yaml") + result = await event.wait() + assert result.results["return-code"] == 0, "Failed to create reader pod." + + await helpers.wait_pod_phase(k8s, "pv-reader-test", "Succeeded") + + # Check the logged PV data. 
logs = await helpers.get_pod_logs(k8s, "pv-reader-test")
+    assert "PVC test data" in logs
diff --git a/tox.ini b/tox.ini
index 9241a42b..4ec8b7df 100644
--- a/tox.ini
+++ b/tox.ini
@@ -89,8 +89,10 @@ description = Run integration tests
 deps = -r test_requirements.txt
 commands = pytest -v --tb native \
-        --log-cli-level=INFO \
         -s {toxinidir}/tests/integration \
+        --log-cli-level INFO \
+        --log-format "%(asctime)s %(levelname)s %(message)s" \
+        --log-date-format "%Y-%m-%d %H:%M:%S" \
         --crash-dump=on-failure \
         --crash-dump-args='-j snap.k8s.* --as-root' \
         {posargs}

From f7c2167c23ee690a463413f062fc0345486658fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciek=20Go=C5=82aszewski?=
Date: Thu, 21 Nov 2024 14:05:31 +0100
Subject: [PATCH 03/13] Create SECURITY.md (#180)

This PR adds SECURITY.md, introducing a security policy that enables users
to report security issues.
---
 SECURITY.md | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 SECURITY.md

diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 00000000..a7b53c1a
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,15 @@
+# Security Policy
+
+## Reporting a Vulnerability
+
+To report a security issue, please follow the steps below:
+
+Using GitHub, file a [Private Security Report](https://github.com/canonical/k8s-operator/security/advisories/new) with:
+- A description of the issue
+- Steps to reproduce the issue
+- Affected versions of the `k8s-operator` package
+- Any known mitigations for the issue
+
+The [Ubuntu Security disclosure and embargo policy](https://ubuntu.com/security/disclosure-policy) contains more information about what to expect during this process and our requirements for responsible disclosure.
+
+Thank you for contributing to the security and integrity of the `k8s-operator`!
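The snap-installation resource introduced in the next patch is a gzipped tarball carrying either a snap_installation.yaml manifest or a .snap file. As a rough sketch of what a channel-override resource contains (the "latest/edge" channel and the output filename below are only examples, mirroring the override-latest-edge.tar.gz test fixture and the manifest shape used in the unit tests further down), such a tarball can be built with the Python standard library alone:

    # Sketch only: build a snap-installation override tarball.
    # The manifest shape mirrors the unit tests; the channel is an example.
    import io
    import tarfile

    manifest = b"amd64:\n- install-type: store\n  name: k8s\n  channel: latest/edge\n"

    with tarfile.open("override-latest-edge.tar.gz", mode="w:gz") as tar:
        info = tarfile.TarInfo(name="./snap_installation.yaml")
        info.size = len(manifest)
        tar.addfile(info, io.BytesIO(manifest))

    # The tarball can then be attached to a deployed application, e.g.:
    #   juju attach-resource k8s snap-installation=./override-latest-edge.tar.gz

Attaching a zero-byte file (the default resource content) reverts to the charm-defined snap installation, which is how the integration tests restore the original state.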
From 46d79b8ba8431c402f4e4a6eea96329c2bb38504 Mon Sep 17 00:00:00 2001 From: Adam Dyess Date: Thu, 21 Nov 2024 11:00:00 -0600 Subject: [PATCH 04/13] Support a charm resource to override the installed snap (#149) * Support a charm resource to override the installed snap * Deploy with the default resource when deploying with a local charm * Support snap-installation resource which includes yaml and snap files * Block snap management from refreshing (#166) --- .licenserc.yaml | 1 + charms/worker/build-snap-installation.sh | 1 + charms/worker/charmcraft.yaml | 18 ++ charms/worker/k8s/build-snap-installation.sh | 7 + charms/worker/k8s/charmcraft.yaml | 15 + charms/worker/k8s/src/charm.py | 47 +-- charms/worker/k8s/src/events/update_status.py | 108 +++++++ charms/worker/k8s/src/protocols.py | 11 + charms/worker/k8s/src/snap.py | 211 +++++++++++-- charms/worker/k8s/tests/unit/test_base.py | 9 +- charms/worker/k8s/tests/unit/test_snap.py | 289 +++++++++++++++--- pyproject.toml | 2 +- tests/integration/conftest.py | 122 ++++++-- .../data/default-snap-installation.tar.gz | 0 .../data/override-latest-edge.tar.gz | Bin 0 -> 265 bytes tests/integration/test_k8s.py | 54 +++- 16 files changed, 772 insertions(+), 123 deletions(-) create mode 120000 charms/worker/build-snap-installation.sh create mode 100755 charms/worker/k8s/build-snap-installation.sh create mode 100644 charms/worker/k8s/src/events/update_status.py create mode 100644 tests/integration/data/default-snap-installation.tar.gz create mode 100644 tests/integration/data/override-latest-edge.tar.gz diff --git a/.licenserc.yaml b/.licenserc.yaml index 9bfd6e8e..05121671 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -19,6 +19,7 @@ header: - 'charms/worker/k8s/lib/charms/k8s/**' paths-ignore: - 'charms/worker/k8s/lib/charms/**' + - 'tests/integration/data/*.tar.gz' - '.github/**' - '**/.gitkeep' - '**/*.cfg' diff --git a/charms/worker/build-snap-installation.sh b/charms/worker/build-snap-installation.sh new file mode 120000 index 00000000..f31920b1 --- /dev/null +++ b/charms/worker/build-snap-installation.sh @@ -0,0 +1 @@ +k8s/build-snap-installation.sh \ No newline at end of file diff --git a/charms/worker/charmcraft.yaml b/charms/worker/charmcraft.yaml index 326a4b9b..6f158ad0 100644 --- a/charms/worker/charmcraft.yaml +++ b/charms/worker/charmcraft.yaml @@ -57,6 +57,7 @@ bases: - name: ubuntu channel: "24.04" architectures: [arm64] + config: options: labels: @@ -68,6 +69,22 @@ config: Note: Due to NodeRestriction, workers are limited to how they can label themselves https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#noderestriction + +resources: + snap-installation: + type: file + filename: snap-installation.tar.gz + description: | + Override charm defined snap installation script + + This charm is designed to operate with a specific revision of snaps, overriding + with anything will indicate that the charm is running an unsupported configuration. 
+ + Content Options: + 0-byte resource (Default) -- Use the charm defined snap installation script + ./snap-installation.yaml -- Overrides the charm defined snap-installation.yaml + ./k8s_XXXX.snap -- Overrides the charm with a specific snap file installed dangerously + parts: charm: plugin: charm @@ -97,6 +114,7 @@ peers: provides: cos-agent: interface: cos_agent + requires: aws: interface: aws-integration diff --git a/charms/worker/k8s/build-snap-installation.sh b/charms/worker/k8s/build-snap-installation.sh new file mode 100755 index 00000000..e54a426f --- /dev/null +++ b/charms/worker/k8s/build-snap-installation.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. + +# Create an empty tarball to be used as a placeholder for the snap installation override +echo "Creating empty tarball at $1" +touch "${1}" \ No newline at end of file diff --git a/charms/worker/k8s/charmcraft.yaml b/charms/worker/k8s/charmcraft.yaml index c5b3ceab..19f26f4c 100644 --- a/charms/worker/k8s/charmcraft.yaml +++ b/charms/worker/k8s/charmcraft.yaml @@ -190,6 +190,21 @@ config: description: | Enable/Disable the gateway feature on the cluster. +resources: + snap-installation: + type: file + filename: snap-installation.tar.gz + description: | + Override charm defined snap installation script + + This charm is designed to operate with a specific revision of snaps, overriding + with anything will indicate that the charm is running an unsupported configuration. + + Content Options: + 0-byte resource (Default) -- Use the charm defined snap installation script + ./snap-installation.yaml -- Overrides the charm defined snap-installation.yaml + ./k8s_XXXX.snap -- Overrides the charm with a specific snap file installed dangerously + actions: get-kubeconfig: description: Retrieve Public Kubernetes cluster config, including credentials diff --git a/charms/worker/k8s/src/charm.py b/charms/worker/k8s/src/charm.py index 7794ac84..722106b3 100755 --- a/charms/worker/k8s/src/charm.py +++ b/charms/worker/k8s/src/charm.py @@ -31,7 +31,6 @@ import charms.operator_libs_linux.v2.snap as snap_lib import containerd import ops -import reschedule import yaml from charms.contextual_status import ReconcilerError, WaitingStatus, on_error from charms.grafana_agent.v0.cos_agent import COSAgentProvider @@ -58,6 +57,7 @@ from charms.reconciler import Reconciler from cloud_integration import CloudIntegration from cos_integration import COSIntegration +from events import update_status from inspector import ClusterInspector from kube_control import configure as configure_kube_control from literals import DEPENDENCIES @@ -143,7 +143,10 @@ def __init__(self, *args): dependency_model=K8sDependenciesModel(**DEPENDENCIES), ) self.cos = COSIntegration(self) - self.reconciler = Reconciler(self, self._reconcile) + self.update_status = update_status.Handler(self) + self.reconciler = Reconciler( + self, self._reconcile, exit_status=self.update_status.active_status + ) self.distributor = TokenDistributor(self, self.get_node_name(), self.api_manager) self.collector = TokenCollector(self, self.get_node_name()) self.labeller = LabelMaker( @@ -164,7 +167,6 @@ def __init__(self, *args): ], ) - self.framework.observe(self.on.update_status, self._on_update_status) if self.is_control_plane: self.etcd = EtcdReactiveRequires(self) self.kube_control = KubeControlProvides(self, endpoint="kube-control") @@ -286,7 +288,7 @@ def get_cloud_name(self) -> str: def _install_snaps(self): """Install snap packages.""" 
status.add(ops.MaintenanceStatus("Ensuring snap installation")) - snap_management() + snap_management(self) @on_error(WaitingStatus("Waiting to apply snap requirements"), subprocess.CalledProcessError) def _apply_snap_requirements(self): @@ -623,7 +625,8 @@ def _update_kubernetes_version(self): if not relation: status.add(ops.BlockedStatus("Missing cluster integration")) raise ReconcilerError("Missing cluster integration") - if version := snap_version("k8s"): + version, _ = snap_version("k8s") + if version: relation.data[self.unit]["version"] = version @on_error(ops.WaitingStatus("Announcing Kubernetes version")) @@ -636,7 +639,8 @@ def _announce_kubernetes_version(self): ReconcilerError: If the k8s snap is not installed, the version is missing, or the version does not match the local version. """ - if not (local_version := snap_version("k8s")): + local_version, _ = snap_version("k8s") + if not local_version: raise ReconcilerError("k8s-snap is not installed") peer = self.model.get_relation("cluster") @@ -734,7 +738,7 @@ def _death_handler(self, event: ops.EventBase): """ if self.lead_control_plane: self._revoke_cluster_tokens(event) - self._update_status() + self.update_status.run() self._last_gasp() relation = self.model.get_relation("cluster") @@ -774,28 +778,12 @@ def _reconcile(self, event: ops.EventBase): self._join_cluster(event) self._config_containerd_registries() self._configure_cos_integration() - self._update_status() + self.update_status.run() self._apply_node_labels() if self.is_control_plane: self._copy_internal_kubeconfig() self._expose_ports() - def _update_status(self): - """Check k8s snap status.""" - if version := snap_version("k8s"): - self.unit.set_workload_version(version) - - if not self.get_cluster_name(): - status.add(ops.WaitingStatus("Node not Clustered")) - return - - trigger = reschedule.PeriodicEvent(self) - if not self._is_node_ready(): - status.add(ops.WaitingStatus("Node not Ready")) - trigger.create(reschedule.Period(seconds=30)) - return - trigger.cancel() - def _evaluate_removal(self, event: ops.EventBase) -> bool: """Determine if my unit is being removed. @@ -891,17 +879,6 @@ def _apply_node_labels(self): else: log.info("Node %s not yet labelled", node) - def _on_update_status(self, _event: ops.UpdateStatusEvent): - """Handle update-status event.""" - if not self.reconciler.stored.reconciled: - return - - try: - with status.context(self.unit): - self._update_status() - except status.ReconcilerError: - log.exception("Can't update_status") - def kubectl(self, *args) -> str: """Run kubectl command. diff --git a/charms/worker/k8s/src/events/update_status.py b/charms/worker/k8s/src/events/update_status.py new file mode 100644 index 00000000..fead5f92 --- /dev/null +++ b/charms/worker/k8s/src/events/update_status.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 + +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. + +# Learn more at: https://juju.is/docs/sdk + +"""Update status handler for the k8s charm. + +This handler is responsible for updating the unit's workload version and status +""" + +import logging + +import charms.contextual_status as status +import ops +import reschedule +from protocols import K8sCharmProtocol +from snap import version as snap_version + +# Log messages can be retrieved using juju debug-log +log = logging.getLogger(__name__) + + +class DynamicActiveStatus(ops.ActiveStatus): + """An ActiveStatus class that can be updated. 
+ + Attributes: + message (str): explanation of the unit status + prefix (str): Optional prefix to the unit status + postfix (str): Optional postfix to the unit status + """ + + def __init__(self): + """Initialise the DynamicActiveStatus.""" + super().__init__("Ready") + self.prefix: str = "" + self.postfix: str = "" + + @property + def message(self) -> str: + """Return the message for the status.""" + pre = f"{self.prefix} :" if self.prefix else "" + post = f" ({self.postfix})" if self.postfix else "" + return f"{pre}{self._message}{post}" + + @message.setter + def message(self, message: str): + """Set the message for the status. + + Args: + message (str): explanation of the unit status + """ + self._message = message + + +class Handler(ops.Object): + """Handler for the update-status event in a Kubernetes operator. + + This class observes the `update_status` event and handles it by checking the + Kubernetes snap status and updating the unit's workload version accordingly. + + Attributes: + charm (CharmBase): The charm instance that this handler is associated with. + active_status (DynamicActiveStatus): The active status object used to manage + the unit's status during the update process. + """ + + def __init__(self, charm: K8sCharmProtocol): + """Initialize the UpdateStatusEvent. + + Args: + charm: The charm instance that is instantiating this event. + """ + super().__init__(charm, "update_status") + self.charm = charm + self.active_status = DynamicActiveStatus() + self.charm.framework.observe(self.charm.on.update_status, self._on_update_status) + + def _on_update_status(self, _event: ops.UpdateStatusEvent): + """Handle update-status event.""" + if not self.charm.reconciler.stored.reconciled: + return + + try: + with status.context(self.charm.unit, exit_status=self.active_status): + self.run() + except status.ReconcilerError: + log.exception("Can't update_status") + + def run(self): + """Check k8s snap status.""" + version, overridden = snap_version("k8s") + if version: + self.charm.unit.set_workload_version(version) + + self.active_status.postfix = "Snap Override Active" if overridden else "" + + if not self.charm.get_cluster_name(): + status.add(ops.WaitingStatus("Node not Clustered")) + return + + trigger = reschedule.PeriodicEvent(self.charm) + if not self.charm._is_node_ready(): + status.add(ops.WaitingStatus("Node not Ready")) + trigger.create(reschedule.Period(seconds=30)) + return + trigger.cancel() diff --git a/charms/worker/k8s/src/protocols.py b/charms/worker/k8s/src/protocols.py index f06f05c1..0ca5fc64 100644 --- a/charms/worker/k8s/src/protocols.py +++ b/charms/worker/k8s/src/protocols.py @@ -6,6 +6,7 @@ import ops from charms.interface_external_cloud_provider import ExternalCloudProvider from charms.k8s.v0.k8sd_api_manager import K8sdAPIManager +from charms.reconciler import Reconciler from ops.interface_kube_control import KubeControlProvides @@ -16,11 +17,13 @@ class K8sCharmProtocol(ops.CharmBase): api_manager (K8sdAPIManager): The API manager for the charm. kube_control (KubeControlProvides): The kube-control interface. xcp (ExternalCloudProvider): The external cloud provider interface. + reconciler (Reconciler): The reconciler for the charm """ api_manager: K8sdAPIManager kube_control: KubeControlProvides xcp: ExternalCloudProvider + reconciler: Reconciler def get_cluster_name(self) -> str: """Get the cluster name. @@ -37,3 +40,11 @@ def get_cloud_name(self) -> str: NotImplementedError: If the method is not implemented. 
""" raise NotImplementedError + + def _is_node_ready(self) -> bool: + """Check if the node is ready. + + Raises: + NotImplementedError: If the method is not implemented. + """ + raise NotImplementedError diff --git a/charms/worker/k8s/src/snap.py b/charms/worker/k8s/src/snap.py index 575a9915..e78291b4 100644 --- a/charms/worker/k8s/src/snap.py +++ b/charms/worker/k8s/src/snap.py @@ -10,11 +10,14 @@ import logging import re +import shutil import subprocess +import tarfile from pathlib import Path -from typing import List, Literal, Optional, Union +from typing import List, Literal, Optional, Tuple, Union import charms.operator_libs_linux.v2.snap as snap_lib +import ops import yaml from pydantic import BaseModel, Field, ValidationError, parse_obj_as, validator from typing_extensions import Annotated @@ -23,6 +26,27 @@ log = logging.getLogger(__name__) +def _yaml_read(path: Path) -> dict: + """Read a yaml file into a dictionary. + + Args: + path: The path to the yaml file + """ + with path.open(mode="r", encoding="utf-8") as f: + return yaml.safe_load(f) + + +def _yaml_write(path: Path, content: dict) -> None: + """Write a dictionary to a yaml file. + + Args: + path: The path to the yaml file + content: The dictionary to write + """ + with path.open(mode="w", encoding="utf-8") as f: + yaml.safe_dump(content, f) + + class SnapFileArgument(BaseModel): """Structure to install a snap by file. @@ -31,7 +55,7 @@ class SnapFileArgument(BaseModel): name (str): The name of the snap after installed filename (Path): Path to the snap to locally install classic (bool): If it should be installed as a classic snap - dangerous (bool): If it should be installed as a dangerouse snap + dangerous (bool): If it should be installed as a dangerous snap devmode (bool): If it should be installed as with dev mode enabled """ @@ -91,25 +115,138 @@ def _validate_revision(cls, value: Union[str, int, None]) -> Optional[str]: ] -def _parse_management_arguments() -> List[SnapArgument]: +def _local_arch() -> str: + """Retrieve the local architecture. + + Returns: + str: The architecture of this machine + """ + dpkg_arch = ["dpkg", "--print-architecture"] + return subprocess.check_output(dpkg_arch).decode("UTF-8").strip() + + +def _default_snap_installation() -> Path: + """Return the default snap_installation manifest. + + Returns: + path to the default snap_installation manifest + """ + return Path("templates/snap_installation.yaml") + + +def _overridden_snap_installation() -> Path: + """Return the overridden snap_installation manifest. + + Returns: + path to the overridden snap_installation manifest + """ + return Path("./snap-installation/resource/snap_installation.yaml") + + +def _normalize_paths(snap_installation): + """Normalize the paths in the snap_installation manifest. + + Arguments: + snap_installation: The path to the snap_installation manifest + """ + snap_installation = snap_installation.resolve() + content, updated = _yaml_read(snap_installation), False + for arch, snaps in content.items(): + for idx, snap in enumerate(snaps): + if snap.get("filename"): + resolved = (snap_installation.parent / snap["filename"]).resolve() + log.info("Resolving snap filename: %s to %s", snap["filename"], resolved) + content[arch][idx]["filename"] = str(resolved) + updated = True + if updated: + _yaml_write(snap_installation, content) + + +def _select_snap_installation(charm: ops.CharmBase) -> Path: + """Select the snap_installation manifest. 
+ + Arguments: + charm: The charm instance necessary to check the unit resources + + Returns: + path: The path to the snap_installation manifest + + Raises: + SnapError: when the management issue cannot be resolved + """ + try: + resource_path = charm.model.resources.fetch("snap-installation") + except (ops.ModelError, NameError): + log.error("Something went wrong when claiming 'snap-installation' resource.") + return _default_snap_installation() + + resource_size = resource_path.stat().st_size + log.info("Resource path size: %d bytes", resource_size) + unpack_path = _overridden_snap_installation().parent + shutil.rmtree(unpack_path, ignore_errors=True) + if resource_size == 0: + log.info("Resource size is zero bytes. Use the charm defined snap installation script") + return _default_snap_installation() + + # Unpack the snap-installation resource + unpack_path.mkdir(parents=True, exist_ok=True) + try: + with tarfile.open(resource_path, "r:gz") as tar: + for member in tar.getmembers(): + if member.name.endswith("snap_installation.yaml"): + log.info("Found snap_installation manifest") + tar.extract(member, path=unpack_path) + snap_installation = unpack_path / member.name + _normalize_paths(snap_installation) + return snap_installation + if member.name.endswith(".snap"): + log.info("Found snap_installation snap: %s", member.name) + tar.extract(member, path=unpack_path) + arch = _local_arch() + manifest = { + arch: [ + { + "install-type": "file", + "name": "k8s", + "filename": str(unpack_path / member.name), + "classic": True, + "dangerous": True, + } + ] + } + snap_installation = unpack_path / "snap_installation.yaml" + _yaml_write(snap_installation, manifest) + return snap_installation + except tarfile.TarError as e: + log.error("Failed to extract 'snap-installation:'") + raise snap_lib.SnapError("Invalid snap-installation resource") from e + + log.error("Failed to find a snap file in snap_installation resource") + raise snap_lib.SnapError("Failed to find snap_installation manifest") + + +def _parse_management_arguments(charm: ops.CharmBase) -> List[SnapArgument]: """Parse snap management arguments. + Arguments: + charm: The charm instance necessary to check the unit resources + Raises: SnapError: when the management issue cannot be resolved Returns: Parsed arguments list for the specific host architecture """ - revision = Path("templates/snap_installation.yaml") + revision = _select_snap_installation(charm) if not revision.exists(): raise snap_lib.SnapError(f"Failed to find file={revision}") try: - body = yaml.safe_load(revision.read_text(encoding="utf-8")) + body = _yaml_read(revision) except yaml.YAMLError as e: log.error("Failed to load file=%s, %s", revision, e) raise snap_lib.SnapError(f"Failed to load file={revision}") - dpkg_arch = ["dpkg", "--print-architecture"] - arch = subprocess.check_output(dpkg_arch).decode("UTF-8").strip() + + arch = _local_arch() if not (isinstance(body, dict) and (arch_spec := body.get(arch))): log.warning("Failed to find revision for arch=%s", arch) @@ -126,24 +263,63 @@ def _parse_management_arguments() -> List[SnapArgument]: return args -def management(): - """Manage snap installations on this machine.""" +def management(charm: ops.CharmBase) -> None: + """Manage snap installations on this machine. 
+ + Arguments: + charm: The charm instance + """ cache = snap_lib.SnapCache() - for args in _parse_management_arguments(): + for args in _parse_management_arguments(charm): which = cache[args.name] + if block_refresh(which, args): + continue + install_args = args.dict(exclude_none=True) if isinstance(args, SnapFileArgument) and which.revision != "x1": - snap_lib.install_local(**args.dict(exclude_none=True)) + snap_lib.install_local(**install_args) elif isinstance(args, SnapStoreArgument) and args.revision: if which.revision != args.revision: log.info("Ensuring %s snap revision=%s", args.name, args.revision) - which.ensure(**args.dict(exclude_none=True)) + which.ensure(**install_args) which.hold() elif isinstance(args, SnapStoreArgument): log.info("Ensuring %s snap channel=%s", args.name, args.channel) - which.ensure(**args.dict(exclude_none=True)) + which.ensure(**install_args) + + +def block_refresh(which: snap_lib.Snap, args: SnapArgument) -> bool: + """Block snap refreshes if the snap is in a specific state. + Arguments: + which: The snap to check + args: The snap arguments -def version(snap: str) -> Optional[str]: + Returns: + bool: True if the snap should be blocked from refreshing + """ + if snap_lib.SnapState(which.state) == snap_lib.SnapState.Available: + log.info("Allowing %s snap installation", args.name) + return False + if _overridden_snap_installation().exists(): + log.info("Allowing %s snap refresh due to snap installation override", args.name) + return False + if isinstance(args, SnapStoreArgument) and args.revision: + if block := which.revision != args.revision: + log.info("Blocking %s snap refresh to revision=%s", args.name, args.revision) + else: + log.info("Allowing %s snap refresh to same revision", args.name) + return block + if isinstance(args, SnapStoreArgument): + if block := which.channel != args.channel: + log.info("Blocking %s snap refresh to channel=%s", args.name, args.channel) + else: + log.info("Allowing %s snap refresh to same channel (%s)", args.name, args.channel) + return block + log.info("Blocking %s snap refresh", args.name) + return True + + +def version(snap: str) -> Tuple[Optional[str], bool]: """Retrieve the version of the installed snap package. Arguments: @@ -153,15 +329,16 @@ def version(snap: str) -> Optional[str]: Optional[str]: The version of the installed snap package, or None if not available. 
""" + overridden = _overridden_snap_installation().exists() try: result = subprocess.check_output(["/usr/bin/snap", "list", snap]) except subprocess.CalledProcessError: - return None + return None, overridden output = result.decode().strip() match = re.search(r"(\d+\.\d+(?:\.\d+)?)", output) if match: - return match.group() + return match.group(), overridden log.info("Snap k8s not found or no version available.") - return None + return None, overridden diff --git a/charms/worker/k8s/tests/unit/test_base.py b/charms/worker/k8s/tests/unit/test_base.py index acff6d85..dc1038ce 100644 --- a/charms/worker/k8s/tests/unit/test_base.py +++ b/charms/worker/k8s/tests/unit/test_base.py @@ -52,7 +52,6 @@ def mock_reconciler_handlers(harness): "_check_k8sd_ready", "_join_cluster", "_configure_cos_integration", - "_update_status", "_apply_node_labels", "_update_kubernetes_version", } @@ -70,9 +69,11 @@ def mock_reconciler_handlers(harness): "_announce_kubernetes_version", } - handlers = [mock.patch(f"charm.K8sCharm.{name}") for name in handler_names] - yield dict(zip(handler_names, (h.start() for h in handlers))) - for handler in handlers: + mocked = [mock.patch(f"charm.K8sCharm.{name}") for name in handler_names] + handlers = dict(zip(handler_names, (m.start() for m in mocked))) + handlers["_update_status"] = mock.patch.object(harness.charm.update_status, "run").start() + yield handlers + for handler in handlers.values(): handler.stop() diff --git a/charms/worker/k8s/tests/unit/test_snap.py b/charms/worker/k8s/tests/unit/test_snap.py index 19c1e4eb..65057caf 100644 --- a/charms/worker/k8s/tests/unit/test_snap.py +++ b/charms/worker/k8s/tests/unit/test_snap.py @@ -6,127 +6,338 @@ # pylint: disable=duplicate-code,missing-function-docstring """Unit tests snap module.""" +import gzip import io import subprocess +import tarfile from pathlib import Path +from textwrap import dedent from unittest import mock +import ops.testing import pytest import snap +from charm import K8sCharm -@mock.patch("pathlib.Path.exists", mock.Mock(return_value=False)) -def test_parse_no_file(): +@pytest.fixture(params=["worker", "control-plane"]) +def harness(request): + """Craft a ops test harness. 
+ + Args: + request: pytest request object + """ + meta = Path(__file__).parent / "../../charmcraft.yaml" + if request.param == "worker": + meta = Path(__file__).parent / "../../../charmcraft.yaml" + harness = ops.testing.Harness(K8sCharm, meta=meta.read_text()) + harness.begin() + harness.charm.is_worker = request.param == "worker" + yield harness + harness.cleanup() + + +@pytest.fixture +def missing_snap_installation(): + """Test missing default snap-installation.""" + with mock.patch("snap._default_snap_installation") as mocked: + mock_path = mocked.return_value + mock_path.exists.return_value = False + yield mocked + mocked.assert_called_once_with() + + +@pytest.fixture +def snap_installation(): + """Test missing default snap-installation.""" + with mock.patch("snap._default_snap_installation") as mocked: + mock_path = mocked.return_value + mock_path.exists.return_value = True + mock_stream = mock_path.open.return_value.__enter__ + yield mock_stream + mocked.assert_called_once_with() + + +@pytest.fixture(autouse=True) +def resource_snap_installation(tmp_path): + """Add snap-installation resource.""" + with mock.patch("snap._overridden_snap_installation") as mocked: + mock_path = Path(tmp_path) / "snap_installation.yaml" + mocked.return_value = mock_path + yield mock_path + + +@pytest.fixture() +def block_refresh(): + """Block snap refresh.""" + with mock.patch("snap.block_refresh") as mocked: + mocked.return_value = False + yield mocked + + +@mock.patch("snap.snap_lib.SnapCache") +@pytest.mark.parametrize( + "state, as_file", + [ + [("present", "1234", None), False], + [("present", None, "edge"), False], + [("present", None, None), True], + ], + ids=[ + "installed & store-by-channel", + "installed & store-by-revision", + "installed & file-without-override", + ], +) +def test_block_refresh(cache, state, as_file, caplog, resource_snap_installation): + """Test block refresh.""" + caplog.set_level(0) + k8s_snap = cache()["k8s"] + k8s_snap.state, k8s_snap.revision, k8s_snap.channel = state + if as_file: + args = snap.SnapFileArgument( + name="k8s", + filename=resource_snap_installation.parent / "k8s_1234.snap", + ) + else: + args = snap.SnapStoreArgument( + name="k8s", + channel="beta" if k8s_snap.channel else None, + revision="5678" if k8s_snap.revision else None, + ) + assert snap.block_refresh(k8s_snap, args) + assert "Blocking k8s snap refresh" in caplog.text + + +@mock.patch("snap.snap_lib.SnapCache") +@pytest.mark.parametrize( + "state, overridden", + [ + [("available", None, None), None], + [("present", "1234", None), None], + [("present", None, "edge"), None], + [("present", None, None), True], + ], + ids=[ + "not installed yet", + "installed & store-by-same-channel", + "installed & store-by-same-revision", + "installed & override", + ], +) +def test_not_block_refresh(cache, state, overridden, caplog, resource_snap_installation): + """Test block refresh.""" + caplog.set_level(0) + k8s_snap = cache()["k8s"] + k8s_snap.state, k8s_snap.revision, k8s_snap.channel = state + if overridden: + resource_snap_installation.write_text( + "amd64:\n- install-type: store\n name: k8s\n channel: edge" + ) + args = snap.SnapStoreArgument( + name="k8s", + channel=k8s_snap.channel, + revision=k8s_snap.revision, + ) + assert not snap.block_refresh(k8s_snap, args) + assert "Allowing k8s snap" in caplog.text + + +@pytest.mark.usefixtures("missing_snap_installation") +def test_parse_no_file(harness): """Test no file exists.""" with pytest.raises(snap.snap_lib.SnapError): - 
snap._parse_management_arguments() + snap._parse_management_arguments(harness.charm) -@mock.patch("pathlib.Path.exists", mock.Mock(return_value=True)) -@mock.patch("pathlib.Path.open") -def test_parse_invalid_file(mock_open): +def test_parse_invalid_file(snap_installation, harness): """Test file is invalid.""" - mock_open().__enter__.return_value = io.StringIO("example: =") + snap_installation.return_value = io.BytesIO(b"example: =") with pytest.raises(snap.snap_lib.SnapError): - snap._parse_management_arguments() + snap._parse_management_arguments(harness.charm) -@mock.patch("pathlib.Path.exists", mock.Mock(return_value=True)) -@mock.patch("pathlib.Path.open") @mock.patch("subprocess.check_output") -def test_parse_invalid_arch(mock_checkoutput, mock_open): +def test_parse_invalid_arch(mock_checkoutput, snap_installation, harness): """Test file has invalid arch.""" - mock_open().__enter__.return_value = io.StringIO("{}") + snap_installation.return_value = io.BytesIO(b"{}") mock_checkoutput().decode.return_value = "amd64" with pytest.raises(snap.snap_lib.SnapError): - snap._parse_management_arguments() + snap._parse_management_arguments(harness.charm) -@mock.patch("pathlib.Path.exists", mock.Mock(return_value=True)) -@mock.patch("pathlib.Path.open") @mock.patch("subprocess.check_output") -def test_parse_validation_error(mock_checkoutput, mock_open): +def test_parse_validation_error(mock_checkoutput, snap_installation, harness): """Test file cannot be parsed.""" - mock_open().__enter__.return_value = io.StringIO("amd64:\n- {}") + snap_installation.return_value = io.BytesIO(b"amd64:\n- {}") mock_checkoutput().decode.return_value = "amd64" with pytest.raises(snap.snap_lib.SnapError): - snap._parse_management_arguments() + snap._parse_management_arguments(harness.charm) + + +def _create_gzip_tar_string(file_data_dict): + """Create a gzip-compressed tar archive and return it as a base64-encoded string. + + Args: + file_data_dict: Dictionary where keys are filenames and values are file content as strings. + + Returns: + Gzipped tar archive content as a base64-encoded string. 
+ """ + # Create a BytesIO buffer for the tar file + tar_buffer = io.BytesIO() + + # Open a tarfile in the buffer + with tarfile.open(fileobj=tar_buffer, mode="w") as tar: + for filename, file_content in file_data_dict.items(): + # Create a BytesIO buffer for each file's content + file_buffer = io.BytesIO(file_content.encode("utf-8")) + + # Create a tarinfo object with details of the file + tarinfo = tarfile.TarInfo(name=filename) + tarinfo.size = len(file_content) + + # Add the file content to the tar archive + tar.addfile(tarinfo, file_buffer) + + # Get the tar content from the buffer + tar_content = tar_buffer.getvalue() + + # Compress the tar content with gzip + gzip_buffer = io.BytesIO() + with gzip.GzipFile(fileobj=gzip_buffer, mode="wb") as gz: + gz.write(tar_content) + + # Get the gzipped tar content + return gzip_buffer.getvalue() + + +def test_resource_supplied_installation_by_channel(harness): + """Test resource installs by store channel.""" + arch = snap._local_arch() + yaml_data = f"{arch}:\n- install-type: store\n name: k8s\n channel: edge" + file_data = {"./snap_installation.yaml": yaml_data} + harness.add_resource("snap-installation", _create_gzip_tar_string(file_data)) + args = snap._parse_management_arguments(harness.charm) + assert len(args) == 1 + assert isinstance(args[0], snap.SnapStoreArgument) + assert args[0].channel == "edge" + assert args[0].name == "k8s" + assert args[0].install_type == "store" + + +def test_resource_supplied_installation_by_filename(harness, resource_snap_installation): + """Test resource installs by included filename.""" + arch = snap._local_arch() + yaml_data = dedent( + f""" + {arch}: + - install-type: file + name: k8s + filename: ./k8s_xxxx.snap + dangerous: true + classic: true + """ + ).strip() + file_data = {"./snap_installation.yaml": yaml_data, "./k8s_xxxx.snap": ""} + harness.add_resource("snap-installation", _create_gzip_tar_string(file_data)) + args = snap._parse_management_arguments(harness.charm) + assert len(args) == 1 + assert isinstance(args[0], snap.SnapFileArgument) + assert args[0].install_type == "file" + assert args[0].name == "k8s" + assert args[0].filename == resource_snap_installation.parent / "k8s_xxxx.snap" + assert args[0].dangerous + assert args[0].classic + + +def test_resource_supplied_snap(harness, resource_snap_installation): + """Test resource installs by snap only.""" + file_data = {"./k8s_xxxx.snap": ""} + harness.add_resource("snap-installation", _create_gzip_tar_string(file_data)) + args = snap._parse_management_arguments(harness.charm) + assert len(args) == 1 + assert isinstance(args[0], snap.SnapFileArgument) + assert args[0].name == "k8s" + assert args[0].install_type == "file" + assert args[0].filename == resource_snap_installation.parent / "k8s_xxxx.snap" + assert args[0].dangerous + assert args[0].classic -@mock.patch("pathlib.Path.exists", mock.Mock(return_value=True)) -@mock.patch("pathlib.Path.open") @mock.patch("subprocess.check_output") -def test_parse_valid_store(mock_checkoutput, mock_open): +def test_parse_valid_store(mock_checkoutput, snap_installation, harness): """Test file parses as store content.""" - content = """ + content = b""" amd64: - install-type: store name: k8s channel: edge """ - mock_open().__enter__.return_value = io.StringIO(content) + snap_installation.return_value = io.BytesIO(content) mock_checkoutput().decode.return_value = "amd64" - args = snap._parse_management_arguments() + args = snap._parse_management_arguments(harness.charm) assert args == [ 
snap.SnapStoreArgument(name="k8s", channel="edge"), ] -@mock.patch("pathlib.Path.exists", mock.Mock(return_value=True)) -@mock.patch("pathlib.Path.open") @mock.patch("subprocess.check_output") -def test_parse_valid_file(mock_checkoutput, mock_open): +def test_parse_valid_file(mock_checkoutput, snap_installation, harness): """Test file parses as file content.""" - content = """ + content = b""" amd64: - install-type: file name: k8s filename: path/to/thing """ - mock_open().__enter__.return_value = io.StringIO(content) + snap_installation.return_value = io.BytesIO(content) mock_checkoutput().decode.return_value = "amd64" - args = snap._parse_management_arguments() + args = snap._parse_management_arguments(harness.charm) assert args == [ snap.SnapFileArgument(name="k8s", filename=Path("path/to/thing")), ] +@pytest.mark.usefixtures("block_refresh") @mock.patch("snap._parse_management_arguments") @mock.patch("snap.snap_lib.install_local") @mock.patch("snap.snap_lib.SnapCache") -def test_management_installs_local(cache, install_local, args): +def test_management_installs_local(cache, install_local, args, harness): """Test installer uses local installer.""" k8s_snap = cache()["k8s"] args.return_value = [snap.SnapFileArgument(name="k8s", filename=Path("path/to/thing"))] - snap.management() + snap.management(harness.charm) k8s_snap.ensure.assert_not_called() install_local.assert_called_once_with(filename=Path("path/to/thing")) +@pytest.mark.usefixtures("block_refresh") @mock.patch("snap._parse_management_arguments") @mock.patch("snap.snap_lib.install_local") @mock.patch("snap.snap_lib.SnapCache") @pytest.mark.parametrize("revision", [None, "123"]) -def test_management_installs_store_from_channel(cache, install_local, args, revision): +def test_management_installs_store_from_channel(cache, install_local, args, revision, harness): """Test installer uses store installer.""" k8s_snap = cache()["k8s"] k8s_snap.revision = revision args.return_value = [snap.SnapStoreArgument(name="k8s", channel="edge")] - snap.management() + snap.management(harness.charm) install_local.assert_not_called() k8s_snap.ensure.assert_called_once_with(state=snap.snap_lib.SnapState.Present, channel="edge") +@pytest.mark.usefixtures("block_refresh") @mock.patch("snap._parse_management_arguments") @mock.patch("snap.snap_lib.install_local") @mock.patch("snap.snap_lib.SnapCache") @pytest.mark.parametrize("revision", [None, "456", "123"]) -def test_management_installs_store_from_revision(cache, install_local, args, revision): +def test_management_installs_store_from_revision(cache, install_local, args, revision, harness): """Test installer uses store installer.""" k8s_snap = cache()["k8s"] k8s_snap.revision = revision args.return_value = [snap.SnapStoreArgument(name="k8s", revision=123)] - snap.management() + snap.management(harness.charm) install_local.assert_not_called() if revision == "123": k8s_snap.ensure.assert_not_called() @@ -140,13 +351,13 @@ def test_management_installs_store_from_revision(cache, install_local, args, rev def test_version(check_output): """Test snap list returns the correct version.""" check_output.return_value = b"" - assert snap.version(snap="k8s") is None + assert snap.version(snap="k8s") == (None, False) check_output.return_value = """ Name Version Rev Tracking Publisher Notes k8s 1.30.0 1234 latest/stable canonical✓ """.encode() - assert snap.version(snap="k8s") == "1.30.0" + assert snap.version(snap="k8s") == ("1.30.0", False) check_output.side_effect = subprocess.CalledProcessError(-1, [], None, None) 
- assert snap.version(snap="k8s") is None + assert snap.version(snap="k8s") == (None, False) diff --git a/pyproject.toml b/pyproject.toml index 363ba3ba..fdf9c317 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,4 +86,4 @@ max-complexity = 10 skip = "build,lib,venv,icon.svg,.tox,.git,.mypy_cache,.ruff_cache,.coverage" [tool.pyright] -extraPaths = ["./charms/worker/k8s/lib"] +extraPaths = ["./charms/worker/k8s/lib","./charms/worker/k8s/src"] diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 42a37073..f4a147a6 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -17,6 +17,7 @@ import pytest import pytest_asyncio import yaml +from juju.application import Application from juju.model import Model from juju.tag import untag from kubernetes import config as k8s_config @@ -27,18 +28,24 @@ from .helpers import get_unit_cidrs, is_deployed log = logging.getLogger(__name__) +TEST_DATA = Path(__file__).parent / "data" +DEFAULT_SNAP_INSTALLATION = TEST_DATA / "default-snap-installation.tar.gz" +DEFAULT_RESOURCES = {"snap-installation": None} def pytest_addoption(parser: pytest.Parser): """Parse additional pytest options. - --charm-file can be called multiple times for each - supplied charm + --charm-file can be used multiple times, specifies which local charm files are available + --upgrade-from instruct tests to start with a specific channel, and upgrade to these charms Args: parser: Pytest parser. """ parser.addoption("--charm-file", dest="charm_files", action="append", default=[]) + parser.addoption( + "--snap-installation-resource", default=str(DEFAULT_SNAP_INSTALLATION.resolve()) + ) parser.addoption("--cos", action="store_true", default=False, help="Run COS integration tests") parser.addoption( "--apply-proxy", action="store_true", default=False, help="Apply proxy to model-config" @@ -49,6 +56,9 @@ def pytest_addoption(parser: pytest.Parser): default=False, help="If cloud is LXD, use containers", ) + parser.addoption( + "--upgrade-from", dest="upgrade_from", default=None, help="Charms channel to upgrade from" + ) def pytest_configure(config): @@ -173,6 +183,28 @@ class Bundle: arch: str _content: Mapping = field(default_factory=dict) + @classmethod + async def create(cls, ops_test: OpsTest, path: Path) -> "Bundle": + """Create a bundle object. + + Args: + ops_test: Instance of the pytest-operator plugin + path: Path to the bundle file + + Returns: + Bundle: Instance of the Bundle + """ + arch = await cloud_arch(ops_test) + _type, _vms = await cloud_type(ops_test) + bundle = cls(ops_test, path, arch) + if _type == "lxd" and not _vms: + log.info("Drop lxd machine constraints") + bundle.drop_constraints() + if _type == "lxd" and _vms: + log.info("Constrain lxd machines with virt-type: virtual-machine") + bundle.add_constraints({"virt-type": "virtual-machine"}) + return bundle + @property def content(self) -> Mapping: """Yaml content of the bundle loaded into a dict""" @@ -198,16 +230,27 @@ def render(self) -> Path: yaml.safe_dump(self.content, target.open("w")) return target - def switch(self, name: str, path: Path): - """Replace charmhub application with a local charm path. + def switch(self, name: str, path: Optional[Path] = None, channel: Optional[str] = None): + """Replace charmhub application with a local charm path or specific channel. 
Args: - name (str): Which application - path (Path): Path to local charm + name (str): Which application + path (Path): Optional path to local charm + channel (str): Optional channel to use + + Raises: + ValueError: if both path and channel are provided, or neither are provided """ app = self.applications[name] - app["charm"] = str(path.resolve()) - app["channel"] = None + if (not path and not channel) or (path and channel): + raise ValueError("channel and path are mutually exclusive") + if path: + app["charm"] = str(path.resolve()) + app["channel"] = None + app["resources"] = DEFAULT_RESOURCES + if channel: + app["charm"] = name + app["channel"] = channel def drop_constraints(self): """Remove constraints on applications. Useful for testing on lxd.""" @@ -282,7 +325,7 @@ async def cloud_proxied(ops_test: OpsTest): assert ops_test.model, "Model must be present" controller = await ops_test.model.get_controller() controller_model = await controller.get_model("controller") - proxy_config_file = Path(__file__).parent / "data" / "static-proxy-config.yaml" + proxy_config_file = TEST_DATA / "static-proxy-config.yaml" proxy_configs = yaml.safe_load(proxy_config_file.read_text()) local_no_proxy = await get_unit_cidrs(controller_model, "controller", 0) no_proxy = {*proxy_configs["juju-no-proxy"], *local_no_proxy} @@ -352,7 +395,7 @@ async def deploy_model( def bundle_file(request) -> Path: - """Fixture to get bundle file. + """Helper to get bundle file. Args: request: pytest request object @@ -379,30 +422,57 @@ async def kubernetes_cluster(request: pytest.FixtureRequest, ops_test: OpsTest): yield ops_test.model return - log.info("Deploying cluster using %s bundle.", bundle_file) - arch = await cloud_arch(ops_test) + log.info("Deploying cluster using %s bundle.", bundle_path) - charm_path = ("worker/k8s", "worker") - charms = [Charm(ops_test, arch, Path("charms") / p) for p in charm_path] - charm_files = await asyncio.gather( - *[charm.resolve(request.config.option.charm_files) for charm in charms] - ) - bundle = Bundle(ops_test, bundle_path, arch) - _type, _vms = await cloud_type(ops_test) - if _type == "lxd" and not _vms: - log.info("Drop lxd machine constraints") - bundle.drop_constraints() - if _type == "lxd" and _vms: - log.info("Constrain lxd machines with virt-type: virtual-machine") - bundle.add_constraints({"virt-type": "virtual-machine"}) + bundle = await Bundle.create(ops_test, bundle_path) if request.config.option.apply_proxy: await cloud_proxied(ops_test) + + charms = [Charm(ops_test, bundle.arch, Path("charms") / p) for p in ("worker/k8s", "worker")] + charm_files_args = request.config.option.charm_files + DEFAULT_RESOURCES["snap-installation"] = request.config.option.snap_installation_resource + charm_files = await asyncio.gather(*[charm.resolve(charm_files_args) for charm in charms]) + switch_to_path = {} for path, charm in zip(charm_files, charms): - bundle.switch(charm.app_name, path) + if upgrade_channel := request.config.option.upgrade_from: + bundle.switch(charm.app_name, channel=upgrade_channel) + switch_to_path[charm.app_name] = path + else: + bundle.switch(charm.app_name, path=path) + async with deploy_model(request, ops_test, model, bundle) as the_model: + await upgrade_model(the_model, switch_to_path) yield the_model +async def upgrade_model(model: Model, switch_to_path: dict[str, Path]): + """Upgrade the model with the provided charms. 
+ + Args: + model: Juju model + switch_to_path: Mapping of app_name to charm + + """ + if not switch_to_path: + return + + async def _refresh(app_name: str): + """Refresh the application. + + Args: + app_name: Name of the application to refresh + """ + app: Application = model.applications[app_name] + await app.refresh(path=switch_to_path[app_name], resources=DEFAULT_RESOURCES) + + await asyncio.gather(*[_refresh(app) for app in switch_to_path]) + await model.wait_for_idle( + apps=list(switch_to_path.keys()), + status="active", + timeout=30 * 60, + ) + + @pytest_asyncio.fixture(name="_grafana_agent", scope="module") async def grafana_agent(kubernetes_cluster: Model): """Deploy Grafana Agent.""" diff --git a/tests/integration/data/default-snap-installation.tar.gz b/tests/integration/data/default-snap-installation.tar.gz new file mode 100644 index 00000000..e69de29b diff --git a/tests/integration/data/override-latest-edge.tar.gz b/tests/integration/data/override-latest-edge.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0501334631ded519838533929ef7ef7e4fee16c5 GIT binary patch literal 265 zcmV+k0rvhMiwFR5lk8>y1MQN*YQr!PM1A&G4CEX~a%`#iYH|pKUh)BDu@pyCmW`zx z{Pz_lgi%b9t%mh*4<3n^Iw zc25&?{PX-*sQY0WO?wW=Sze$f*E`dY#UZFHUE!!T4*Poddfee;tj4JyVNIi*H?D=! zfsFN8no_4kQKi5djyhf8{l&)>aQ$5CFb%QlLqF=zdUGb{w5_03t5X`g-(3IeK`tyT PEIb(>h-23O015yA^w@!S literal 0 HcmV?d00001 diff --git a/tests/integration/test_k8s.py b/tests/integration/test_k8s.py index c6a6da90..258c039d 100644 --- a/tests/integration/test_k8s.py +++ b/tests/integration/test_k8s.py @@ -7,8 +7,10 @@ import asyncio import logging +from pathlib import Path import pytest +import pytest_asyncio from juju import application, model from tenacity import retry, stop_after_attempt, wait_fixed @@ -19,7 +21,7 @@ log = logging.getLogger(__name__) -async def get_leader(app): +async def get_leader(app) -> int: """Find leader unit of an application. Args: @@ -27,11 +29,15 @@ async def get_leader(app): Returns: int: index to leader unit + + Raises: + ValueError: No leader found """ is_leader = await asyncio.gather(*(u.is_leader_from_status() for u in app.units)) for idx, flag in enumerate(is_leader): if flag: return idx + raise ValueError("No leader found") @pytest.mark.abort_on_fail @@ -134,6 +140,52 @@ async def test_remove_leader_control_plane(kubernetes_cluster: model.Model): await ready_nodes(follower, expected_nodes) +@pytest_asyncio.fixture() +async def override_snap_on_k8s(kubernetes_cluster: model.Model, request): + """ + Override the snap resource on a Kubernetes cluster application and revert it after the test. + + This coroutine function overrides the snap resource of the "k8s" application in the given + Kubernetes cluster with a specified override file, waits for the cluster to become idle, + and then reverts the snap resource back to its original state after the test. + + Args: + kubernetes_cluster (model.Model): The Kubernetes cluster model. + request: The pytest request object containing test configuration options. + + Yields: + The "k8s" application object after the snap resource has been overridden. + + Raises: + AssertionError: If the "k8s" application is not found in the Kubernetes cluster. 
+ """ + k8s = kubernetes_cluster.applications["k8s"] + assert k8s, "k8s application not found" + # Override snap resource + revert = Path(request.config.option.snap_installation_resource) + override = Path(__file__).parent / "data" / "override-latest-edge.tar.gz" + + with override.open("rb") as obj: + k8s.attach_resource("snap-installation", override, obj) + await kubernetes_cluster.wait_for_idle(status="active", timeout=1 * 60) + + yield k8s + + with revert.open("rb") as obj: + k8s.attach_resource("snap-installation", revert, obj) + await kubernetes_cluster.wait_for_idle(status="active", timeout=1 * 60) + + +@pytest.mark.abort_on_fail +async def test_override_snap_resource(override_snap_on_k8s: application.Application): + """Override snap resource.""" + k8s = override_snap_on_k8s + assert k8s, "k8s application not found" + + for unit in k8s.units: + assert "Override" in unit.workload_status_message + + @pytest.mark.cos @retry(reraise=True, stop=stop_after_attempt(12), wait=wait_fixed(60)) async def test_grafana( From 6e2a0c8e053043b079e9a46d8dcb66101694b3c5 Mon Sep 17 00:00:00 2001 From: Adam Dyess Date: Thu, 21 Nov 2024 15:31:59 -0600 Subject: [PATCH 05/13] Ceph-test use image registry from rocks.canonical.com (#182) * Extra debugging on ceph-tests * Switch to pulling busybox image from rocks.cc --- .../data/test_ceph/pv-reader-pod.yaml | 2 +- .../data/test_ceph/pv-writer-pod.yaml | 2 +- tests/integration/helpers.py | 36 ++++++++---- tests/integration/test_ceph.py | 55 +++++++++++-------- 4 files changed, 57 insertions(+), 38 deletions(-) diff --git a/tests/integration/data/test_ceph/pv-reader-pod.yaml b/tests/integration/data/test_ceph/pv-reader-pod.yaml index c87c8691..9f75c946 100755 --- a/tests/integration/data/test_ceph/pv-reader-pod.yaml +++ b/tests/integration/data/test_ceph/pv-reader-pod.yaml @@ -14,7 +14,7 @@ spec: claimName: raw-block-pvc containers: - name: pv-reader - image: busybox + image: rocks.canonical.com/cdk/busybox:1.36 command: ["/bin/sh", "-c", "cat /pvc/test_file"] volumeMounts: - name: pvc-test diff --git a/tests/integration/data/test_ceph/pv-writer-pod.yaml b/tests/integration/data/test_ceph/pv-writer-pod.yaml index 129849b5..7b659add 100755 --- a/tests/integration/data/test_ceph/pv-writer-pod.yaml +++ b/tests/integration/data/test_ceph/pv-writer-pod.yaml @@ -14,7 +14,7 @@ spec: claimName: raw-block-pvc containers: - name: pv-writer - image: busybox + image: rocks.canonical.com/cdk/busybox:1.36 command: ["/bin/sh", "-c", "echo 'PVC test data.' > /pvc/test_file"] volumeMounts: - name: pvc-test diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py index c1cee35a..dae1b2ba 100644 --- a/tests/integration/helpers.py +++ b/tests/integration/helpers.py @@ -7,13 +7,14 @@ import ipaddress import json import logging +import shlex from pathlib import Path from typing import List import yaml from juju import unit from juju.model import Model -from tenacity import AsyncRetrying, retry, stop_after_attempt, wait_fixed +from tenacity import AsyncRetrying, before_sleep_log, retry, stop_after_attempt, wait_fixed log = logging.getLogger(__name__) @@ -125,10 +126,10 @@ async def ready_nodes(k8s, expected_count): async def wait_pod_phase( k8s: unit.Unit, name: str, - phase: str, + *phase: str, namespace: str = "default", retry_times: int = 30, - retry_delay_s: int = 5, + retry_delay_s: int = 15, ): """Wait for the pod to reach the specified phase (e.g. Succeeded). 
@@ -142,23 +143,34 @@ async def wait_pod_phase( """ async for attempt in AsyncRetrying( - stop=stop_after_attempt(retry_times), wait=wait_fixed(retry_delay_s) + stop=stop_after_attempt(retry_times), + wait=wait_fixed(retry_delay_s), + before_sleep=before_sleep_log(log, logging.WARNING), ): with attempt: - cmd = " ".join( + cmd = shlex.join( [ - "k8s kubectl wait", - f"--namespace {namespace}", - "--for=jsonpath='{.status.phase}'=" + phase, + "k8s", + "kubectl", + "get", + "--namespace", + namespace, + "-o", + "jsonpath={.status.phase}", f"pod/{name}", - "--timeout 1s", ] ) action = await k8s.run(cmd) result = await action.wait() - assert ( - result.results["return-code"] == 0 - ), f"Failed waiting for pod to reach {phase} phase." + stdout, stderr = ( + result.results.get(field, "").strip() for field in ["stdout", "stderr"] + ) + assert result.results["return-code"] == 0, ( + f"\nPod hasn't reached phase: {phase}\n" + f"\tstdout: '{stdout}'\n" + f"\tstderr: '{stderr}'" + ) + assert stdout in phase, f"Pod {name} not yet in phase {phase} ({stdout})" async def get_pod_logs( diff --git a/tests/integration/test_ceph.py b/tests/integration/test_ceph.py index 0bfc02d8..61bcb39f 100644 --- a/tests/integration/test_ceph.py +++ b/tests/integration/test_ceph.py @@ -37,29 +37,36 @@ async def test_ceph_sc(kubernetes_cluster: model.Model): assert "rbd.csi.ceph.com" in stdout, f"No ceph provisioner found in: {stdout}" # Copy pod definitions. - for fname in ["ceph-xfs-pvc.yaml", "pv-writer-pod.yaml", "pv-reader-pod.yaml"]: + definitions = ["ceph-xfs-pvc.yaml", "pv-writer-pod.yaml", "pv-reader-pod.yaml"] + for fname in definitions: await k8s.scp_to(_get_data_file_path(fname), f"/tmp/{fname}") - # Create "ceph-xfs" PVC. - event = await k8s.run("k8s kubectl apply -f /tmp/ceph-xfs-pvc.yaml") - result = await event.wait() - assert result.results["return-code"] == 0, "Failed to create pvc." - - # Create a pod that writes to the Ceph PV. - event = await k8s.run("k8s kubectl apply -f /tmp/pv-writer-pod.yaml") - result = await event.wait() - assert result.results["return-code"] == 0, "Failed to create writer pod." - - # Wait for the pod to exit successfully. - await helpers.wait_pod_phase(k8s, "pv-writer-test", "Succeeded") - - # Create a pod that reads the PV data and writes it to the log. - event = await k8s.run("k8s kubectl apply -f /tmp/pv-reader-pod.yaml") - result = await event.wait() - assert result.results["return-code"] == 0, "Failed to create reader pod." - - await helpers.wait_pod_phase(k8s, "pv-reader-test", "Succeeded") - - # Check the logged PV data. - logs = await helpers.get_pod_logs(k8s, "pv-reader-test") - assert "PVC test data" in logs + try: + # Create "ceph-xfs" PVC. + event = await k8s.run("k8s kubectl apply -f /tmp/ceph-xfs-pvc.yaml") + result = await event.wait() + assert result.results["return-code"] == 0, "Failed to create pvc." + + # Create a pod that writes to the Ceph PV. + event = await k8s.run("k8s kubectl apply -f /tmp/pv-writer-pod.yaml") + result = await event.wait() + assert result.results["return-code"] == 0, "Failed to create writer pod." + + # Wait for the pod to exit successfully. + await helpers.wait_pod_phase(k8s, "pv-writer-test", "Succeeded") + + # Create a pod that reads the PV data and writes it to the log. + event = await k8s.run("k8s kubectl apply -f /tmp/pv-reader-pod.yaml") + result = await event.wait() + assert result.results["return-code"] == 0, "Failed to create reader pod." 
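+        # The reader pod runs `cat /pvc/test_file` once and exits, so wait for
+        # the Succeeded phase before collecting its logs.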
+ + await helpers.wait_pod_phase(k8s, "pv-reader-test", "Succeeded") + + # Check the logged PV data. + logs = await helpers.get_pod_logs(k8s, "pv-reader-test") + assert "PVC test data" in logs + finally: + # Cleanup + for fname in reversed(definitions): + event = await k8s.run(f"k8s kubectl delete -f /tmp/{fname}") + result = await event.wait() From 4cdc7f8ef95563201ff5354f3cb586075a3d0cae Mon Sep 17 00:00:00 2001 From: Homayoon Alimohammadi Date: Fri, 22 Nov 2024 03:19:58 +0400 Subject: [PATCH 06/13] Add Network feature config options (#175) Co-authored-by: Adam Dyess --- charms/worker/k8s/charmcraft.yaml | 5 ++ charms/worker/k8s/src/charm.py | 5 ++ .../k8s/tests/unit/test_config_options.py | 52 +++++++++++++++++++ pyproject.toml | 2 +- 4 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 charms/worker/k8s/tests/unit/test_config_options.py diff --git a/charms/worker/k8s/charmcraft.yaml b/charms/worker/k8s/charmcraft.yaml index 19f26f4c..8f062a00 100644 --- a/charms/worker/k8s/charmcraft.yaml +++ b/charms/worker/k8s/charmcraft.yaml @@ -189,6 +189,11 @@ config: default: false description: | Enable/Disable the gateway feature on the cluster. + network-enabled: + type: boolean + default: true + description: | + Enables or disables the network feature. resources: snap-installation: diff --git a/charms/worker/k8s/src/charm.py b/charms/worker/k8s/src/charm.py index 722106b3..68dd36e2 100755 --- a/charms/worker/k8s/src/charm.py +++ b/charms/worker/k8s/src/charm.py @@ -420,6 +420,10 @@ def _assemble_cluster_config(self) -> UserFacingClusterConfig: enabled=self.config.get("gateway-enabled"), ) + network = NetworkConfig( + enabled=self.config.get("network-enabled"), + ) + cloud_provider = None if self.xcp.has_xcp: cloud_provider = "external" @@ -427,6 +431,7 @@ def _assemble_cluster_config(self) -> UserFacingClusterConfig: return UserFacingClusterConfig( local_storage=local_storage, gateway=gateway, + network=network, annotations=self._get_valid_annotations(), cloud_provider=cloud_provider, ) diff --git a/charms/worker/k8s/tests/unit/test_config_options.py b/charms/worker/k8s/tests/unit/test_config_options.py new file mode 100644 index 00000000..13bb4f49 --- /dev/null +++ b/charms/worker/k8s/tests/unit/test_config_options.py @@ -0,0 +1,52 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. + +# Learn more about testing at: https://juju.is/docs/sdk/testing + +# pylint: disable=duplicate-code,missing-function-docstring +"""Unit tests.""" + + +from pathlib import Path + +import ops +import ops.testing +import pytest +from charm import K8sCharm + + +@pytest.fixture(params=["worker", "control-plane"]) +def harness(request): + """Craft a ops test harness. + + Args: + request: pytest request object + """ + meta = Path(__file__).parent / "../../charmcraft.yaml" + if request.param == "worker": + meta = Path(__file__).parent / "../../../charmcraft.yaml" + harness = ops.testing.Harness(K8sCharm, meta=meta.read_text()) + harness.begin() + harness.charm.is_worker = request.param == "worker" + yield harness + harness.cleanup() + + +def test_configure_network_options(harness): + """Test configuring the network options. 
+ + Args: + harness: the harness under test + """ + if harness.charm.is_worker: + pytest.skip("Not applicable on workers") + + harness.disable_hooks() + + harness.update_config({"network-enabled": False}) + ufcg = harness.charm._assemble_cluster_config() + assert not ufcg.network.enabled, "Network should be disabled" + + harness.update_config({"network-enabled": True}) + ufcg = harness.charm._assemble_cluster_config() + assert ufcg.network.enabled, "Network should be enabled" diff --git a/pyproject.toml b/pyproject.toml index fdf9c317..bffeb7fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,4 +86,4 @@ max-complexity = 10 skip = "build,lib,venv,icon.svg,.tox,.git,.mypy_cache,.ruff_cache,.coverage" [tool.pyright] -extraPaths = ["./charms/worker/k8s/lib","./charms/worker/k8s/src"] +extraPaths = ["./charms/worker/k8s/lib", "./charms/worker/k8s/src"] From fb5397b4c1eb5492ad931dad7e14087016256cf9 Mon Sep 17 00:00:00 2001 From: Adam Dyess Date: Thu, 21 Nov 2024 19:18:01 -0600 Subject: [PATCH 07/13] Rename charm configurations to include new prefixes (#179) * Rename charm configurations to include new prefixes * use accepted node-base library * Improve the docs for service-cidr and pod-cidr --- charms/worker/charmcraft.yaml | 2 +- charms/worker/k8s/charmcraft.yaml | 63 ++++++++++++++------ charms/worker/k8s/requirements.txt | 2 +- charms/worker/k8s/src/charm.py | 19 +++--- charms/worker/k8s/src/kube_control.py | 4 +- charms/worker/k8s/tests/unit/test_base.py | 5 +- tests/integration/data/test-bundle-ceph.yaml | 2 + tests/integration/data/test-bundle-etcd.yaml | 3 +- tests/integration/data/test-bundle.yaml | 2 + tests/integration/test_k8s.py | 2 +- 10 files changed, 69 insertions(+), 35 deletions(-) diff --git a/charms/worker/charmcraft.yaml b/charms/worker/charmcraft.yaml index 6f158ad0..2d7602ed 100644 --- a/charms/worker/charmcraft.yaml +++ b/charms/worker/charmcraft.yaml @@ -60,7 +60,7 @@ bases: config: options: - labels: + node-labels: default: "" type: string description: | diff --git a/charms/worker/k8s/charmcraft.yaml b/charms/worker/k8s/charmcraft.yaml index 8f062a00..72b2b452 100644 --- a/charms/worker/k8s/charmcraft.yaml +++ b/charms/worker/k8s/charmcraft.yaml @@ -81,7 +81,7 @@ config: Example: e.g.: key1=value1 key2=value2 - containerd_custom_registries: + containerd-custom-registries: type: string default: "[]" description: | @@ -128,39 +128,63 @@ config: "key_file": "'"$(base64 -w 0 < ~/my.custom.key.pem)"'", }]' - datastore: + bootstrap-datastore: default: dqlite type: string description: | The datastore to use in Canonical Kubernetes. This cannot be changed after deployment. Allowed values are "dqlite" and "etcd". If "etcd" is chosen, the charm should be integrated with the etcd charm. - labels: - default: "" - type: string - description: | - Labels can be used to organize and to select subsets of nodes in the - cluster. Declare node labels in key=value format, separated by spaces. - register-with-taints: + bootstrap-node-taints: type: string default: "" description: | Space-separated list of taints to apply to this node at registration time. - This config is only used at deploy time when Kubelet first registers the + This config is only used at bootstrap time when Kubelet first registers the node with Kubernetes. To change node taints after deploy time, use kubectl instead. 
For more information, see the upstream Kubernetes documentation about taints: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ - service-cidr: + bootstrap-pod-cidr: + type: string + default: "10.1.0.0/16" + description: | + Comma-separated CIDR blocks for IP addresses that can be assigned + to pods within the cluster. Can contain at most 2 blocks, one for IPv4 + and one for IPv6. + + After deployment it is not possible to change the size of + the IP range. + + Examples: + - "192.0.2.0/24" + - "2001:db8::/32" + - "192.0.2.0/24,2001:db8::/32" + - "2001:db8::/32,192.0.2.0/24" + bootstrap-service-cidr: type: string default: 10.152.183.0/24 description: | - CIDR to use for Kubernetes services. After deployment it is - only possible to increase the size of the IP range. It is not possible to - change or shrink the address range after deployment. + Comma-separated CIDR blocks for IP addresses that can be assigned + to services within the cluster. Can contain at most 2 blocks, one for IPv4 + and one for IPv6. + + After deployment it is not possible to change the size of + the IP range. + + Examples: + - "192.0.2.0/24" + - "2001:db8::/32" + - "192.0.2.0/24,2001:db8::/32" + - "2001:db8::/32,192.0.2.0/24" + gateway-enabled: + type: boolean + default: false + description: | + Enable/Disable the gateway feature on the cluster. local-storage-enabled: type: boolean default: true @@ -184,16 +208,17 @@ config: "Retain". If set to "Delete", the storage will be deleted when the PersistentVolumeClaim is deleted. If set to "Retain", the storage will be retained when the PersistentVolumeClaim is deleted. - gateway-enabled: - type: boolean - default: false - description: | - Enable/Disable the gateway feature on the cluster. network-enabled: type: boolean default: true description: | Enables or disables the network feature. + node-labels: + default: "" + type: string + description: | + Labels can be used to organize and to select subsets of nodes in the + cluster. Declare node labels in key=value format, separated by spaces. 
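The renamed keys form one `bootstrap-*` family plus `node-labels`. A hypothetical deploy-time sketch using these options (channel and values are placeholders):

    juju deploy k8s --channel latest/edge \
      --config bootstrap-datastore=dqlite \
      --config bootstrap-pod-cidr=10.1.0.0/16 \
      --config node-labels="environment=test"

The `bootstrap-*` options only take effect when a node first registers with the cluster, while `node-labels` can still be changed on a live model with `juju config`.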
resources: snap-installation: diff --git a/charms/worker/k8s/requirements.txt b/charms/worker/k8s/requirements.txt index 9e532201..3c639667 100644 --- a/charms/worker/k8s/requirements.txt +++ b/charms/worker/k8s/requirements.txt @@ -1,6 +1,6 @@ charm-lib-contextual-status @ git+https://github.com/charmed-kubernetes/charm-lib-contextual-status@255dd4a23defc16dcdac832306e5f460a0f1200c charm-lib-interface-external-cloud-provider @ git+https://github.com/charmed-kubernetes/charm-lib-interface-external-cloud-provider@e1c5fc69e98100a7d43c0ad5a7969bba1ecbcd40 -charm-lib-node-base @ git+https://github.com/charmed-kubernetes/layer-kubernetes-node-base@9b212854e768f13c26cc907bed51444e97e51b50#subdirectory=ops +charm-lib-node-base @ git+https://github.com/charmed-kubernetes/layer-kubernetes-node-base@a14d685237302711113ac651920476437b3b9785#subdirectory=ops charm-lib-reconciler @ git+https://github.com/charmed-kubernetes/charm-lib-reconciler@f818cc30d1a22be43ffdfecf7fbd9c3fd2967502 ops-interface-kube-control @ git+https://github.com/charmed-kubernetes/interface-kube-control.git@main#subdirectory=ops ops.interface_aws @ git+https://github.com/charmed-kubernetes/interface-aws-integration@main#subdirectory=ops diff --git a/charms/worker/k8s/src/charm.py b/charms/worker/k8s/src/charm.py index 68dd36e2..95fa9950 100755 --- a/charms/worker/k8s/src/charm.py +++ b/charms/worker/k8s/src/charm.py @@ -150,7 +150,11 @@ def __init__(self, *args): self.distributor = TokenDistributor(self, self.get_node_name(), self.api_manager) self.collector = TokenCollector(self, self.get_node_name()) self.labeller = LabelMaker( - self, kubeconfig_path=self._internal_kubeconfig, kubectl=KUBECTL_PATH + self, + kubeconfig_path=self._internal_kubeconfig, + kubectl=KUBECTL_PATH, + user_label_key="node-labels", + timeout=15, ) self._stored.set_default(is_dying=False, cluster_name=str()) @@ -324,8 +328,9 @@ def _bootstrap_k8s_snap(self): bootstrap_config = BootstrapConfig.construct() self._configure_datastore(bootstrap_config) bootstrap_config.cluster_config = self._assemble_cluster_config() - bootstrap_config.service_cidr = str(self.config["service-cidr"]) - bootstrap_config.control_plane_taints = str(self.config["register-with-taints"]).split() + bootstrap_config.service_cidr = str(self.config["bootstrap-service-cidr"]) + bootstrap_config.pod_cidr = str(self.config["bootstrap-pod-cidr"]) + bootstrap_config.control_plane_taints = str(self.config["bootstrap-node-taints"]).split() bootstrap_config.extra_sans = [_get_public_address()] bootstrap_config.extra_node_kube_controller_manager_args = { "--cluster-name": self._generate_unique_cluster_name() @@ -354,7 +359,7 @@ def _config_containerd_registries(self): registries, config = [], "" containerd_relation = self.model.get_relation("containerd") if self.is_control_plane: - config = str(self.config["containerd_custom_registries"]) + config = str(self.config["containerd-custom-registries"]) registries = containerd.parse_registries(config) else: registries = containerd.recover(containerd_relation) @@ -416,9 +421,7 @@ def _assemble_cluster_config(self) -> UserFacingClusterConfig: # https://github.com/canonical/k8s-operator/pull/169/files#r1847378214 ) - gateway = GatewayConfig( - enabled=self.config.get("gateway-enabled"), - ) + gateway = GatewayConfig(enabled=self.config.get("gateway-enabled")) network = NetworkConfig( enabled=self.config.get("network-enabled"), @@ -444,7 +447,7 @@ def _configure_datastore(self, config: Union[BootstrapConfig, UpdateClusterConfi The configuration object for 
the Kubernetes cluster. This object will be modified in-place to include etcd's configuration details. """ - datastore = self.config.get("datastore") + datastore = self.config.get("bootstrap-datastore") if datastore not in SUPPORTED_DATASTORES: log.error( diff --git a/charms/worker/k8s/src/kube_control.py b/charms/worker/k8s/src/kube_control.py index facb5796..02c8ea2f 100644 --- a/charms/worker/k8s/src/kube_control.py +++ b/charms/worker/k8s/src/kube_control.py @@ -25,8 +25,8 @@ def configure(charm: K8sCharmProtocol): status.add(ops.MaintenanceStatus("Configuring Kube Control")) ca_cert, endpoints = "", [f"https://{binding.network.bind_address}:6443"] - labels = str(charm.model.config["labels"]) - taints = str(charm.model.config["register-with-taints"]) + labels = str(charm.model.config["node-labels"]) + taints = str(charm.model.config["bootstrap-node-taints"]) if charm._internal_kubeconfig.exists(): kubeconfig = yaml.safe_load(charm._internal_kubeconfig.read_text()) cluster = kubeconfig["clusters"][0]["cluster"] diff --git a/charms/worker/k8s/tests/unit/test_base.py b/charms/worker/k8s/tests/unit/test_base.py index dc1038ce..a69a6f45 100644 --- a/charms/worker/k8s/tests/unit/test_base.py +++ b/charms/worker/k8s/tests/unit/test_base.py @@ -144,7 +144,7 @@ def test_configure_datastore_bootstrap_config_etcd(harness): harness.disable_hooks() bs_config = BootstrapConfig() - harness.update_config({"datastore": "etcd"}) + harness.update_config({"bootstrap-datastore": "etcd"}) harness.add_relation("etcd", "etcd") with mock.patch.object(harness.charm, "etcd") as mock_etcd: mock_etcd.is_ready = True @@ -182,7 +182,7 @@ def test_configure_datastore_runtime_config_etcd(harness): pytest.skip("Not applicable on workers") harness.disable_hooks() - harness.update_config({"datastore": "etcd"}) + harness.update_config({"bootstrap-datastore": "etcd"}) harness.add_relation("etcd", "etcd") with mock.patch.object(harness.charm, "etcd") as mock_etcd: mock_etcd.is_ready = True @@ -190,6 +190,7 @@ def test_configure_datastore_runtime_config_etcd(harness): mock_etcd.get_connection_string.return_value = "foo:1234,bar:1234" uccr_config = UpdateClusterConfigRequest() harness.charm._configure_datastore(uccr_config) + assert uccr_config.datastore assert uccr_config.datastore.ca_crt == "" assert uccr_config.datastore.client_crt == "" assert uccr_config.datastore.client_key == "" diff --git a/tests/integration/data/test-bundle-ceph.yaml b/tests/integration/data/test-bundle-ceph.yaml index 8b803e9e..37d4c912 100644 --- a/tests/integration/data/test-bundle-ceph.yaml +++ b/tests/integration/data/test-bundle-ceph.yaml @@ -11,6 +11,8 @@ applications: channel: latest/edge constraints: cores=2 mem=8G root-disk=16G num_units: 1 + options: + bootstrap-node-taints: "node-role.kubernetes.io/control-plane=:NoSchedule" k8s-worker: charm: k8s-worker channel: latest/edge diff --git a/tests/integration/data/test-bundle-etcd.yaml b/tests/integration/data/test-bundle-etcd.yaml index 7446af4f..662c984f 100644 --- a/tests/integration/data/test-bundle-etcd.yaml +++ b/tests/integration/data/test-bundle-etcd.yaml @@ -22,7 +22,8 @@ applications: num_units: 1 constraints: cores=2 mem=8G root-disk=16G options: - datastore: etcd + bootstrap-datastore: etcd + bootstrap-node-taints: "node-role.kubernetes.io/control-plane=:NoSchedule" k8s-worker: charm: k8s-worker channel: latest/edge diff --git a/tests/integration/data/test-bundle.yaml b/tests/integration/data/test-bundle.yaml index dcc5710f..76448c31 100644 --- 
a/tests/integration/data/test-bundle.yaml +++ b/tests/integration/data/test-bundle.yaml @@ -12,6 +12,8 @@ applications: num_units: 3 constraints: cores=2 mem=8G root-disk=16G expose: true + options: + bootstrap-node-taints: "node-role.kubernetes.io/control-plane=:NoSchedule" k8s-worker: charm: k8s-worker channel: latest/edge diff --git a/tests/integration/test_k8s.py b/tests/integration/test_k8s.py index 258c039d..2e6ee6ea 100644 --- a/tests/integration/test_k8s.py +++ b/tests/integration/test_k8s.py @@ -54,7 +54,7 @@ async def test_nodes_labelled(request, kubernetes_cluster: model.Model): testname: str = request.node.name k8s: application.Application = kubernetes_cluster.applications["k8s"] worker: application.Application = kubernetes_cluster.applications["k8s-worker"] - label_config = {"labels": f"{testname}="} + label_config = {"node-labels": f"{testname}="} await asyncio.gather(k8s.set_config(label_config), worker.set_config(label_config)) await kubernetes_cluster.wait_for_idle(status="active", timeout=10 * 60) From aaf658a54ed7272ba0b4fa32db361a47736d2f78 Mon Sep 17 00:00:00 2001 From: Homayoon Alimohammadi Date: Fri, 22 Nov 2024 15:34:13 +0400 Subject: [PATCH 08/13] Replace AssertionError with ReconcilerError (#173) --- charms/worker/k8s/src/charm.py | 26 +++++++++------- charms/worker/k8s/src/token_distributor.py | 35 +++++++++++++++++----- 2 files changed, 43 insertions(+), 18 deletions(-) diff --git a/charms/worker/k8s/src/charm.py b/charms/worker/k8s/src/charm.py index 95fa9950..39b03f25 100755 --- a/charms/worker/k8s/src/charm.py +++ b/charms/worker/k8s/src/charm.py @@ -191,7 +191,6 @@ def _k8s_info(self, event: ops.EventBase): @status.on_error( ops.WaitingStatus("Installing COS requirements"), subprocess.CalledProcessError, - AssertionError, ) def _apply_cos_requirements(self): """Apply COS requirements for integration. @@ -315,7 +314,7 @@ def _check_k8sd_ready(self): @on_error( ops.WaitingStatus("Waiting to bootstrap k8s snap"), - AssertionError, + ReconcilerError, InvalidResponseError, K8sdConnectionError, ) @@ -387,7 +386,7 @@ def _get_valid_annotations(self) -> Optional[dict]: dict: The parsed annotations if valid, otherwise None. Raises: - AssertionError: If any annotation is invalid. + ReconcilerError: If any annotation is invalid. 
""" raw_annotations = self.config.get("annotations") if not raw_annotations: @@ -398,9 +397,10 @@ def _get_valid_annotations(self) -> Optional[dict]: annotations = {} try: for key, value in [pair.split("=", 1) for pair in raw_annotations.split()]: - assert key and value, "Invalid Annotation" # nosec + if not key or not value: + raise ReconcilerError("Invalid Annotation") annotations[key] = value - except AssertionError: + except ReconcilerError: log.exception("Invalid annotations: %s", raw_annotations) status.add(ops.BlockedStatus("Invalid Annotations")) raise @@ -456,14 +456,18 @@ def _configure_datastore(self, config: Union[BootstrapConfig, UpdateClusterConfi ", ".join(SUPPORTED_DATASTORES), ) status.add(ops.BlockedStatus(f"Invalid datastore: {datastore}")) - assert datastore in SUPPORTED_DATASTORES # nosec + if datastore not in SUPPORTED_DATASTORES: + raise ReconcilerError(f"Invalid datastore: {datastore}") if datastore == "etcd": log.info("Using etcd as external datastore") etcd_relation = self.model.get_relation("etcd") - assert etcd_relation, "Missing etcd relation" # nosec - assert self.etcd.is_ready, "etcd is not ready" # nosec + if not etcd_relation: + raise ReconcilerError("Missing etcd relation") + + if not self.etcd.is_ready: + raise ReconcilerError("etcd is not ready") etcd_config = self.etcd.get_client_credentials() if isinstance(config, BootstrapConfig): @@ -580,7 +584,7 @@ def _enable_functionalities(self): @on_error( WaitingStatus("Ensure that the cluster configuration is up-to-date"), - AssertionError, + ReconcilerError, InvalidResponseError, K8sdConnectionError, ) @@ -618,7 +622,7 @@ def _get_scrape_jobs(self): return self.cos.get_metrics_endpoints( self.get_node_name(), token, self.is_control_plane ) - except AssertionError: + except ReconcilerError: log.exception("Failed to get COS token.") return [] @@ -690,7 +694,7 @@ def _get_proxy_env(self) -> Dict[str, str]: @on_error( WaitingStatus("Waiting for Cluster token"), - AssertionError, + ReconcilerError, InvalidResponseError, K8sdConnectionError, ) diff --git a/charms/worker/k8s/src/token_distributor.py b/charms/worker/k8s/src/token_distributor.py index c3d65c41..d51d91bb 100644 --- a/charms/worker/k8s/src/token_distributor.py +++ b/charms/worker/k8s/src/token_distributor.py @@ -11,6 +11,7 @@ import charms.contextual_status as status import ops +from charms.contextual_status import ReconcilerError from charms.k8s.v0.k8sd_api_manager import ( ErrorCodes, InvalidResponseError, @@ -209,6 +210,9 @@ def cluster_name(self, relation: ops.Relation, local: bool) -> str: Returns: the recovered cluster name from existing relations + + Raises: + ReconcilerError: If fails to find 1 relation-name:cluster-name. """ cluster_name: Optional[str] = "" if not local: @@ -218,7 +222,8 @@ def cluster_name(self, relation: ops.Relation, local: bool) -> str: if value := relation.data[unit].get("cluster-name"): values |= {value} if values: - assert len(values) == 1, f"Failed to find 1 {relation.name}:cluster-name" # nosec + if len(values) != 1: + raise ReconcilerError(f"Failed to find 1 {relation.name}:cluster-name") (cluster_name,) = values elif not (cluster_name := relation.data[self.charm.unit].get("joined")): # joined_cluster_name @@ -235,6 +240,12 @@ def recover_token(self, relation: ops.Relation) -> Generator[str, None, None]: Yields: str: extracted token content + + Raises: + ReconcilerError: + - If fails to find 1 relation-name:secret-id. + - If relation-name:secret-key is not valid. + - If relation-name:token is not valid. 
""" self.request(relation) @@ -246,14 +257,17 @@ def recover_token(self, relation: ops.Relation) -> Generator[str, None, None]: if (secret_id := relation.data[unit].get(secret_key)) } - assert len(secret_ids) == 1, f"Failed to find 1 {relation.name}:{secret_key}" # nosec + if len(secret_ids) != 1: + raise ReconcilerError(f"Failed to find 1 {relation.name}:{secret_key}") (secret_id,) = secret_ids - assert secret_id, f"{relation.name}:{secret_key} is not valid" # nosec + if not secret_id: + raise ReconcilerError(f"{relation.name}:{secret_key} is not valid") secret = self.charm.model.get_secret(id=secret_id) # Get the content from the secret content = secret.get_content(refresh=True) - assert content["token"], f"{relation.name}:token not valid" # nosec + if not content.get("token"): + raise ReconcilerError(f"{relation.name}:token not valid") yield content["token"] # signal that the relation is joined, the token is used @@ -343,7 +357,7 @@ def update_node(self, relation: ops.Relation, unit: ops.Unit, state: str): """ relation.data[self.charm.app][unit.name] = state - def allocate_tokens( + def allocate_tokens( # noqa: C901 self, relation: ops.Relation, token_strategy: TokenStrategy, @@ -356,16 +370,23 @@ def allocate_tokens( token_strategy (TokenStrategy): The strategy of token creation. token_type (ClusterTokenType): The type of cluster token. Defaults to ClusterTokenType.NONE. + + Raises: + ReconcilerError: + - If token_strategy is valid. + - If remote application doesn't exist on relation. """ units = relation.units if self.charm.app == relation.app: # include self in peer relations units |= {self.charm.unit} - assert relation.app, f"Remote application doesn't exist on {relation.name}" # nosec + if not relation.app: + raise ReconcilerError(f"Remote application doesn't exist on {relation.name}") # Select the appropriate token creation strategy tokenizer = self.token_strategies.get(token_strategy) - assert tokenizer, f"Invalid token_strategy: {token_strategy}" # nosec + if not tokenizer: + raise ReconcilerError(f"Invalid token_strategy: {token_strategy}") log.info("Allocating %s %s tokens", token_type.name.title(), token_strategy.name.title()) status.add( From 109adea9128d75d9b3ca1b1595b3dc64aa339142 Mon Sep 17 00:00:00 2001 From: eaudetcobello Date: Fri, 22 Nov 2024 12:11:21 -0500 Subject: [PATCH 09/13] Add Load Balancer configuration through charm config (#170) * Add cluster config through charm config * initial commit * Filled out the charmcraft with the features --------- Co-authored-by: Benjamin Schimke Co-authored-by: Adam Dyess --- charms/worker/k8s/charmcraft.yaml | 48 +++++++++++++++++++ .../k8s/lib/charms/k8s/v0/k8sd_api_manager.py | 8 ++-- charms/worker/k8s/src/charm.py | 14 ++++++ 3 files changed, 66 insertions(+), 4 deletions(-) diff --git a/charms/worker/k8s/charmcraft.yaml b/charms/worker/k8s/charmcraft.yaml index 72b2b452..4746bec8 100644 --- a/charms/worker/k8s/charmcraft.yaml +++ b/charms/worker/k8s/charmcraft.yaml @@ -185,6 +185,54 @@ config: default: false description: | Enable/Disable the gateway feature on the cluster. + load-balancer-enabled: + type: boolean + default: false + description: | + Enable/Disable the load balancer feature on the cluster. + load-balancer-cidrs: + type: string + default: "" + description: | + Space-separated list of CIDRs to use for the load balancer. This is + only used if load-balancer-enabled is set to true. + load-balancer-l2-mode: + type: boolean + default: false + description: | + Enable/Disable L2 mode for the load balancer. 
+      load-balancer-enabled is set to true.
+  load-balancer-l2-interfaces:
+    type: string
+    default: ""
+    description: |
+      Space-separated list of interfaces to use for the load balancer. This
+      is only used if load-balancer-l2-mode is set to true. If unset, all
+      interfaces will be used.
+  load-balancer-bgp-mode:
+    type: boolean
+    default: false
+    description: |
+      Enable/Disable BGP mode for the load balancer. This is only used if
+      load-balancer-enabled is set to true.
+  load-balancer-bgp-local-asn:
+    type: int
+    default: 64512
+    description: |
+      Local ASN for the load balancer. This is only used if load-balancer-bgp-mode
+      is set to true.
+  load-balancer-bgp-peer-address:
+    type: string
+    default: ""
+    description: |
+      Address of the BGP peer for the load balancer. This is only used if
+      load-balancer-bgp-mode is set to true.
+  load-balancer-bgp-peer-port:
+    type: int
+    default: 179
+    description: |
+      Port of the BGP peer for the load balancer. This is only used if
+      load-balancer-bgp-mode is set to true.
   local-storage-enabled:
     type: boolean
     default: true
diff --git a/charms/worker/k8s/lib/charms/k8s/v0/k8sd_api_manager.py b/charms/worker/k8s/lib/charms/k8s/v0/k8sd_api_manager.py
index 6b0cf534..12234de1 100644
--- a/charms/worker/k8s/lib/charms/k8s/v0/k8sd_api_manager.py
+++ b/charms/worker/k8s/lib/charms/k8s/v0/k8sd_api_manager.py
@@ -230,9 +230,9 @@ class LoadBalancerConfig(BaseModel, allow_population_by_field_name=True):
     Attributes:
         enabled: Optional flag which represents the status of LoadBalancer.
         cidrs: List of CIDR blocks for the load balancer.
-        l2_enabled: Optional flag to enable or disable layer 2 functionality.
+        l2_mode: Optional flag to enable or disable layer 2 mode.
         l2_interfaces: List of layer 2 interfaces for the load balancer.
-        bgp_enabled: Optional flag to enable or disable BGP.
+        bgp_mode: Optional flag to enable or disable BGP.
         bgp_local_asn: The local ASN for BGP configuration.
         bgp_peer_address: The peer address for BGP configuration.
         bgp_peer_asn: The peer ASN for BGP configuration.
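A minimal sketch of how the renamed aliases serialize, assuming the pydantic v1 API pinned in requirements.txt (values illustrative):

    lb = LoadBalancerConfig(enabled=True, cidrs=["10.100.0.0/24"], l2_mode=True)
    # The by-alias dump carries the dashed keys on the wire.
    assert lb.dict(by_alias=True, exclude_none=True) == {
        "enabled": True,
        "cidrs": ["10.100.0.0/24"],
        "l2-mode": True,
    }

Because the model sets `allow_population_by_field_name=True`, callers may construct it with the pythonic field names while the dashed aliases appear in the serialized payload.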
@@ -241,9 +241,9 @@ class LoadBalancerConfig(BaseModel, allow_population_by_field_name=True): enabled: Optional[bool] = Field(default=None) cidrs: Optional[List[str]] = Field(default=None) - l2_enabled: Optional[bool] = Field(default=None, alias="l2-enabled") + l2_mode: Optional[bool] = Field(default=None, alias="l2-mode") l2_interfaces: Optional[List[str]] = Field(default=None, alias="l2-interfaces") - bgp_enabled: Optional[bool] = Field(default=None, alias="bgp-enabled") + bgp_mode: Optional[bool] = Field(default=None, alias="bgp-mode") bgp_local_asn: Optional[int] = Field(default=None, alias="bgp-local-asn") bgp_peer_address: Optional[str] = Field(default=None, alias="bgp-peer-address") bgp_peer_asn: Optional[int] = Field(default=None, alias="bgp-peer-asn") diff --git a/charms/worker/k8s/src/charm.py b/charms/worker/k8s/src/charm.py index 39b03f25..6936b219 100755 --- a/charms/worker/k8s/src/charm.py +++ b/charms/worker/k8s/src/charm.py @@ -45,6 +45,7 @@ JoinClusterRequest, K8sdAPIManager, K8sdConnectionError, + LoadBalancerConfig, LocalStorageConfig, NetworkConfig, UnixSocketConnectionFactory, @@ -427,6 +428,18 @@ def _assemble_cluster_config(self) -> UserFacingClusterConfig: enabled=self.config.get("network-enabled"), ) + load_balancer = LoadBalancerConfig( + enabled=self.config.get("load-balancer-enabled"), + cidrs=str(self.config.get("load-balancer-cidrs")).split(), + l2_mode=self.config.get("load-balancer-l2-mode"), + l2_interfaces=str(self.config.get("load-balancer-l2-interfaces")).split(), + bgp_mode=self.config.get("load-balancer-bgp-mode"), + bgp_local_asn=self.config.get("load-balancer-bgp-local-asn"), + bgp_peer_address=self.config.get("load-balancer-bgp-peer-address"), + bgp_peer_asn=self.config.get("load-balancer-bgp-peer-asn"), + bgp_peer_port=self.config.get("load-balancer-bgp-peer-port"), + ) + cloud_provider = None if self.xcp.has_xcp: cloud_provider = "external" @@ -437,6 +450,7 @@ def _assemble_cluster_config(self) -> UserFacingClusterConfig: network=network, annotations=self._get_valid_annotations(), cloud_provider=cloud_provider, + load_balancer=load_balancer, ) def _configure_datastore(self, config: Union[BootstrapConfig, UpdateClusterConfigRequest]): From 9909a91458402ed863c2d19d6e59b123422b9a93 Mon Sep 17 00:00:00 2001 From: Adam Dyess Date: Fri, 22 Nov 2024 12:55:38 -0600 Subject: [PATCH 10/13] Add charm config for metrics-server and dns-config (#183) --- charms/worker/k8s/charmcraft.yaml | 122 ++++++++++++++-------- charms/worker/k8s/src/charm.py | 43 ++++---- charms/worker/k8s/tests/unit/test_base.py | 1 - 3 files changed, 95 insertions(+), 71 deletions(-) diff --git a/charms/worker/k8s/charmcraft.yaml b/charms/worker/k8s/charmcraft.yaml index 4746bec8..09ec05cf 100644 --- a/charms/worker/k8s/charmcraft.yaml +++ b/charms/worker/k8s/charmcraft.yaml @@ -81,6 +81,58 @@ config: Example: e.g.: key1=value1 key2=value2 + bootstrap-datastore: + default: dqlite + type: string + description: | + The datastore to use in Canonical Kubernetes. This cannot be changed + after deployment. Allowed values are "dqlite" and "etcd". If "etcd" is + chosen, the charm should be integrated with the etcd charm. + bootstrap-node-taints: + type: string + default: "" + description: | + Space-separated list of taints to apply to this node at registration time. + + This config is only used at bootstrap time when Kubelet first registers the + node with Kubernetes. To change node taints after deploy time, use kubectl + instead. 
+ + For more information, see the upstream Kubernetes documentation about + taints: + https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ + bootstrap-pod-cidr: + type: string + default: "10.1.0.0/16" + description: | + Comma-separated CIDR blocks for IP addresses that can be assigned + to pods within the cluster. Can contain at most 2 blocks, one for IPv4 + and one for IPv6. + + After deployment it is not possible to change the size of + the IP range. + + Examples: + - "192.0.2.0/24" + - "2001:db8::/32" + - "192.0.2.0/24,2001:db8::/32" + - "2001:db8::/32,192.0.2.0/24" + bootstrap-service-cidr: + type: string + default: 10.152.183.0/24 + description: | + Comma-separated CIDR blocks for IP addresses that can be assigned + to services within the cluster. Can contain at most 2 blocks, one for IPv4 + and one for IPv6. + + After deployment it is not possible to change the size of + the IP range. + + Examples: + - "192.0.2.0/24" + - "2001:db8::/32" + - "192.0.2.0/24,2001:db8::/32" + - "2001:db8::/32,192.0.2.0/24" containerd-custom-registries: type: string default: "[]" @@ -127,59 +179,32 @@ config: "cert_file": "'"$(base64 -w 0 < ~/my.custom.cert.pem)"'", "key_file": "'"$(base64 -w 0 < ~/my.custom.key.pem)"'", }]' - - bootstrap-datastore: - default: dqlite - type: string + dns-enabled: + type: boolean + default: true description: | - The datastore to use in Canonical Kubernetes. This cannot be changed - after deployment. Allowed values are "dqlite" and "etcd". If "etcd" is - chosen, the charm should be integrated with the etcd charm. - bootstrap-node-taints: + Enable/Disable the DNS feature on the cluster. + dns-cluster-domain: type: string - default: "" + default: "cluster.local" description: | - Space-separated list of taints to apply to this node at registration time. - - This config is only used at bootstrap time when Kubelet first registers the - node with Kubernetes. To change node taints after deploy time, use kubectl - instead. - - For more information, see the upstream Kubernetes documentation about - taints: - https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ - bootstrap-pod-cidr: + Sets the local domain of the cluster + dns-service-ip: type: string - default: "10.1.0.0/16" + default: "" description: | - Comma-separated CIDR blocks for IP addresses that can be assigned - to pods within the cluster. Can contain at most 2 blocks, one for IPv4 - and one for IPv6. - - After deployment it is not possible to change the size of - the IP range. - - Examples: - - "192.0.2.0/24" - - "2001:db8::/32" - - "192.0.2.0/24,2001:db8::/32" - - "2001:db8::/32,192.0.2.0/24" - bootstrap-service-cidr: + Sets the IP address of the dns service. If omitted defaults to the IP address + of the Kubernetes service created by the feature. + + Can be used to point to an external dns server when feature is disabled. + dns-upstream-nameservers: type: string - default: 10.152.183.0/24 + default: "" description: | - Comma-separated CIDR blocks for IP addresses that can be assigned - to services within the cluster. Can contain at most 2 blocks, one for IPv4 - and one for IPv6. - - After deployment it is not possible to change the size of - the IP range. - - Examples: - - "192.0.2.0/24" - - "2001:db8::/32" - - "192.0.2.0/24,2001:db8::/32" - - "2001:db8::/32,192.0.2.0/24" + Space-separated list of upstream nameservers used to forward queries for out-of-cluster + endpoints. + + If omitted defaults to `/etc/resolv.conf` and uses the nameservers on each node. 
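As a sketch of these options together, forwarding out-of-cluster queries to explicit resolvers (addresses are placeholders):

    juju config k8s dns-upstream-nameservers="8.8.8.8 1.1.1.1"

or, with the feature disabled, pointing the cluster at an external DNS server:

    juju config k8s dns-enabled=false dns-service-ip=10.152.183.10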
gateway-enabled: type: boolean default: false @@ -256,6 +281,11 @@ config: "Retain". If set to "Delete", the storage will be deleted when the PersistentVolumeClaim is deleted. If set to "Retain", the storage will be retained when the PersistentVolumeClaim is deleted. + metrics-server-enabled: + type: boolean + default: true + description: | + Enable/Disable the metrics-server feature on the cluster. network-enabled: type: boolean default: true diff --git a/charms/worker/k8s/src/charm.py b/charms/worker/k8s/src/charm.py index 6936b219..e57809bc 100755 --- a/charms/worker/k8s/src/charm.py +++ b/charms/worker/k8s/src/charm.py @@ -47,6 +47,7 @@ K8sdConnectionError, LoadBalancerConfig, LocalStorageConfig, + MetricsServerConfig, NetworkConfig, UnixSocketConnectionFactory, UpdateClusterConfigRequest, @@ -422,12 +423,24 @@ def _assemble_cluster_config(self) -> UserFacingClusterConfig: # https://github.com/canonical/k8s-operator/pull/169/files#r1847378214 ) + dns_config = DNSConfig( + enabled=self.config.get("dns-enabled"), + ) + if cfg := self.config.get("dns-cluster-domain"): + dns_config.cluster_domain = str(cfg) + if cfg := self.config.get("dns-service-ip"): + dns_config.service_ip = str(cfg) + if cfg := self.config.get("dns-upstream-nameservers"): + dns_config.upstream_nameservers = str(cfg).split() + gateway = GatewayConfig(enabled=self.config.get("gateway-enabled")) network = NetworkConfig( enabled=self.config.get("network-enabled"), ) + metrics_server = MetricsServerConfig(enabled=self.config.get("metrics-server-enabled")) + load_balancer = LoadBalancerConfig( enabled=self.config.get("load-balancer-enabled"), cidrs=str(self.config.get("load-balancer-cidrs")).split(), @@ -445,12 +458,14 @@ def _assemble_cluster_config(self) -> UserFacingClusterConfig: cloud_provider = "external" return UserFacingClusterConfig( - local_storage=local_storage, - gateway=gateway, - network=network, annotations=self._get_valid_annotations(), cloud_provider=cloud_provider, + dns_config=dns_config, + gateway=gateway, + local_storage=local_storage, load_balancer=load_balancer, + metrics_server=metrics_server, + network=network, ) def _configure_datastore(self, config: Union[BootstrapConfig, UpdateClusterConfigRequest]): @@ -577,25 +592,6 @@ def _create_cos_tokens(self): token_type=ClusterTokenType.WORKER, ) - @on_error( - WaitingStatus("Waiting to enable features"), - InvalidResponseError, - K8sdConnectionError, - ) - def _enable_functionalities(self): - """Enable necessary components for the Kubernetes cluster.""" - status.add(ops.MaintenanceStatus("Updating K8s features")) - log.info("Enabling K8s features") - dns_config = DNSConfig(enabled=True) - network_config = NetworkConfig(enabled=True) - local_storage_config = LocalStorageConfig(enabled=True) - user_cluster_config = UserFacingClusterConfig( - dns=dns_config, network=network_config, local_storage=local_storage_config - ) - update_request = UpdateClusterConfigRequest(config=user_cluster_config) - - self.api_manager.update_cluster_config(update_request) - @on_error( WaitingStatus("Ensure that the cluster configuration is up-to-date"), ReconcilerError, @@ -794,12 +790,11 @@ def _reconcile(self, event: ops.EventBase): if self.lead_control_plane: self._k8s_info(event) self._bootstrap_k8s_snap() - self._enable_functionalities() + self._ensure_cluster_config() self._create_cluster_tokens() self._create_cos_tokens() self._apply_cos_requirements() self._revoke_cluster_tokens(event) - self._ensure_cluster_config() self._announce_kubernetes_version() 
self._join_cluster(event) self._config_containerd_registries() diff --git a/charms/worker/k8s/tests/unit/test_base.py b/charms/worker/k8s/tests/unit/test_base.py index a69a6f45..6a3f419a 100644 --- a/charms/worker/k8s/tests/unit/test_base.py +++ b/charms/worker/k8s/tests/unit/test_base.py @@ -58,7 +58,6 @@ def mock_reconciler_handlers(harness): if harness.charm.is_control_plane: handler_names |= { "_bootstrap_k8s_snap", - "_enable_functionalities", "_create_cluster_tokens", "_create_cos_tokens", "_apply_cos_requirements", From 25370eb4bc268e11af0991dec1495c91e436718c Mon Sep 17 00:00:00 2001 From: Homayoon Alimohammadi Date: Sat, 23 Nov 2024 01:30:18 +0400 Subject: [PATCH 11/13] Add Ingress feature configuration (#176) * Add Ingress feature config options * Remove defaultTLS option --- charms/worker/k8s/charmcraft.yaml | 10 +++++++++ charms/worker/k8s/src/charm.py | 7 ++++++ .../k8s/tests/unit/test_config_options.py | 22 +++++++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/charms/worker/k8s/charmcraft.yaml b/charms/worker/k8s/charmcraft.yaml index 09ec05cf..0f685cc3 100644 --- a/charms/worker/k8s/charmcraft.yaml +++ b/charms/worker/k8s/charmcraft.yaml @@ -291,6 +291,16 @@ config: default: true description: | Enables or disables the network feature. + ingress-enabled: + type: boolean + default: false + description: | + Determines if the ingress feature should be enabled. + ingress-enable-proxy-protocol: + type: boolean + default: false + description: | + Determines if the proxy protocol should be enabled for ingresses. node-labels: default: "" type: string diff --git a/charms/worker/k8s/src/charm.py b/charms/worker/k8s/src/charm.py index e57809bc..edaaa0cf 100755 --- a/charms/worker/k8s/src/charm.py +++ b/charms/worker/k8s/src/charm.py @@ -41,6 +41,7 @@ CreateClusterRequest, DNSConfig, GatewayConfig, + IngressConfig, InvalidResponseError, JoinClusterRequest, K8sdAPIManager, @@ -439,6 +440,11 @@ def _assemble_cluster_config(self) -> UserFacingClusterConfig: enabled=self.config.get("network-enabled"), ) + ingress = IngressConfig( + enabled=self.config.get("ingress-enabled"), + enable_proxy_protocol=self.config.get("ingress-enable-proxy-protocol"), + ) + metrics_server = MetricsServerConfig(enabled=self.config.get("metrics-server-enabled")) load_balancer = LoadBalancerConfig( @@ -462,6 +468,7 @@ def _assemble_cluster_config(self) -> UserFacingClusterConfig: cloud_provider=cloud_provider, dns_config=dns_config, gateway=gateway, + ingress=ingress, local_storage=local_storage, load_balancer=load_balancer, metrics_server=metrics_server, diff --git a/charms/worker/k8s/tests/unit/test_config_options.py b/charms/worker/k8s/tests/unit/test_config_options.py index 13bb4f49..7a12be6c 100644 --- a/charms/worker/k8s/tests/unit/test_config_options.py +++ b/charms/worker/k8s/tests/unit/test_config_options.py @@ -50,3 +50,25 @@ def test_configure_network_options(harness): harness.update_config({"network-enabled": True}) ufcg = harness.charm._assemble_cluster_config() assert ufcg.network.enabled, "Network should be enabled" + + +def test_configure_ingress_options(harness): + """Test configuring the ingress options. 
+ + Args: + harness: the harness under test + """ + if harness.charm.is_worker: + pytest.skip("Not applicable on workers") + + harness.disable_hooks() + + enabled = True + proxy_protocol_enabled = True + + harness.update_config({"ingress-enabled": enabled}) + harness.update_config({"ingress-enable-proxy-protocol": proxy_protocol_enabled}) + + ufcg = harness.charm._assemble_cluster_config() + assert ufcg.ingress.enabled == enabled + assert ufcg.ingress.enable_proxy_protocol == proxy_protocol_enabled From 8320b91d4a6b7b5c238cd47d3be63ff990823bbb Mon Sep 17 00:00:00 2001 From: Adam Dyess Date: Fri, 22 Nov 2024 16:42:15 -0600 Subject: [PATCH 12/13] Reduce machine count for running ceph tests (#186) --- tests/integration/conftest.py | 4 +++- tests/integration/data/test-bundle-ceph.yaml | 12 ++---------- tests/integration/test_ceph.py | 5 +---- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index f4a147a6..7ec676be 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -241,7 +241,9 @@ def switch(self, name: str, path: Optional[Path] = None, channel: Optional[str] Raises: ValueError: if both path and channel are provided, or neither are provided """ - app = self.applications[name] + app = self.applications.get(name) + if not app: + return # Skip if the application is not in the bundle if (not path and not channel) or (path and channel): raise ValueError("channel and path are mutually exclusive") if path: diff --git a/tests/integration/data/test-bundle-ceph.yaml b/tests/integration/data/test-bundle-ceph.yaml index 37d4c912..4f93361e 100644 --- a/tests/integration/data/test-bundle-ceph.yaml +++ b/tests/integration/data/test-bundle-ceph.yaml @@ -11,13 +11,6 @@ applications: channel: latest/edge constraints: cores=2 mem=8G root-disk=16G num_units: 1 - options: - bootstrap-node-taints: "node-role.kubernetes.io/control-plane=:NoSchedule" - k8s-worker: - charm: k8s-worker - channel: latest/edge - constraints: cores=2 mem=8G root-disk=16G - num_units: 1 ceph-csi: charm: ceph-csi channel: latest/stable @@ -30,17 +23,16 @@ applications: num_units: 1 options: monitor-count: 1 - expected-osd-count: 1 + expected-osd-count: 2 ceph-osd: charm: ceph-osd channel: quincy/stable constraints: cores=2 mem=4G root-disk=16G - num_units: 3 + num_units: 2 storage: osd-devices: 1G,1 osd-journals: 1G,1 relations: - - [k8s, k8s-worker:cluster] - [ceph-csi, k8s:ceph-k8s-info] - [ceph-csi, ceph-mon:client] - [ceph-mon, ceph-osd:mon] diff --git a/tests/integration/test_ceph.py b/tests/integration/test_ceph.py index 61bcb39f..ae1a4732 100644 --- a/tests/integration/test_ceph.py +++ b/tests/integration/test_ceph.py @@ -15,10 +15,7 @@ # This pytest mark configures the test environment to use the Canonical Kubernetes # bundle with ceph, for all the test within this module. 
-pytestmark = [ - pytest.mark.bundle_file("test-bundle-ceph.yaml"), - pytest.mark.ignore_blocked, -] +pytestmark = [pytest.mark.bundle_file("test-bundle-ceph.yaml")] def _get_data_file_path(name) -> str: From 69c25661a411220fc3e7392ecf7e35019dfad7ad Mon Sep 17 00:00:00 2001 From: Mateo Florido Date: Fri, 22 Nov 2024 18:30:52 -0500 Subject: [PATCH 13/13] Fix Upgrade Check and Literals (#185) Co-authored-by: Adam Dyess --- charms/worker/k8s/src/inspector.py | 23 +++++++++++++++---- charms/worker/k8s/src/literals.py | 4 ++-- charms/worker/k8s/src/upgrade.py | 2 +- .../worker/k8s/tests/unit/test_inspector.py | 5 ++-- charms/worker/k8s/tests/unit/test_upgrade.py | 8 ++++--- 5 files changed, 29 insertions(+), 13 deletions(-) diff --git a/charms/worker/k8s/src/inspector.py b/charms/worker/k8s/src/inspector.py index 1b3f1a18..783b5ea1 100644 --- a/charms/worker/k8s/src/inspector.py +++ b/charms/worker/k8s/src/inspector.py @@ -57,14 +57,27 @@ def get_nodes(self, labels: LabelSelector) -> Optional[List[Node]]: ClusterInspectorError: If the nodes cannot be retrieved. """ client = self._get_client() - unready_nodes = [] try: - for node in client.list(Node, labels=labels): - if node.status != "Ready": - unready_nodes.append(node) + + def is_node_not_ready(node: Node) -> bool: + """Check if a node is not ready. + + Args: + node: The node to check. + + Returns: + True if the node is not ready, False otherwise. + """ + if not node.status or not node.status.conditions: + return True + return any( + condition.type == "Ready" and condition.status != "True" + for condition in node.status.conditions + ) + + return [node for node in client.list(Node, labels=labels) if is_node_not_ready(node)] except ApiError as e: raise ClusterInspector.ClusterInspectorError(f"Failed to get nodes: {e}") from e - return unready_nodes or None def verify_pods_running(self, namespaces: List[str]) -> Optional[str]: """Verify that all pods in the specified namespaces are running. diff --git a/charms/worker/k8s/src/literals.py b/charms/worker/k8s/src/literals.py index 656f95af..950b9edd 100644 --- a/charms/worker/k8s/src/literals.py +++ b/charms/worker/k8s/src/literals.py @@ -13,9 +13,9 @@ }, # NOTE: Update the dependencies for the k8s-service before releasing. "k8s_service": { - "dependencies": {"k8s-worker": "^1.31.0"}, + "dependencies": {"k8s-worker": "^1.30, < 1.32"}, "name": "k8s", - "upgrade_supported": "^1.30.0", + "upgrade_supported": "^1.30, < 1.32", "version": "1.31.2", }, } diff --git a/charms/worker/k8s/src/upgrade.py b/charms/worker/k8s/src/upgrade.py index fc2548f6..4c8efef1 100644 --- a/charms/worker/k8s/src/upgrade.py +++ b/charms/worker/k8s/src/upgrade.py @@ -66,7 +66,7 @@ def pre_upgrade_check(self) -> None: if unready_nodes: raise ClusterNotReadyError( message="Cluster is not ready for an upgrade", - cause=f"Nodes not ready: {', '.join(unready_nodes)}", + cause=f"Nodes not ready: {', '.join(node.metadata.name for node in unready_nodes)}", resolution="""Node(s) may be in a bad state. 
Please check the node(s) for more information.""", ) diff --git a/charms/worker/k8s/tests/unit/test_inspector.py b/charms/worker/k8s/tests/unit/test_inspector.py index 2e532095..cb3c76eb 100644 --- a/charms/worker/k8s/tests/unit/test_inspector.py +++ b/charms/worker/k8s/tests/unit/test_inspector.py @@ -10,6 +10,7 @@ from inspector import ClusterInspector from lightkube.core.exceptions import ApiError +from lightkube.models.core_v1 import NodeCondition from lightkube.resources.core_v1 import Node, Pod @@ -25,11 +26,11 @@ def setUp(self): def test_get_nodes_returns_unready(self): """Test that get_nodes returns unready nodes.""" mock_node1 = MagicMock(spec=Node) - mock_node1.status = "Ready" + mock_node1.status.conditions = [NodeCondition(type="Ready", status="True")] mock_node1.metadata.name = "node1" mock_node2 = MagicMock(spec=Node) - mock_node2.status = "NotReady" + mock_node2.status.conditions = [NodeCondition(type="Ready", status="False")] mock_node2.metadata.name = "node2" self.mock_client.list.return_value = [mock_node1, mock_node2] diff --git a/charms/worker/k8s/tests/unit/test_upgrade.py b/charms/worker/k8s/tests/unit/test_upgrade.py index edeb5003..60e66bb4 100644 --- a/charms/worker/k8s/tests/unit/test_upgrade.py +++ b/charms/worker/k8s/tests/unit/test_upgrade.py @@ -8,6 +8,8 @@ from charms.data_platform_libs.v0.upgrade import ClusterNotReadyError from inspector import ClusterInspector +from lightkube.models.core_v1 import Node +from lightkube.models.meta_v1 import ObjectMeta from upgrade import K8sDependenciesModel, K8sUpgrade @@ -66,9 +68,9 @@ def test_pre_upgrade_check_unready_nodes(self): """Test pre_upgrade_check fails when nodes are not ready.""" self.charm.is_worker = True self.node_manager.get_nodes.return_value = [ - "worker-1", - "worker-2", - "worker-3", + Node(metadata=ObjectMeta(name="worker-1")), + Node(metadata=ObjectMeta(name="worker-2")), + Node(metadata=ObjectMeta(name="worker-3")), ] with self.assertRaises(ClusterNotReadyError):