From e1e6fa84b2205e8448cd2b9257cb87189259f142 Mon Sep 17 00:00:00 2001 From: Mateo Florido <32885896+mateoflorido@users.noreply.github.com> Date: Tue, 2 Apr 2024 12:47:51 -0500 Subject: [PATCH] Implement External Datastore Integration (ETCD) (#47) * Initial ETCD implementation * Add Integration tests * Add Datastore verification logic * Fix lint * Add BlockedState on invalid datastore * Lighter ETCD bundle * Assert Datastore in Supported Datastores * Fix nodes count * Skip etcd tests * Address linting issues * remove extraneous file * revert move of BootstrapConfig * Increment timeout values * Run etcd tests again * Remove duplicate logging * Parallel test runs * Test GH keys * Bump timeout to avoid LXD provisioning delays --------- Co-authored-by: Adam Dyess --- .github/workflows/integration_test.yaml | 9 +- charms/worker/k8s/charmcraft.yaml | 13 +- .../k8s/lib/charms/kubernetes_libs/v0/etcd.py | 218 ++++++++++++++++++ charms/worker/k8s/src/charm.py | 45 +++- pyproject.toml | 3 +- tests/integration/conftest.py | 40 +++- tests/integration/cos_substrate.py | 1 - tests/integration/data/test-bundle-etcd.yaml | 30 +++ tests/integration/{ => data}/test-bundle.yaml | 0 tests/integration/helpers.py | 47 +++- tests/integration/test_etcd.py | 27 +++ tests/integration/test_k8s.py | 63 +---- tox.ini | 2 +- 13 files changed, 424 insertions(+), 74 deletions(-) create mode 100644 charms/worker/k8s/lib/charms/kubernetes_libs/v0/etcd.py create mode 100644 tests/integration/data/test-bundle-etcd.yaml rename tests/integration/{ => data}/test-bundle.yaml (100%) create mode 100644 tests/integration/test_etcd.py diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml index fc11ab0f..647d6b5e 100644 --- a/.github/workflows/integration_test.yaml +++ b/.github/workflows/integration_test.yaml @@ -5,6 +5,7 @@ on: pull_request: jobs: + extra-args: runs-on: ubuntu-latest outputs: @@ -14,6 +15,8 @@ jobs: id: flags env: TITLE: ${{ github.event.pull_request.title }} + JOB: ${{ github.job }} + WORKFLOW: ${{ github.workflow }} run: | EXTRA_ARGS="--crash-dump=on-failure" if [[ "$TITLE" == *"[COS]"* ]]; then @@ -35,14 +38,18 @@ jobs: integration-tests: uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main needs: [build-all-charms, extra-args] + strategy: + matrix: + suite: ["k8s", "etcd"] secrets: inherit with: provider: lxd juju-channel: 3.3/stable - extra-arguments: ${{needs.extra-args.outputs.args}} + extra-arguments: ${{needs.extra-args.outputs.args}} -k test_${{ matrix.suite }} load-test-enabled: false zap-enabled: false trivy-fs-enabled: true trivy-image-config: "trivy.yaml" tmate-debug: true test-timeout: 120 + test-tox-env: integration-${{ matrix.suite }} diff --git a/charms/worker/k8s/charmcraft.yaml b/charms/worker/k8s/charmcraft.yaml index f78b317a..f83d801e 100644 --- a/charms/worker/k8s/charmcraft.yaml +++ b/charms/worker/k8s/charmcraft.yaml @@ -55,12 +55,19 @@ config: default: edge type: string description: Snap channel of the k8s snap + datastore: + default: dqlite + type: string + description: | + The datastore to use in Canonical Kubernetes. This cannot be changed + after deployment. Allowed values are "dqlite" and "etcd". If "etcd" is + chosen, the charm should be integrated with the etcd charm. labels: default: "" type: string description: | Labels can be used to organize and to select subsets of nodes in the - cluster. Declare node labels in key=value format, separated by spaces. + cluster. Declare node labels in key=value format, separated by spaces. actions: get-kubeconfig: @@ -87,3 +94,7 @@ provides: interface: k8s-cluster cos-worker-tokens: interface: cos-tokens + +requires: + etcd: + interface: etcd diff --git a/charms/worker/k8s/lib/charms/kubernetes_libs/v0/etcd.py b/charms/worker/k8s/lib/charms/kubernetes_libs/v0/etcd.py new file mode 100644 index 00000000..686a4332 --- /dev/null +++ b/charms/worker/k8s/lib/charms/kubernetes_libs/v0/etcd.py @@ -0,0 +1,218 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Charm library for the etcd reactive relation. + +The module defines an interface for a charm that requires the etcd relation. +It encapsulates the functionality and events related to managing the etcd relation, +including connection, availability of data, and handling of TLS credentials. + +It uses events to handle state changes in the etcd relation, such as when a connection is +established (`EtcdConnected`), when etcd data is available (`EtcdAvailable`), and when TLS data +for etcd is available (`EtcdTLSAvailable`). + +A class `EtcdReactiveRequires` is defined, which provides an abstraction over the charm's +requires relation to etcd. It encapsulates the functionality to check the status of the +relation, get connection details, and handle client credentials. + +This module also provides helper methods for handling client credentials, such as +saving them to local files and retrieving them from the relation data. + +You can use this charm library in your charm by adding it as a dependency in your +`charmcraft.yaml` file and then importing the relevant classes and functions. + +Example usage: +```python +from charms.kubernetes_libs.v0.etcd import EtcdReactiveRequires + +... + def __init__(self, *args): + self.etcd = EtcdReactiveRequires(self) + ... + # Handle the events from the relation + self.framework.observe(self.etcd.on.connected, self._on_etcd_connected) + self.framework.observe(self.etcd.on.available, self._on_etcd_available) + self.framework.observe(self.etcd.on.tls_available, self._on_etcd_tls_available) + +``` + +""" + +import hashlib +import json +import logging +import os +from functools import cached_property +from typing import Optional + +from ops.framework import EventBase, EventSource, Object, ObjectEvents, StoredState +from ops.model import Relation + +# The unique Charmhub library identifier, never change it +LIBID = "2d422394fe044d61ad1dc044ed051d1b" + +# Increment this major API version when introducing breaking changes +LIBAPI = 0 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version +LIBPATCH = 1 + +log = logging.getLogger(__name__) + + +class EtcdAvailable(EventBase): + """Event emitted when the etcd relation data is available.""" + + pass + + +class EtcdConnected(EventBase): + """Event emitted when the etcd relation is connected.""" + + pass + + +class EtcdTLSAvailable(EventBase): + """Event emitted when the etcd relation TLS data is available.""" + + pass + + +class EtcdConsumerEvents(ObjectEvents): + """Events emitted by the etcd translation interface.""" + + available = EventSource(EtcdAvailable) + connected = EventSource(EtcdConnected) + tls_available = EventSource(EtcdTLSAvailable) + + +class EtcdReactiveRequires(Object): + """Requires side of the etcd interface. + + This class is a translation interface that wraps the requires side + of the reactive etcd interface. + """ + + state = StoredState() + on = EtcdConsumerEvents() + + def __init__(self, charm, endpoint="etcd"): + super().__init__(charm, f"relation-{endpoint}") + self.charm = charm + self.endpoint = endpoint + + self.state.set_default( + connected=False, available=False, tls_available=False, connection_string="" + ) + + for event in ( + charm.on[endpoint].relation_created, + charm.on[endpoint].relation_joined, + charm.on[endpoint].relation_changed, + charm.on[endpoint].relation_departed, + charm.on[endpoint].relation_broken, + ): + self.framework.observe(event, self._check_relation) + + def _check_relation(self, _: EventBase): + """Check if the relation is available and emit the appropriate event.""" + if self.relation: + self.state.connected = True + self.on.connected.emit() + # etcd is available only if the connection string is available + if self.get_connection_string(): + self.state.available = True + self.on.available.emit() + # etcd tls is available only if the tls data is available + # (i.e. client cert, client key, ca cert) + cert = self.get_client_credentials() + if cert["client_cert"] and cert["client_key"] and cert["client_ca"]: + self.state.tls_available = True + self.on.tls_available.emit() + + def _get_dict_hash(self, data: dict) -> str: + """Generate a SHA-256 hash for a dictionary. + + This function converts the dictionary into a JSON string, ensuring it + is sorted in order. It then generates a SHA-256 hash of this string. + + Args: + data(dict): The dictionary to be hashed. + + Returns: + str: The hexadecimal representation of the hash of the dictionary. + """ + dump = json.dumps(data, sort_keys=True) + hash_obj = hashlib.sha256() + hash_obj.update(dump.encode()) + return hash_obj.hexdigest() + + @property + def is_ready(self): + """Check if the relation is available and emit the appropriate event.""" + if self.relation: + if self.get_connection_string(): + cert = self.get_client_credentials() + if all(cert.get(key) for key in ["client_cert", "client_key", "client_ca"]): + return True + return False + + def get_connection_string(self) -> str: + """Return the connection string for etcd.""" + remote_data = self._remote_data + if remote_data: + return remote_data.get("connection_string") + return "" + + def get_client_credentials(self) -> dict: + """Return the client credentials for etcd.""" + remote_data = self._remote_data + return { + "client_cert": remote_data.get("client_cert"), + "client_key": remote_data.get("client_key"), + "client_ca": remote_data.get("client_ca"), + } + + @cached_property + def relation(self) -> Optional[Relation]: + """Return the relation object for this interface.""" + return self.model.get_relation(self.endpoint) + + @property + def _remote_data(self): + """Return the remote relation data for this interface.""" + if not (self.relation and self.relation.units): + return {} + + first_unit = next(iter(self.relation.units), None) + data = self.relation.data[first_unit] + return data + + def save_client_credentials(self, ca_path, cert_path, key_path): + """Save all the client certificates for etcd to local files.""" + credentials = {"client_key": key_path, "client_cert": cert_path, "client_ca": ca_path} + for key, path in credentials.items(): + self._save_remote_data(key, path) + + def _save_remote_data(self, key: str, path: str): + """Save the remote data to a file.""" + value = self._remote_data.get(key) + if value: + parent = os.path.dirname(path) + if not os.path.isdir(parent): + os.makedirs(parent) + with open(path, "w") as stream: + stream.write(value) diff --git a/charms/worker/k8s/src/charm.py b/charms/worker/k8s/src/charm.py index 133d904e..296c3f58 100755 --- a/charms/worker/k8s/src/charm.py +++ b/charms/worker/k8s/src/charm.py @@ -44,6 +44,7 @@ UpdateClusterConfigRequest, UserFacingClusterConfig, ) +from charms.kubernetes_libs.v0.etcd import EtcdReactiveRequires from charms.node_base import LabelMaker from charms.operator_libs_linux.v2.snap import SnapError, SnapState from charms.operator_libs_linux.v2.snap import ensure as snap_ensure @@ -60,6 +61,7 @@ ETC_KUBERNETES = Path("/etc/kubernetes") KUBECTL_PATH = Path("/snap/k8s/current/bin/kubectl") K8SD_PORT = 6400 +SUPPORTED_DATASTORES = ["dqlite", "etcd"] class K8sCharm(ops.CharmBase): @@ -107,6 +109,7 @@ def __init__(self, *args): self.framework.observe(self.on.update_status, self._on_update_status) if self.is_control_plane: + self.etcd = EtcdReactiveRequires(self) self.framework.observe(self.on.get_kubeconfig_action, self._get_external_kubeconfig) @status.on_error( @@ -205,17 +208,19 @@ def _check_k8sd_ready(self): self.api_manager.check_k8sd_ready() @on_error( - ops.WaitingStatus("Failed to bootstrap k8s snap"), + ops.WaitingStatus("Waiting to bootstrap k8s snap"), + AssertionError, InvalidResponseError, K8sdConnectionError, ) def _bootstrap_k8s_snap(self): - """Bootstrap the k8s snap package.""" + """Bootstrap k8s if it's not already bootstrapped.""" if self.api_manager.is_cluster_bootstrapped(): log.info("K8s cluster already bootstrapped") return bootstrap_config = BootstrapConfig() + self._configure_datastore(bootstrap_config) status.add(ops.MaintenanceStatus("Bootstrapping Cluster")) @@ -225,7 +230,6 @@ def _bootstrap_k8s_snap(self): config_str = { "bootstrapConfig": yaml.dump(bootstrap_config.dict(by_alias=True, exclude_none=True)) } - payload = CreateClusterRequest( name=node_name, address=f"{address}:{K8SD_PORT}", config=config_str ) @@ -248,6 +252,41 @@ def _configure_cos_integration(self): if relation := self.model.get_relation("cos-tokens"): self.collector.request(relation) + def _configure_datastore(self, config: BootstrapConfig): + """Configure the datastore for the Kubernetes cluster. + + Args: + config (BootstrapConfig): The bootstrap configuration object for + the Kubernetes cluster that is being configured. This object + will be modified in-place to include etcd's configuration details. + """ + datastore = self.config.get("datastore") + + if datastore not in SUPPORTED_DATASTORES: + log.error( + "Invalid datastore: %s. Supported values: %s", + datastore, + ", ".join(SUPPORTED_DATASTORES), + ) + status.add(ops.BlockedStatus(f"Invalid datastore: {datastore}")) + assert datastore in SUPPORTED_DATASTORES # nosec + + if datastore == "etcd": + log.info("Using etcd as external datastore") + etcd_relation = self.model.get_relation("etcd") + + assert etcd_relation, "Missing etcd relation" # nosec + assert self.etcd.is_ready, "etcd is not ready" # nosec + + config.datastore = "external" + etcd_config = self.etcd.get_client_credentials() + config.datastore_ca_cert = etcd_config.get("client_ca", "") + config.datastore_client_cert = etcd_config.get("client_cert", "") + config.datastore_client_key = etcd_config.get("client_key", "") + config.datastore_url = self.etcd.get_connection_string() + elif datastore == "dqlite": + log.info("Using dqlite as datastore") + def _revoke_cluster_tokens(self): """Revoke tokens for the units in the cluster and k8s-cluster relations. diff --git a/pyproject.toml b/pyproject.toml index 39c0ae92..890e99bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,7 @@ namespace_packages = true [tool.pylint] # Ignore too-few-public-methods due to pydantic models # Ignore no-self-argument due to pydantic validators -disable = "wrong-import-order,redefined-outer-name,too-few-public-methods,no-self-argument,fixme,parse-error" +disable = "wrong-import-order,redefined-outer-name,too-many-instance-attributes,too-few-public-methods,no-self-argument,fixme,parse-error" # Ignore Pydantic check: https://github.com/pydantic/pydantic/issues/1961 extension-pkg-whitelist = "pydantic" # wokeignore:rule=whitelist @@ -54,7 +54,6 @@ ignored-classes = "ProfileManager,InstanceManager,NetworkManager" [tool.pytest.ini_options] minversion = "6.0" -log_cli_level = "INFO" # Linting tools configuration [tool.ruff] diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 02b115f5..b393833b 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -41,12 +41,16 @@ def pytest_addoption(parser: pytest.Parser): def pytest_configure(config): - """Add cos marker parsing. + """Add pytest configuration args. Args: config: Pytest config. """ - config.addinivalue_line("markers", "cos: mark COS integration tests") + config.addinivalue_line("markers", "cos: mark COS integration tests.") + config.addinivalue_line("markers", "bundle_file(name): specify a YAML bundle file for a test.") + config.addinivalue_line( + "markers", "ignore_blocked: specify if the bundle deploy should ignore BlockedStatus." + ) def pytest_collection_modifyitems(config, items): @@ -193,14 +197,16 @@ async def deploy_model( ops_test: OpsTest, model_name: str, bundle: Bundle, + raise_on_blocked=True, ): """Add a juju model, deploy apps into it, wait for them to be active. Args: - request: handle to pytest requests from calling fixture - ops_test: Instance of the pytest-operator plugin - model_name: name of the model in which to deploy - bundle: Bundle object to deploy or redeploy into the model + request: handle to pytest requests from calling fixture + ops_test: Instance of the pytest-operator plugin + model_name: name of the model in which to deploy + bundle: Bundle object to deploy or redeploy into the model + raise_on_blocked: Raise if any unit in the model is blocked Yields: model object @@ -223,8 +229,8 @@ async def deploy_model( await the_model.wait_for_idle( apps=list(bundle.applications), status="active", - raise_on_blocked=True, - timeout=15 * 60, + raise_on_blocked=raise_on_blocked, + timeout=30 * 60, ) yield the_model @@ -232,16 +238,28 @@ async def deploy_model( @pytest_asyncio.fixture(scope="module") async def kubernetes_cluster(request: pytest.FixtureRequest, ops_test: OpsTest): """Deploy local kubernetes charms.""" + bundle_file = "test-bundle.yaml" + bundle_marker = request.node.get_closest_marker("bundle_file") + if bundle_marker: + bundle_file = bundle_marker.args[0] + + raise_on_blocked = True + ignore_blocked = request.node.get_closest_marker("ignore_blocked") + if ignore_blocked: + raise_on_blocked = False + + log.info("Deploying cluster using %s bundle.", bundle_file) + model = "main" charm_path = ("worker/k8s", "worker") charms = [Charm(ops_test, Path("charms") / p) for p in charm_path] charm_files = await asyncio.gather( *[charm.resolve(request.config.option.charm_files) for charm in charms] ) - bundle = Bundle(ops_test, Path(__file__).parent / "test-bundle.yaml") + bundle = Bundle(ops_test, Path(__file__).parent / "data" / bundle_file) for path, charm in zip(charm_files, charms): bundle.switch(charm.app_name, path) - async with deploy_model(request, ops_test, model, bundle) as the_model: + async with deploy_model(request, ops_test, model, bundle, raise_on_blocked) as the_model: yield the_model @@ -303,7 +321,7 @@ async def cos_lite_installed(ops_test: OpsTest, cos_model: Model): await cos_model.block_until( lambda: all(app in cos_model.applications for app in cos_charms), - timeout=60, + timeout=5 * 60, ) await cos_model.wait_for_idle(status="active", timeout=20 * 60, raise_on_error=False) diff --git a/tests/integration/cos_substrate.py b/tests/integration/cos_substrate.py index f35451cc..0e6c2764 100644 --- a/tests/integration/cos_substrate.py +++ b/tests/integration/cos_substrate.py @@ -12,7 +12,6 @@ from pylxd import Client from pylxd.exceptions import ClientConnectionFailed, LXDAPIException, NotFound -logging.basicConfig(level=logging.INFO) log = logging.getLogger(__name__) IPAddress = Union[ipaddress.IPv4Address, ipaddress.IPv6Address] LXDExceptions = (NotFound, LXDAPIException, ClientConnectionFailed) diff --git a/tests/integration/data/test-bundle-etcd.yaml b/tests/integration/data/test-bundle-etcd.yaml new file mode 100644 index 00000000..20e7b18d --- /dev/null +++ b/tests/integration/data/test-bundle-etcd.yaml @@ -0,0 +1,30 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. + +name: integration-test-etcd +description: |- + Used to deploy or refresh within an integration test model +series: focal +applications: + easyrsa: + charm: easyrsa + channel: stable + num_units: 1 + etcd: + charm: etcd + channel: stable + num_units: 1 + k8s: + charm: k8s + channel: latest/edge + num_units: 1 + options: + datastore: etcd + k8s-worker: + charm: k8s-worker + channel: latest/edge + num_units: 1 +relations: + - [k8s, k8s-worker:cluster] + - [etcd, easyrsa:client] + - [etcd, k8s:etcd] diff --git a/tests/integration/test-bundle.yaml b/tests/integration/data/test-bundle.yaml similarity index 100% rename from tests/integration/test-bundle.yaml rename to tests/integration/data/test-bundle.yaml diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py index 10665181..5bae28d7 100644 --- a/tests/integration/helpers.py +++ b/tests/integration/helpers.py @@ -2,12 +2,14 @@ # See LICENSE file for licensing details. """Additions to tools missing from juju library.""" +import json import logging from typing import Optional from juju.model import Model +from tenacity import retry, stop_after_attempt, wait_fixed -logger = logging.getLogger(__name__) +log = logging.getLogger(__name__) async def get_address(model: Model, app_name: str, unit_num: Optional[int] = None) -> str: @@ -28,3 +30,46 @@ async def get_address(model: Model, app_name: str, unit_num: Optional[int] = Non if unit_num is None else app["units"][f"{app_name}/{unit_num}"]["address"] ) + + +async def get_nodes(k8s): + """Return Node list + + Args: + k8s: any k8s unit + + Returns: + list of nodes + """ + action = await k8s.run("k8s kubectl get nodes -o json") + result = await action.wait() + assert result.results["return-code"] == 0, "Failed to get nodes with kubectl" + log.info("Parsing node list...") + node_list = json.loads(result.results["stdout"]) + assert node_list["kind"] == "List", "Should have found a list of nodes" + return node_list["items"] + + +@retry(reraise=True, stop=stop_after_attempt(12), wait=wait_fixed(15)) +async def ready_nodes(k8s, expected_count): + """Get a list of the ready nodes. + + Args: + k8s: k8s unit + expected_count: number of expected nodes + """ + log.info("Finding all nodes...") + nodes = await get_nodes(k8s) + ready_nodes = { + node["metadata"]["name"]: all( + condition["status"] == "False" + for condition in node["status"]["conditions"] + if condition["type"] != "Ready" + ) + for node in nodes + } + log.info("Found %d/%d nodes...", len(ready_nodes), expected_count) + assert len(ready_nodes) == expected_count, f"Expect {expected_count} nodes in the list" + for node, ready in ready_nodes.items(): + log.info("Node %s is %s..", node, "ready" if ready else "not ready") + assert ready, f"Node not yet ready: {node}." diff --git a/tests/integration/test_etcd.py b/tests/integration/test_etcd.py new file mode 100644 index 00000000..5e5d1619 --- /dev/null +++ b/tests/integration/test_etcd.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 + +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. + +"""Integration tests.""" + +import pytest +from juju import model + +from .helpers import ready_nodes + +# This pytest mark configures the test environment to use the Canonical Kubernetes +# bundle with etcd, for all the test within this module. +pytestmark = [ + pytest.mark.bundle_file("test-bundle-etcd.yaml"), + pytest.mark.ignore_blocked, +] + + +@pytest.mark.abort_on_fail +async def test_nodes_ready(kubernetes_cluster: model.Model): + """Deploy the charm and wait for active/idle status.""" + k8s = kubernetes_cluster.applications["k8s"] + worker = kubernetes_cluster.applications["k8s-worker"] + expected_nodes = len(k8s.units) + len(worker.units) + await ready_nodes(k8s.units[0], expected_nodes) diff --git a/tests/integration/test_k8s.py b/tests/integration/test_k8s.py index fc752d35..10c79972 100644 --- a/tests/integration/test_k8s.py +++ b/tests/integration/test_k8s.py @@ -6,7 +6,6 @@ """Integration tests.""" import asyncio -import json import logging import pytest @@ -14,54 +13,12 @@ from tenacity import retry, stop_after_attempt, wait_fixed from .grafana import Grafana +from .helpers import get_nodes, ready_nodes from .prometheus import Prometheus log = logging.getLogger(__name__) -async def get_nodes(k8s): - """Return Node list - - Args: - k8s: any k8s unit - - Returns: - list of nodes - """ - action = await k8s.run("k8s kubectl get nodes -o json") - result = await action.wait() - assert result.results["return-code"] == 0, "Failed to get nodes with kubectl" - log.info("Parsing node list...") - node_list = json.loads(result.results["stdout"]) - assert node_list["kind"] == "List", "Should have found a list of nodes" - return node_list["items"] - - -@retry(reraise=True, stop=stop_after_attempt(12), wait=wait_fixed(15)) -async def ready_nodes(k8s, expected_count): - """Get a list of the ready nodes. - - Args: - k8s: k8s unit - expected_count: number of expected nodes - """ - log.info("Finding all nodes...") - nodes = await get_nodes(k8s) - ready_nodes = { - node["metadata"]["name"]: all( - condition["status"] == "False" - for condition in node["status"]["conditions"] - if condition["type"] != "Ready" - ) - for node in nodes - } - log.info("Found %d/%d nodes...", len(ready_nodes), expected_count) - assert len(ready_nodes) == expected_count, f"Expect {expected_count} nodes in the list" - for node, ready in ready_nodes.items(): - log.info("Node %s is %s..", node, "ready" if ready else "not ready") - assert ready, f"Node not yet ready: {node}." - - async def get_leader(app): """Find leader unit of an application. @@ -93,7 +50,7 @@ async def test_nodes_labelled(request, kubernetes_cluster: model.Model): worker: application.Application = kubernetes_cluster.applications["k8s-worker"] label_config = {"labels": f"{testname}="} await asyncio.gather(k8s.set_config(label_config), worker.set_config(label_config)) - await kubernetes_cluster.wait_for_idle(status="active", timeout=5 * 60) + await kubernetes_cluster.wait_for_idle(status="active", timeout=10 * 60) try: nodes = await get_nodes(k8s.units[0]) @@ -110,7 +67,7 @@ async def test_nodes_labelled(request, kubernetes_cluster: model.Model): k8s.reset_config(list(label_config)), worker.reset_config(list(label_config)) ) - await kubernetes_cluster.wait_for_idle(status="active", timeout=5 * 60) + await kubernetes_cluster.wait_for_idle(status="active", timeout=10 * 60) nodes = await get_nodes(k8s.units[0]) labelled = [n for n in nodes if testname in n["metadata"]["labels"]] juju_nodes = [n for n in nodes if "juju-charm" in n["metadata"]["labels"]] @@ -128,10 +85,10 @@ async def test_remove_worker(kubernetes_cluster: model.Model): # Remove a worker log.info("Remove unit %s", worker.units[0].name) await worker.units[0].destroy() - await kubernetes_cluster.wait_for_idle(status="active", timeout=5 * 60) + await kubernetes_cluster.wait_for_idle(status="active", timeout=10 * 60) await ready_nodes(k8s.units[0], expected_nodes - 1) await worker.add_unit() - await kubernetes_cluster.wait_for_idle(status="active", timeout=5 * 60) + await kubernetes_cluster.wait_for_idle(status="active", timeout=10 * 60) await ready_nodes(k8s.units[0], expected_nodes) @@ -149,10 +106,10 @@ async def test_remove_non_leader_control_plane(kubernetes_cluster: model.Model): # Remove a control-plane log.info("Remove unit %s", follower.name) await follower.destroy() - await kubernetes_cluster.wait_for_idle(status="active", timeout=5 * 60) + await kubernetes_cluster.wait_for_idle(status="active", timeout=10 * 60) await ready_nodes(leader, expected_nodes - 1) await k8s.add_unit() - await kubernetes_cluster.wait_for_idle(status="active", timeout=5 * 60) + await kubernetes_cluster.wait_for_idle(status="active", timeout=10 * 60) await ready_nodes(leader, expected_nodes) @@ -170,10 +127,10 @@ async def test_remove_leader_control_plane(kubernetes_cluster: model.Model): # Remove a control-plane log.info("Remove unit %s", leader.name) await leader.destroy() - await kubernetes_cluster.wait_for_idle(status="active", timeout=5 * 60) + await kubernetes_cluster.wait_for_idle(status="active", timeout=10 * 60) await ready_nodes(follower, expected_nodes - 1) await k8s.add_unit() - await kubernetes_cluster.wait_for_idle(status="active", timeout=5 * 60) + await kubernetes_cluster.wait_for_idle(status="active", timeout=10 * 60) await ready_nodes(follower, expected_nodes) @@ -187,7 +144,7 @@ async def test_grafana( ): """Test integration with Grafana.""" grafana = Grafana(model_name=cos_model.name, host=traefik_address, password=grafana_password) - await asyncio.wait_for(grafana.is_ready(), timeout=5 * 60) + await asyncio.wait_for(grafana.is_ready(), timeout=10 * 60) dashboards = await grafana.dashboards_all() actual_dashboard_titles = set() diff --git a/tox.ini b/tox.ini index 5b4c8c82..0baea230 100644 --- a/tox.ini +++ b/tox.ini @@ -81,7 +81,7 @@ deps = commands = bandit -c {toxinidir}/pyproject.toml -r {[vars]all_path} -[testenv:integration] +[testenv:{integration,integration-k8s,integration-etcd}] description = Run integration tests deps = -r test_requirements.txt commands =