From 18dd753e484bd9766c8b9d613d36598477c30c51 Mon Sep 17 00:00:00 2001 From: Pietro Pasotti Date: Thu, 26 Sep 2024 11:28:07 +0200 Subject: [PATCH 01/16] added tls cert from s3 --- src/cosl/coordinated_workers/coordinator.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/cosl/coordinated_workers/coordinator.py b/src/cosl/coordinated_workers/coordinator.py index 1396cea..bff57d8 100644 --- a/src/cosl/coordinated_workers/coordinator.py +++ b/src/cosl/coordinated_workers/coordinator.py @@ -12,6 +12,7 @@ import socket from dataclasses import dataclass from functools import partial +from pathlib import Path from typing import ( Any, Callable, @@ -63,6 +64,8 @@ logger = logging.getLogger(__name__) +S3_CA_CERT_PATH = "./s3_ca.crt" + # The paths of the base rules to be rendered in CONSOLIDATED_ALERT_RULES_PATH NGINX_ORIGINAL_ALERT_RULES_PATH = "./src/prometheus_alert_rules/nginx" WORKER_ORIGINAL_ALERT_RULES_PATH = "./src/prometheus_alert_rules/workers" @@ -452,6 +455,11 @@ def _s3_config(self) -> Dict[str, Any]: s3_config["access_key_id"] = s3_data.pop("access-key") s3_config["secret_access_key"] = s3_data.pop("secret-key") s3_config["bucket_name"] = s3_data.pop("bucket") + ca_chain = s3_data.get("tls-ca-chain") + if ca_chain: + # put the cacert to disk + Path(S3_CA_CERT_PATH).write_text(ca_chain[0]) # TODO: is any cert in the chain good? + s3_config["tls_ca_path"] = S3_CA_CERT_PATH return s3_config @property From 7feacbc4a7976a012b155b59bf188a7145d2322b Mon Sep 17 00:00:00 2001 From: Pietro Pasotti Date: Thu, 26 Sep 2024 11:28:31 +0200 Subject: [PATCH 02/16] vbump --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d1f1e3d..b5a3be4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "cosl" -version = "0.0.35" +version = "0.0.36" authors = [ { name="sed-i", email="82407168+sed-i@users.noreply.github.com" }, ] From 759c1f87359c9a46f6332b57a4b5c6ad722ca3bf Mon Sep 17 00:00:00 2001 From: Pietro Pasotti Date: Fri, 27 Sep 2024 10:06:07 +0200 Subject: [PATCH 03/16] cert goes in workload container --- src/cosl/coordinated_workers/coordinator.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/cosl/coordinated_workers/coordinator.py b/src/cosl/coordinated_workers/coordinator.py index c2e4d83..a705074 100644 --- a/src/cosl/coordinated_workers/coordinator.py +++ b/src/cosl/coordinated_workers/coordinator.py @@ -28,6 +28,7 @@ import ops import yaml +from ops import Container import cosl from cosl.coordinated_workers.interface import ClusterProvider, RemoteWriteEndpoint @@ -64,7 +65,7 @@ logger = logging.getLogger(__name__) -S3_CA_CERT_PATH = "./s3_ca.crt" +S3_TLS_CA_CHAIN_PATH = "/etc/worker/s3_ca.crt" # The paths of the base rules to be rendered in CONSOLIDATED_ALERT_RULES_PATH NGINX_ORIGINAL_ALERT_RULES_PATH = "./src/prometheus_alert_rules/nginx" @@ -235,6 +236,7 @@ def __init__( partial(resources_requests, self) if resources_requests is not None else None ) self._container_name = container_name + self._container: Container = charm.unit.get_container(container_name) self._resources_limit_options = resources_limit_options or {} self.remote_write_endpoints_getter = remote_write_endpoints @@ -458,8 +460,14 @@ def _s3_config(self) -> Dict[str, Any]: ca_chain = s3_data.get("tls-ca-chain") if ca_chain: # put the cacert to disk - Path(S3_CA_CERT_PATH).write_text(ca_chain[0]) # TODO: is any cert in the chain good? - s3_config["tls_ca_path"] = S3_CA_CERT_PATH + container = self._container + # FIXME: s3 gives us a cert chain. tempo's s3 config takes: + # tls_cert_path: Path to the client certificate file. + # tls_key_path: Path to the private client key file. + # tls_ca_path: Path to the CA certificate file. + # tls_server_name: Path to the CA certificate file. # not a typo: it's the same + container.push(S3_TLS_CA_CHAIN_PATH, "\n\n".join(ca_chain)) + s3_config["tls_cert_path"] = S3_TLS_CA_CHAIN_PATH return s3_config @property From 8db021a3c809b26b9f814bfe9117597798e6d36e Mon Sep 17 00:00:00 2001 From: Pietro Pasotti Date: Fri, 27 Sep 2024 14:25:52 +0200 Subject: [PATCH 04/16] more tests --- src/cosl/coordinated_workers/coordinator.py | 96 +++++++++++-------- src/cosl/coordinated_workers/interface.py | 31 ++++-- src/cosl/coordinated_workers/worker.py | 78 +++++++-------- tests/test_coordinated_workers/conftest.py | 13 +-- .../test_coordinator.py | 61 ++++++++++++ tests/test_coordinated_workers/test_worker.py | 70 ++++++++++++++ 6 files changed, 247 insertions(+), 102 deletions(-) diff --git a/src/cosl/coordinated_workers/coordinator.py b/src/cosl/coordinated_workers/coordinator.py index a705074..78fd0b7 100644 --- a/src/cosl/coordinated_workers/coordinator.py +++ b/src/cosl/coordinated_workers/coordinator.py @@ -12,7 +12,6 @@ import socket from dataclasses import dataclass from functools import partial -from pathlib import Path from typing import ( Any, Callable, @@ -27,10 +26,11 @@ from urllib.parse import urlparse import ops +import pydantic import yaml -from ops import Container import cosl +from cosl.coordinated_workers import worker from cosl.coordinated_workers.interface import ClusterProvider, RemoteWriteEndpoint from cosl.coordinated_workers.nginx import ( Nginx, @@ -65,8 +65,6 @@ logger = logging.getLogger(__name__) -S3_TLS_CA_CHAIN_PATH = "/etc/worker/s3_ca.crt" - # The paths of the base rules to be rendered in CONSOLIDATED_ALERT_RULES_PATH NGINX_ORIGINAL_ALERT_RULES_PATH = "./src/prometheus_alert_rules/nginx" WORKER_ORIGINAL_ALERT_RULES_PATH = "./src/prometheus_alert_rules/workers" @@ -82,6 +80,22 @@ class ClusterRolesConfigError(Exception): """Raised when the ClusterRolesConfig instance is not properly configured.""" +class S3ConnectionInfo(pydantic.BaseModel): + """Model for the s3 relation databag, as returned by the s3 charm lib.""" + + # they don't use it, we do + + model_config = {"populate_by_name": True} + + endpoint: str + bucket: str + access_key: str = pydantic.Field(alias="access-key") + secret_key: str = pydantic.Field(alias="secret-key") + + region: Optional[str] = pydantic.Field(None) + tls_ca_chain: Optional[str] = pydantic.Field(None, alias="tls-ca-chain") + + @dataclass class ClusterRolesConfig: """Worker roles and deployment requirements.""" @@ -236,7 +250,6 @@ def __init__( partial(resources_requests, self) if resources_requests is not None else None ) self._container_name = container_name - self._container: Container = charm.unit.get_container(container_name) self._resources_limit_options = resources_limit_options or {} self.remote_write_endpoints_getter = remote_write_endpoints @@ -430,6 +443,14 @@ def tls_available(self) -> bool: and (self.cert_handler.ca_cert is not None) ) + @property + def s3_connection_info(self) -> S3ConnectionInfo: + """Cast and validate the untyped s3 databag to something we can handle.""" + try: + return S3ConnectionInfo(**self.s3_requirer.get_s3_connection_info()) + except pydantic.ValidationError: + raise S3NotFoundError("s3 integration inactive or interface corrupt") + @property def _s3_config(self) -> Dict[str, Any]: """The s3 configuration from relation data. @@ -439,35 +460,24 @@ def _s3_config(self) -> Dict[str, Any]: Raises: S3NotFoundError: The s3 integration is inactive. """ - s3_data = self.s3_requirer.get_s3_connection_info() - s3_config: Dict[str, Any] = {} - if not ( - s3_data - and "bucket" in s3_data - and "endpoint" in s3_data - and "access-key" in s3_data - and "secret-key" in s3_data - ): - raise S3NotFoundError("s3 integration inactive") - s3_config["insecure"] = not s3_data["endpoint"].startswith("https://") - s3_config["endpoint"] = re.sub( - rf"^{urlparse(s3_data['endpoint']).scheme}://", "", s3_data["endpoint"] - ) - s3_config["region"] = s3_data.get("region", "") - s3_config["access_key_id"] = s3_data.pop("access-key") - s3_config["secret_access_key"] = s3_data.pop("secret-key") - s3_config["bucket_name"] = s3_data.pop("bucket") - ca_chain = s3_data.get("tls-ca-chain") - if ca_chain: - # put the cacert to disk - container = self._container + s3_data = self.s3_connection_info + s3_config = { + "endpoint": re.sub(rf"^{urlparse(s3_data.endpoint).scheme}://", "", s3_data.endpoint), + "region": s3_data.region, + "access_key_id": s3_data.access_key, + "secret_access_key": s3_data.secret_key, + "bucket_name": s3_data.bucket, + "insecure": not s3_data.tls_ca_chain, # FIXME: s3 gives us a cert chain. tempo's s3 config takes: # tls_cert_path: Path to the client certificate file. # tls_key_path: Path to the private client key file. # tls_ca_path: Path to the CA certificate file. # tls_server_name: Path to the CA certificate file. # not a typo: it's the same - container.push(S3_TLS_CA_CHAIN_PATH, "\n\n".join(ca_chain)) - s3_config["tls_cert_path"] = S3_TLS_CA_CHAIN_PATH + # we send to the worker the chain itself, but the tempo config actually wants a path to a file + # the worker will be responsible for putting it there + "tls_cert_path": worker.S3_TLS_CA_CHAIN_FILE if s3_data.tls_ca_chain else None, + } + return s3_config @property @@ -520,24 +530,23 @@ def _local_ip(self) -> Optional[str]: def _workers_scrape_jobs(self) -> List[Dict[str, Any]]: """The Prometheus scrape jobs for the workers connected to the coordinator.""" scrape_jobs: List[Dict[str, Any]] = [] - worker_topologies = self.cluster.gather_topology() - for worker in worker_topologies: + for worker_topology in self.cluster.gather_topology(): job = { "static_configs": [ { - "targets": [f"{worker['address']}:{self._worker_metrics_port}"], + "targets": [f"{worker_topology['address']}:{self._worker_metrics_port}"], } ], # setting these as "labels" in the static config gets some of them # replaced by the coordinator topology # https://github.com/canonical/prometheus-k8s-operator/issues/571 "relabel_configs": [ - {"target_label": "juju_charm", "replacement": worker["charm_name"]}, - {"target_label": "juju_unit", "replacement": worker["unit"]}, + {"target_label": "juju_charm", "replacement": worker_topology["charm_name"]}, + {"target_label": "juju_unit", "replacement": worker_topology["unit"]}, { "target_label": "juju_application", - "replacement": worker["application"], + "replacement": worker_topology["application"], }, {"target_label": "juju_model", "replacement": self.model.name}, {"target_label": "juju_model_uuid", "replacement": self.model.uuid}, @@ -718,6 +727,7 @@ def update_cluster(self): if self.remote_write_endpoints_getter else None ), + s3_tls_ca_cert=self._s3_config, ) def _render_workers_alert_rules(self): @@ -725,24 +735,26 @@ def _render_workers_alert_rules(self): self._remove_rendered_alert_rules() apps: Set[str] = set() - for worker in self.cluster.gather_topology(): - if worker["application"] in apps: + for worker_topology in self.cluster.gather_topology(): + if worker_topology["application"] in apps: continue - apps.add(worker["application"]) + apps.add(worker_topology["application"]) topology_dict = { "model": self.model.name, "model_uuid": self.model.uuid, - "application": worker["application"], - "unit": worker["unit"], - "charm_name": worker["charm_name"], + "application": worker_topology["application"], + "unit": worker_topology["unit"], + "charm_name": worker_topology["charm_name"], } topology = cosl.JujuTopology.from_dict(topology_dict) alert_rules = cosl.AlertRules(query_type="promql", topology=topology) alert_rules.add_path(WORKER_ORIGINAL_ALERT_RULES_PATH, recursive=True) alert_rules_contents = yaml.dump(alert_rules.as_dict()) - file_name = f"{CONSOLIDATED_ALERT_RULES_PATH}/rendered_{worker['application']}.rules" + file_name = ( + f"{CONSOLIDATED_ALERT_RULES_PATH}/rendered_{worker_topology['application']}.rules" + ) with open(file_name, "w") as writer: writer.write(alert_rules_contents) diff --git a/src/cosl/coordinated_workers/interface.py b/src/cosl/coordinated_workers/interface.py index e1bcef0..1cba121 100644 --- a/src/cosl/coordinated_workers/interface.py +++ b/src/cosl/coordinated_workers/interface.py @@ -12,6 +12,7 @@ import collections import json import logging +from collections import namedtuple from typing import ( Any, Counter, @@ -171,7 +172,7 @@ class ClusterRequirerUnitData(DatabagModel): class ClusterProviderAppData(DatabagModel): - """App data the the coordinator sends to the worker.""" + """App data that the coordinator sends to the worker.""" ### worker node configuration worker_config: str @@ -189,7 +190,18 @@ class ClusterProviderAppData(DatabagModel): ca_cert: Optional[str] = None server_cert: Optional[str] = None privkey_secret_id: Optional[str] = None - """TLS Config""" + s3_tls_ca_chain: Optional[str] = None + + +TLSData = namedtuple( + "TLSData", + [ + "ca_cert", + "server_cert", + "privkey_secret_id", + "s3_tls_ca_chain", + ], +) class ClusterChangedEvent(ops.EventBase): @@ -268,6 +280,7 @@ def publish_data( worker_config: str, ca_cert: Optional[str] = None, server_cert: Optional[str] = None, + s3_tls_ca_cert: Optional[str] = None, privkey_secret_id: Optional[str] = None, loki_endpoints: Optional[Dict[str, str]] = None, tracing_receivers: Optional[Dict[str, str]] = None, @@ -284,6 +297,7 @@ def publish_data( privkey_secret_id=privkey_secret_id, tracing_receivers=tracing_receivers, remote_write_endpoints=remote_write_endpoints, + s3_tls_ca_cert=s3_tls_ca_cert, ) local_app_databag.dump(relation.data[self.model.app]) @@ -517,7 +531,7 @@ def get_loki_endpoints(self) -> Dict[str, str]: return data.loki_endpoints or {} return {} - def get_tls_data(self) -> Optional[Dict[str, str]]: + def get_tls_data(self) -> Optional[TLSData]: """Fetch certificates and the private key secrets id for the worker config.""" data = self._get_data_from_coordinator() if not data: @@ -526,11 +540,12 @@ def get_tls_data(self) -> Optional[Dict[str, str]]: if not data.ca_cert or not data.server_cert or not data.privkey_secret_id: return None - return { - "ca_cert": data.ca_cert, - "server_cert": data.server_cert, - "privkey_secret_id": data.privkey_secret_id, - } + return TLSData( + ca_cert=data.ca_cert, + server_cert=data.server_cert, + privkey_secret_id=data.privkey_secret_id, + s3_tls_ca_chain=data.s3_tls_ca_chain, + ) def get_tracing_receivers(self) -> Optional[Dict[str, str]]: """Fetch the tracing receivers from the coordinator databag.""" diff --git a/src/cosl/coordinated_workers/worker.py b/src/cosl/coordinated_workers/worker.py index 26f2f82..572755f 100644 --- a/src/cosl/coordinated_workers/worker.py +++ b/src/cosl/coordinated_workers/worker.py @@ -40,8 +40,10 @@ BASE_DIR = "/worker" CONFIG_FILE = "/etc/worker/config.yaml" CERT_FILE = "/etc/worker/server.cert" +S3_TLS_CA_CHAIN_FILE = "/etc/worker/s3_ca.crt" KEY_FILE = "/etc/worker/private.key" CLIENT_CA_FILE = "/etc/worker/ca.cert" +ROOT_CA_CERT = Path("/usr/local/share/ca-certificates/ca.crt") logger = logging.getLogger(__name__) @@ -70,9 +72,6 @@ def _validate_container_name( """Mapping of the resources limit option names that the charms use, as defined in config.yaml.""" -ROOT_CA_CERT = Path("/usr/local/share/ca-certificates/ca.crt") - - class WorkerError(Exception): """Base class for exceptions raised by this module.""" @@ -143,7 +142,7 @@ def __init__( self._name = name self._pebble_layer = partial( pebble_layer, self - ) # do not call this directly. use self.pebble_layer instead + ) # do not call this directly. use self.pebble_layer instead self.topology = JujuTopology.from_charm(self._charm) self._container = self._charm.unit.get_container(name) @@ -553,64 +552,59 @@ def _update_tls_certificates(self) -> bool: if not self._container.can_connect(): return False + any_changes = False if tls_data := self.cluster.get_tls_data(): - private_key_secret = self.model.get_secret(id=tls_data["privkey_secret_id"]) + private_key_secret = self.model.get_secret(id=tls_data.privkey_secret_id) private_key = private_key_secret.get_content().get("private-key") - ca_cert = tls_data["ca_cert"] - server_cert = tls_data["server_cert"] - - # Read the current content of the files (if they exist) - current_server_cert = ( - self._container.pull(CERT_FILE).read() if self._container.exists(CERT_FILE) else "" - ) - current_private_key = ( - self._container.pull(KEY_FILE).read() if self._container.exists(KEY_FILE) else "" - ) - current_ca_cert = ( - self._container.pull(CLIENT_CA_FILE).read() - if self._container.exists(CLIENT_CA_FILE) - else "" - ) - - if ( - current_server_cert == server_cert - and current_private_key == private_key - and current_ca_cert == ca_cert + for new_contents, file in ( + (tls_data.ca_cert, CLIENT_CA_FILE), + (tls_data.server_cert, CERT_FILE), + (private_key, KEY_FILE), + (tls_data.s3_tls_ca_chain, S3_TLS_CA_CHAIN_FILE), ): - # No update needed - return False + if self._container.exists(file): + current_contents = self._container.pull(file).read() + if current_contents == new_contents: + continue - # Save the workload certificates - self._container.push(CERT_FILE, server_cert or "", make_dirs=True) - self._container.push(KEY_FILE, private_key or "", make_dirs=True) - self._container.push(CLIENT_CA_FILE, ca_cert or "", make_dirs=True) - self._container.push(ROOT_CA_CERT, ca_cert or "", make_dirs=True) + any_changes = True + self._container.push(file, new_contents or "", make_dirs=True) # Save the cacert in the charm container for charm traces - ROOT_CA_CERT.write_text(ca_cert) - else: + ROOT_CA_CERT.write_text(tls_data.ca_cert) + if not any_changes: + return False + logger.debug("found new tls data in cluster. synced with container fs") - if not ( - self._container.exists(CERT_FILE) - or self._container.exists(KEY_FILE) - or self._container.exists(CLIENT_CA_FILE) - or self._container.exists(ROOT_CA_CERT) + else: + if not any( + self._container.exists(file) + for file in ( + CERT_FILE, + KEY_FILE, + CLIENT_CA_FILE, + ROOT_CA_CERT, + S3_TLS_CA_CHAIN_FILE, + ) ): # No update needed return False + logger.debug("no tls data in cluster. wiping files...") self._container.remove_path(CERT_FILE, recursive=True) self._container.remove_path(KEY_FILE, recursive=True) self._container.remove_path(CLIENT_CA_FILE, recursive=True) self._container.remove_path(ROOT_CA_CERT, recursive=True) + self._container.remove_path(S3_TLS_CA_CHAIN_FILE, recursive=True) # Remove from charm container ROOT_CA_CERT.unlink(missing_ok=True) - # FIXME: uncomment as soon as the nginx image contains the ca-certificates package - self._container.exec(["update-ca-certificates", "--fresh"]).wait() - subprocess.run(["update-ca-certificates", "--fresh"]) + if any_changes: + logger.debug("running update-ca-certificates") + self._container.exec(["update-ca-certificates", "--fresh"]).wait() + subprocess.run(["update-ca-certificates", "--fresh"]) return True diff --git a/tests/test_coordinated_workers/conftest.py b/tests/test_coordinated_workers/conftest.py index f7f0d66..d0f324a 100644 --- a/tests/test_coordinated_workers/conftest.py +++ b/tests/test_coordinated_workers/conftest.py @@ -1,6 +1,5 @@ from contextlib import ExitStack from pathlib import Path -from typing import Generator from unittest.mock import patch import pytest @@ -8,7 +7,7 @@ @pytest.fixture(autouse=True) -def patch_all(): +def patch_all(tmp_path: Path): with ExitStack() as stack: # so we don't have to wait for minutes: stack.enter_context( @@ -36,15 +35,9 @@ def patch_all(): ) ) + # Prevent the worker's _update_tls_certificates method to try and write our local filesystem stack.enter_context( - patch("cosl.coordinated_workers.worker.Worker.running_version", new=lambda _: "42.42") + patch("cosl.coordinated_workers.worker.ROOT_CA_CERT", new=tmp_path / "rootcacert") ) yield - - -@pytest.fixture(autouse=True) -def root_ca_cert(tmp_path: Path) -> Generator[Path, None, None]: - # Prevent the charm's _update_tls_certificates method to try and write our local filesystem - with patch("src.cosl.coordinated_workers.worker.ROOT_CA_CERT", new=tmp_path / "rootcacert"): - yield tmp_path / "rootcacert" diff --git a/tests/test_coordinated_workers/test_coordinator.py b/tests/test_coordinated_workers/test_coordinator.py index d5c0b31..8168750 100644 --- a/tests/test_coordinated_workers/test_coordinator.py +++ b/tests/test_coordinated_workers/test_coordinator.py @@ -173,3 +173,64 @@ def test_without_s3_integration_raises_error( # THEN the _s3_config method raises and S3NotFoundError with pytest.raises(S3NotFoundError): mgr.charm.coordinator._s3_config + + +@pytest.mark.parametrize("region", (None, "canada")) +@pytest.mark.parametrize("tls_ca_chain", (None, "my ca chain")) +@pytest.mark.parametrize("bucket", ("bucky",)) +@pytest.mark.parametrize("secret_key", ("foo",)) +@pytest.mark.parametrize("access_key", ("foo",)) +@pytest.mark.parametrize( + "endpoint, endpoint_stripped", + ( + ("example.com", "example.com"), + ("http://example.com", "example.com"), + ("https://example.com", "example.com"), + ), +) +def test_s3_integration( + coordinator_state: State, + coordinator_charm: ops.CharmBase, + region, + endpoint, + endpoint_stripped, + secret_key, + access_key, + bucket, + tls_ca_chain, +): + # Test that a charm with a s3 integration gives the expected _s3_config + + # GIVEN a coordinator charm with a s3 integration + ctx = Context(coordinator_charm, meta=coordinator_charm.META) + s3_relation = coordinator_state.get_relations("my-s3")[0] + relations_except_s3 = [ + relation for relation in coordinator_state.relations if relation.endpoint != "my-s3" + ] + s3_app_data = { + **({"region": region} if region else {}), + **({"tls-ca-chain": tls_ca_chain} if tls_ca_chain else {}), + "endpoint": endpoint, + "access-key": access_key, + "secret-key": secret_key, + "bucket": bucket, + } + + # WHEN we process any event + with ctx.manager( + "update-status", + state=coordinator_state.replace( + relations=relations_except_s3 + [s3_relation.replace(remote_app_data=s3_app_data)] + ), + ) as mgr: + + # THEN the s3_connection_info method returns the expected data structure + coordinator: Coordinator = mgr.charm.coordinator + assert coordinator.s3_connection_info.region == region + assert coordinator.s3_connection_info.bucket == bucket + assert coordinator.s3_connection_info.endpoint == endpoint + assert coordinator.s3_connection_info.secret_key == secret_key + assert coordinator.s3_connection_info.access_key == access_key + assert coordinator.s3_connection_info.tls_ca_chain == tls_ca_chain + assert coordinator._s3_config["endpoint"] == endpoint_stripped + assert coordinator._s3_config["insecure"] is (not tls_ca_chain) diff --git a/tests/test_coordinated_workers/test_worker.py b/tests/test_coordinated_workers/test_worker.py index 2ae72a9..379abf7 100644 --- a/tests/test_coordinated_workers/test_worker.py +++ b/tests/test_coordinated_workers/test_worker.py @@ -16,6 +16,7 @@ CLIENT_CA_FILE, CONFIG_FILE, KEY_FILE, + S3_TLS_CA_CHAIN_FILE, Worker, ) from tests.test_coordinated_workers.test_worker_status import k8s_patch @@ -454,24 +455,29 @@ def test_worker_does_not_restart_on_no_cert_changed(restart_mock, tmp_path): "ca_cert": json.dumps("ca"), "server_cert": json.dumps("cert"), "privkey_secret_id": json.dumps("private_id"), + "s3_tls_ca_chain": json.dumps("s3_ca"), }, ) cert = tmp_path / "cert.cert" key = tmp_path / "key.key" client_ca = tmp_path / "client_ca.cert" + s3_ca_chain = tmp_path / "s3_ca_chain.cert" cert.write_text("cert") key.write_text("private") client_ca.write_text("ca") + s3_ca_chain.write_text("s3_ca") container = Container( "foo", can_connect=True, + exec_mock={("update-ca-certificates", "--fresh"): ExecOutput()}, mounts={ "cert": Mount(CERT_FILE, cert), "key": Mount(KEY_FILE, key), "client_ca": Mount(CLIENT_CA_FILE, client_ca), + "s3_ca_chain": Mount(S3_TLS_CA_CHAIN_FILE, s3_ca_chain), }, ) @@ -523,3 +529,67 @@ def __init__(self, framework: Framework): ) assert not _update_config_mock.called + + +@patch.object(Worker, "_update_worker_config", MagicMock(return_value=False)) +@patch.object(Worker, "_set_pebble_layer", MagicMock(return_value=False)) +@patch.object(Worker, "restart") +def test_worker_certs_update(restart_mock, tmp_path): + # GIVEN a worker with no cert files on disk, and a cluster relation giving us some cert data + ctx = Context( + MyCharm, + meta={ + "name": "foo", + "requires": {"cluster": {"interface": "cluster"}}, + "containers": {"foo": {"type": "oci-image"}}, + }, + config={"options": {"role-all": {"type": "boolean", "default": True}}}, + ) + relation = Relation( + "cluster", + remote_app_data={ + "worker_config": json.dumps("some: yaml"), + "ca_cert": json.dumps("ca"), + "server_cert": json.dumps("cert"), + "privkey_secret_id": json.dumps("private_id"), + "s3_tls_ca_chain": json.dumps("s3_ca"), + }, + ) + + cert = tmp_path / "cert.cert" + key = tmp_path / "key.key" + client_ca = tmp_path / "client_ca.cert" + s3_ca_chain = tmp_path / "s3_ca_chain.cert" + + container = Container( + "foo", + can_connect=True, + exec_mock={("update-ca-certificates", "--fresh"): ExecOutput()}, + mounts={ + "cert": Mount(CERT_FILE, cert), + "key": Mount(KEY_FILE, key), + "client_ca": Mount(CLIENT_CA_FILE, client_ca), + "s3_ca_chain": Mount(S3_TLS_CA_CHAIN_FILE, s3_ca_chain), + }, + ) + + secret = Secret( + "secret:private_id", + label="private_id", + owner="app", + contents={0: {"private-key": "private"}}, + ) + # WHEN the charm receives any event + ctx.run( + "update_status", + State(leader=True, containers=[container], relations=[relation], secrets=[secret]), + ) + + # THEN the worker writes all tls data to the right locations on the container filesystem + assert cert.read_text() == "cert" + assert key.read_text() == "private" + assert client_ca.read_text() == "ca" + assert s3_ca_chain.read_text() == "s3_ca" + + # AND the worker restarts the workload + assert restart_mock.call_count == 1 From bf4363d10af82c0c00cee9b6b18b93e0aa17bcd2 Mon Sep 17 00:00:00 2001 From: Pietro Pasotti Date: Fri, 27 Sep 2024 14:33:16 +0200 Subject: [PATCH 05/16] vbump --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b5a3be4..4e39820 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "cosl" -version = "0.0.36" +version = "0.0.37" authors = [ { name="sed-i", email="82407168+sed-i@users.noreply.github.com" }, ] From a0946df5cbccc4ba97b4a3dcb012255e5f8c4e2e Mon Sep 17 00:00:00 2001 From: Pietro Pasotti Date: Fri, 27 Sep 2024 14:40:31 +0200 Subject: [PATCH 06/16] static fixes --- src/cosl/coordinated_workers/coordinator.py | 2 +- src/cosl/coordinated_workers/interface.py | 22 ++++++++++----------- src/cosl/coordinated_workers/worker.py | 7 ++++--- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/cosl/coordinated_workers/coordinator.py b/src/cosl/coordinated_workers/coordinator.py index 78fd0b7..d28f15e 100644 --- a/src/cosl/coordinated_workers/coordinator.py +++ b/src/cosl/coordinated_workers/coordinator.py @@ -727,7 +727,7 @@ def update_cluster(self): if self.remote_write_endpoints_getter else None ), - s3_tls_ca_cert=self._s3_config, + s3_tls_ca_chain=self.s3_connection_info.tls_ca_chain, ) def _render_workers_alert_rules(self): diff --git a/src/cosl/coordinated_workers/interface.py b/src/cosl/coordinated_workers/interface.py index 1cba121..f7db93a 100644 --- a/src/cosl/coordinated_workers/interface.py +++ b/src/cosl/coordinated_workers/interface.py @@ -12,7 +12,6 @@ import collections import json import logging -from collections import namedtuple from typing import ( Any, Counter, @@ -22,6 +21,7 @@ List, Mapping, MutableMapping, + NamedTuple, Optional, Set, ) @@ -193,15 +193,13 @@ class ClusterProviderAppData(DatabagModel): s3_tls_ca_chain: Optional[str] = None -TLSData = namedtuple( - "TLSData", - [ - "ca_cert", - "server_cert", - "privkey_secret_id", - "s3_tls_ca_chain", - ], -) +class TLSData(NamedTuple): + """Section of the cluster data that concerns TLS information.""" + + ca_cert: Optional[str] + server_cert: Optional[str] + privkey_secret_id: Optional[str] + s3_tls_ca_chain: Optional[str] class ClusterChangedEvent(ops.EventBase): @@ -280,7 +278,7 @@ def publish_data( worker_config: str, ca_cert: Optional[str] = None, server_cert: Optional[str] = None, - s3_tls_ca_cert: Optional[str] = None, + s3_tls_ca_chain: Optional[str] = None, privkey_secret_id: Optional[str] = None, loki_endpoints: Optional[Dict[str, str]] = None, tracing_receivers: Optional[Dict[str, str]] = None, @@ -297,7 +295,7 @@ def publish_data( privkey_secret_id=privkey_secret_id, tracing_receivers=tracing_receivers, remote_write_endpoints=remote_write_endpoints, - s3_tls_ca_cert=s3_tls_ca_cert, + s3_tls_ca_chain=s3_tls_ca_chain, ) local_app_databag.dump(relation.data[self.model.app]) diff --git a/src/cosl/coordinated_workers/worker.py b/src/cosl/coordinated_workers/worker.py index 572755f..a3a6df6 100644 --- a/src/cosl/coordinated_workers/worker.py +++ b/src/cosl/coordinated_workers/worker.py @@ -556,7 +556,7 @@ def _update_tls_certificates(self) -> bool: if tls_data := self.cluster.get_tls_data(): private_key_secret = self.model.get_secret(id=tls_data.privkey_secret_id) private_key = private_key_secret.get_content().get("private-key") - + new_contents: Optional[str] for new_contents, file in ( (tls_data.ca_cert, CLIENT_CA_FILE), (tls_data.server_cert, CERT_FILE), @@ -572,7 +572,8 @@ def _update_tls_certificates(self) -> bool: self._container.push(file, new_contents or "", make_dirs=True) # Save the cacert in the charm container for charm traces - ROOT_CA_CERT.write_text(tls_data.ca_cert) + if tls_data.ca_cert: + ROOT_CA_CERT.write_text(tls_data.ca_cert) if not any_changes: return False logger.debug("found new tls data in cluster. synced with container fs") @@ -730,7 +731,7 @@ def charm_tracing_config(self) -> Tuple[Optional[str], Optional[str]]: is_https = endpoint.startswith("https://") tls_data = self.cluster.get_tls_data() - server_ca_cert = tls_data.get("server_cert") if tls_data else None + server_ca_cert = tls_data.server_cert if tls_data else None if is_https: if server_ca_cert is None: From 79273492c4c698d2b583c0ac02958f00a59280a8 Mon Sep 17 00:00:00 2001 From: Pietro Pasotti Date: Fri, 27 Sep 2024 15:41:24 +0200 Subject: [PATCH 07/16] ca path and not cert path --- src/cosl/coordinated_workers/coordinator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cosl/coordinated_workers/coordinator.py b/src/cosl/coordinated_workers/coordinator.py index d28f15e..9c4ee2d 100644 --- a/src/cosl/coordinated_workers/coordinator.py +++ b/src/cosl/coordinated_workers/coordinator.py @@ -475,7 +475,7 @@ def _s3_config(self) -> Dict[str, Any]: # tls_server_name: Path to the CA certificate file. # not a typo: it's the same # we send to the worker the chain itself, but the tempo config actually wants a path to a file # the worker will be responsible for putting it there - "tls_cert_path": worker.S3_TLS_CA_CHAIN_FILE if s3_data.tls_ca_chain else None, + "tls_ca_path": worker.S3_TLS_CA_CHAIN_FILE if s3_data.tls_ca_chain else None, } return s3_config From 3a6d874d4595952caa654fa03de02ce36458b010 Mon Sep 17 00:00:00 2001 From: Mateusz Kulewicz Date: Mon, 30 Sep 2024 11:15:17 +0200 Subject: [PATCH 08/16] Use List[str] for tls_ca_chain and fix tests --- src/cosl/coordinated_workers/coordinator.py | 2 +- .../test_coordinator.py | 19 ++++++++++++------- tests/test_coordinated_workers/test_worker.py | 10 ++++++++-- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/src/cosl/coordinated_workers/coordinator.py b/src/cosl/coordinated_workers/coordinator.py index 9c4ee2d..d652810 100644 --- a/src/cosl/coordinated_workers/coordinator.py +++ b/src/cosl/coordinated_workers/coordinator.py @@ -93,7 +93,7 @@ class S3ConnectionInfo(pydantic.BaseModel): secret_key: str = pydantic.Field(alias="secret-key") region: Optional[str] = pydantic.Field(None) - tls_ca_chain: Optional[str] = pydantic.Field(None, alias="tls-ca-chain") + tls_ca_chain: Optional[List[str]] = pydantic.Field(None, alias="tls-ca-chain") @dataclass diff --git a/tests/test_coordinated_workers/test_coordinator.py b/tests/test_coordinated_workers/test_coordinator.py index 8168750..dbc4cc4 100644 --- a/tests/test_coordinated_workers/test_coordinator.py +++ b/tests/test_coordinated_workers/test_coordinator.py @@ -1,3 +1,5 @@ +import json + import ops import pytest from ops import Framework @@ -176,7 +178,7 @@ def test_without_s3_integration_raises_error( @pytest.mark.parametrize("region", (None, "canada")) -@pytest.mark.parametrize("tls_ca_chain", (None, "my ca chain")) +@pytest.mark.parametrize("tls_ca_chain", (None, ["my ca chain"])) @pytest.mark.parametrize("bucket", ("bucky",)) @pytest.mark.parametrize("secret_key", ("foo",)) @pytest.mark.parametrize("access_key", ("foo",)) @@ -208,12 +210,15 @@ def test_s3_integration( relation for relation in coordinator_state.relations if relation.endpoint != "my-s3" ] s3_app_data = { - **({"region": region} if region else {}), - **({"tls-ca-chain": tls_ca_chain} if tls_ca_chain else {}), - "endpoint": endpoint, - "access-key": access_key, - "secret-key": secret_key, - "bucket": bucket, + k: json.dumps(v) + for k, v in { + **({"region": region} if region else {}), + **({"tls-ca-chain": tls_ca_chain} if tls_ca_chain else {}), + "endpoint": endpoint, + "access-key": access_key, + "secret-key": secret_key, + "bucket": bucket, + }.items() } # WHEN we process any event diff --git a/tests/test_coordinated_workers/test_worker.py b/tests/test_coordinated_workers/test_worker.py index 379abf7..d38cf1c 100644 --- a/tests/test_coordinated_workers/test_worker.py +++ b/tests/test_coordinated_workers/test_worker.py @@ -149,7 +149,10 @@ def test_worker_restarts_if_some_service_not_up(tmp_path): "foo", can_connect=True, mounts={"local": Mount(CONFIG_FILE, cfg)}, - exec_mock={("update-ca-certificates", "--fresh"): ExecOutput()}, + exec_mock={ + ("update-ca-certificates", "--fresh"): ExecOutput(), + ("/bin/foo", "-version"): ExecOutput(stdout="foo"), + }, service_status={ "foo": ServiceStatus.INACTIVE, "bar": ServiceStatus.ACTIVE, @@ -215,7 +218,10 @@ def test_worker_does_not_restart_external_services(tmp_path): cfg.write_text("some: yaml") container = Container( "foo", - exec_mock={("update-ca-certificates", "--fresh"): ExecOutput()}, + exec_mock={ + ("update-ca-certificates", "--fresh"): ExecOutput(), + ("/bin/foo", "-version"): ExecOutput(stdout="foo"), + }, can_connect=True, mounts={"local": Mount(CONFIG_FILE, cfg)}, layers={"foo": MyCharm.layer, "bar": other_layer}, From 62bd7b6edecbae5e88dc54ce93f98da376e3f242 Mon Sep 17 00:00:00 2001 From: Pietro Pasotti Date: Mon, 30 Sep 2024 11:33:40 +0200 Subject: [PATCH 09/16] unified ca chain when passing it to the worker --- src/cosl/coordinated_workers/coordinator.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/cosl/coordinated_workers/coordinator.py b/src/cosl/coordinated_workers/coordinator.py index d652810..72b4e95 100644 --- a/src/cosl/coordinated_workers/coordinator.py +++ b/src/cosl/coordinated_workers/coordinator.py @@ -95,6 +95,11 @@ class S3ConnectionInfo(pydantic.BaseModel): region: Optional[str] = pydantic.Field(None) tls_ca_chain: Optional[List[str]] = pydantic.Field(None, alias="tls-ca-chain") + @property + def ca_cert(self) -> Optional[str]: + """Unify the ca chain provided by the lib into a single cert.""" + return "\n\n".join(self.tls_ca_chain) if self.tls_ca_chain else None + @dataclass class ClusterRolesConfig: @@ -727,7 +732,7 @@ def update_cluster(self): if self.remote_write_endpoints_getter else None ), - s3_tls_ca_chain=self.s3_connection_info.tls_ca_chain, + s3_tls_ca_chain=self.s3_connection_info.ca_cert, ) def _render_workers_alert_rules(self): From e8946522d70ef8f10fd58ee37f598795fcc63415 Mon Sep 17 00:00:00 2001 From: Pietro Pasotti Date: Mon, 30 Sep 2024 15:42:51 +0200 Subject: [PATCH 10/16] split tls update/wipe logic --- src/cosl/coordinated_workers/worker.py | 96 ++++++++++++-------------- 1 file changed, 46 insertions(+), 50 deletions(-) diff --git a/src/cosl/coordinated_workers/worker.py b/src/cosl/coordinated_workers/worker.py index a3a6df6..b123715 100644 --- a/src/cosl/coordinated_workers/worker.py +++ b/src/cosl/coordinated_workers/worker.py @@ -22,7 +22,7 @@ from ops.pebble import Check, Layer, PathError, Plan, ProtocolError from cosl import JujuTopology -from cosl.coordinated_workers.interface import ClusterRequirer +from cosl.coordinated_workers.interface import ClusterRequirer, TLSData from cosl.helpers import check_libs_installed check_libs_installed( @@ -547,67 +547,63 @@ def _update_worker_config(self) -> bool: return False - def _update_tls_certificates(self) -> bool: - """Update the TLS certificates on disk according to their availability.""" - if not self._container.can_connect(): - return False + def _wipe_tls_files(self): + logger.debug("no tls data in cluster. wiping files...") any_changes = False - if tls_data := self.cluster.get_tls_data(): - private_key_secret = self.model.get_secret(id=tls_data.privkey_secret_id) - private_key = private_key_secret.get_content().get("private-key") - new_contents: Optional[str] - for new_contents, file in ( - (tls_data.ca_cert, CLIENT_CA_FILE), - (tls_data.server_cert, CERT_FILE), - (private_key, KEY_FILE), - (tls_data.s3_tls_ca_chain, S3_TLS_CA_CHAIN_FILE), - ): - if self._container.exists(file): - current_contents = self._container.pull(file).read() - if current_contents == new_contents: - continue - + for file in (CLIENT_CA_FILE, CERT_FILE, KEY_FILE, S3_TLS_CA_CHAIN_FILE): + if self._container.exists(file): any_changes = True - self._container.push(file, new_contents or "", make_dirs=True) - - # Save the cacert in the charm container for charm traces - if tls_data.ca_cert: - ROOT_CA_CERT.write_text(tls_data.ca_cert) - if not any_changes: - return False - logger.debug("found new tls data in cluster. synced with container fs") + self._container.remove_path(file, recursive=True) - else: - if not any( - self._container.exists(file) - for file in ( - CERT_FILE, - KEY_FILE, - CLIENT_CA_FILE, - ROOT_CA_CERT, - S3_TLS_CA_CHAIN_FILE, - ) - ): - # No update needed - return False - logger.debug("no tls data in cluster. wiping files...") + # Remove from charm container + ROOT_CA_CERT.unlink(missing_ok=True) + return any_changes - self._container.remove_path(CERT_FILE, recursive=True) - self._container.remove_path(KEY_FILE, recursive=True) - self._container.remove_path(CLIENT_CA_FILE, recursive=True) - self._container.remove_path(ROOT_CA_CERT, recursive=True) - self._container.remove_path(S3_TLS_CA_CHAIN_FILE, recursive=True) + def _write_tls_files(self, tls_data: TLSData): + logger.debug("tls config in cluster. writing to container...") - # Remove from charm container - ROOT_CA_CERT.unlink(missing_ok=True) + private_key_secret = self.model.get_secret(id=tls_data.privkey_secret_id) + private_key = private_key_secret.get_content().get("private-key") + new_contents: Optional[str] + any_changes = False + for new_contents, file in ( + (tls_data.ca_cert, CLIENT_CA_FILE), + (tls_data.server_cert, CERT_FILE), + (private_key, KEY_FILE), + (tls_data.s3_tls_ca_chain, S3_TLS_CA_CHAIN_FILE), + ): + if self._container.exists(file): + current_contents = self._container.pull(file).read() + if current_contents == new_contents: + continue + + any_changes = True + self._container.push(file, new_contents or "", make_dirs=True) + + # Save the cacert in the charm container for charm traces + if tls_data.ca_cert: + ROOT_CA_CERT.write_text(tls_data.ca_cert) if any_changes: logger.debug("running update-ca-certificates") self._container.exec(["update-ca-certificates", "--fresh"]).wait() subprocess.run(["update-ca-certificates", "--fresh"]) - return True + return any_changes + + def _update_tls_certificates(self) -> bool: + """Update the TLS certificates on disk according to their availability. + + Return True if we need to restart the workload after this update. + """ + if not self._container.can_connect(): + return False + + if tls_data := self.cluster.get_tls_data(): + return self._write_tls_files(tls_data) + else: + return self._wipe_tls_files() def restart(self): """Restart the pebble service or start it if not already running, then wait for it to become ready. From 3e19b6c1ecb220e9e8beab2546a2b8b0d5e51b8d Mon Sep 17 00:00:00 2001 From: Pietro Pasotti Date: Mon, 30 Sep 2024 15:56:00 +0200 Subject: [PATCH 11/16] allow nones in tls data --- src/cosl/coordinated_workers/interface.py | 6 ++++-- src/cosl/coordinated_workers/worker.py | 12 +++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/cosl/coordinated_workers/interface.py b/src/cosl/coordinated_workers/interface.py index f7db93a..83e8a87 100644 --- a/src/cosl/coordinated_workers/interface.py +++ b/src/cosl/coordinated_workers/interface.py @@ -529,13 +529,15 @@ def get_loki_endpoints(self) -> Dict[str, str]: return data.loki_endpoints or {} return {} - def get_tls_data(self) -> Optional[TLSData]: + def get_tls_data(self, allow_none: bool = False) -> Optional[TLSData]: """Fetch certificates and the private key secrets id for the worker config.""" data = self._get_data_from_coordinator() if not data: return None - if not data.ca_cert or not data.server_cert or not data.privkey_secret_id: + if not allow_none and ( + not data.ca_cert or not data.server_cert or not data.privkey_secret_id + ): return None return TLSData( diff --git a/src/cosl/coordinated_workers/worker.py b/src/cosl/coordinated_workers/worker.py index b123715..0625a39 100644 --- a/src/cosl/coordinated_workers/worker.py +++ b/src/cosl/coordinated_workers/worker.py @@ -562,9 +562,12 @@ def _wipe_tls_files(self): def _write_tls_files(self, tls_data: TLSData): logger.debug("tls config in cluster. writing to container...") + if tls_data.privkey_secret_id: + private_key_secret = self.model.get_secret(id=tls_data.privkey_secret_id) + private_key = private_key_secret.get_content().get("private-key") + else: + private_key = None - private_key_secret = self.model.get_secret(id=tls_data.privkey_secret_id) - private_key = private_key_secret.get_content().get("private-key") new_contents: Optional[str] any_changes = False for new_contents, file in ( @@ -573,6 +576,9 @@ def _write_tls_files(self, tls_data: TLSData): (private_key, KEY_FILE), (tls_data.s3_tls_ca_chain, S3_TLS_CA_CHAIN_FILE), ): + if not new_contents: + continue + if self._container.exists(file): current_contents = self._container.pull(file).read() if current_contents == new_contents: @@ -600,7 +606,7 @@ def _update_tls_certificates(self) -> bool: if not self._container.can_connect(): return False - if tls_data := self.cluster.get_tls_data(): + if tls_data := self.cluster.get_tls_data(allow_none=True): return self._write_tls_files(tls_data) else: return self._wipe_tls_files() From e5f88bf5508e5c5904ef098f1d3f7f11ea1b5a5a Mon Sep 17 00:00:00 2001 From: Pietro Pasotti Date: Tue, 1 Oct 2024 12:16:48 +0200 Subject: [PATCH 12/16] simpler sync logic --- src/cosl/coordinated_workers/interface.py | 4 +- src/cosl/coordinated_workers/worker.py | 47 ++++++++------- tests/test_coordinated_workers/test_worker.py | 58 +++++++++++++++++++ 3 files changed, 83 insertions(+), 26 deletions(-) diff --git a/src/cosl/coordinated_workers/interface.py b/src/cosl/coordinated_workers/interface.py index 83e8a87..82634c1 100644 --- a/src/cosl/coordinated_workers/interface.py +++ b/src/cosl/coordinated_workers/interface.py @@ -535,9 +535,9 @@ def get_tls_data(self, allow_none: bool = False) -> Optional[TLSData]: if not data: return None - if not allow_none and ( + if ( not data.ca_cert or not data.server_cert or not data.privkey_secret_id - ): + ) and not allow_none: return None return TLSData( diff --git a/src/cosl/coordinated_workers/worker.py b/src/cosl/coordinated_workers/worker.py index 0625a39..4d841c6 100644 --- a/src/cosl/coordinated_workers/worker.py +++ b/src/cosl/coordinated_workers/worker.py @@ -547,20 +547,7 @@ def _update_worker_config(self) -> bool: return False - def _wipe_tls_files(self): - logger.debug("no tls data in cluster. wiping files...") - - any_changes = False - for file in (CLIENT_CA_FILE, CERT_FILE, KEY_FILE, S3_TLS_CA_CHAIN_FILE): - if self._container.exists(file): - any_changes = True - self._container.remove_path(file, recursive=True) - - # Remove from charm container - ROOT_CA_CERT.unlink(missing_ok=True) - return any_changes - - def _write_tls_files(self, tls_data: TLSData): + def _sync_tls_files(self, tls_data: TLSData): logger.debug("tls config in cluster. writing to container...") if tls_data.privkey_secret_id: private_key_secret = self.model.get_secret(id=tls_data.privkey_secret_id) @@ -577,24 +564,30 @@ def _write_tls_files(self, tls_data: TLSData): (tls_data.s3_tls_ca_chain, S3_TLS_CA_CHAIN_FILE), ): if not new_contents: + if self._container.exists(file): + any_changes = True + self._container.remove_path(file, recursive=True) + logger.debug(f"{file} deleted") + continue + + logger.debug(f"{file} skipped") continue if self._container.exists(file): current_contents = self._container.pull(file).read() if current_contents == new_contents: + logger.debug(f"{file} unchanged") continue + logger.debug(f"{file} updated") any_changes = True - self._container.push(file, new_contents or "", make_dirs=True) + self._container.push(file, new_contents, make_dirs=True) # Save the cacert in the charm container for charm traces if tls_data.ca_cert: ROOT_CA_CERT.write_text(tls_data.ca_cert) - - if any_changes: - logger.debug("running update-ca-certificates") - self._container.exec(["update-ca-certificates", "--fresh"]).wait() - subprocess.run(["update-ca-certificates", "--fresh"]) + else: + ROOT_CA_CERT.unlink(missing_ok=True) return any_changes @@ -606,10 +599,16 @@ def _update_tls_certificates(self) -> bool: if not self._container.can_connect(): return False - if tls_data := self.cluster.get_tls_data(allow_none=True): - return self._write_tls_files(tls_data) - else: - return self._wipe_tls_files() + tls_data = self.cluster.get_tls_data(allow_none=True) + if not tls_data: + return False + + any_changes = self._sync_tls_files(tls_data) + if any_changes: + logger.debug("running update-ca-certificates") + self._container.exec(["update-ca-certificates", "--fresh"]).wait() + subprocess.run(["update-ca-certificates", "--fresh"]) + return any_changes def restart(self): """Restart the pebble service or start it if not already running, then wait for it to become ready. diff --git a/tests/test_coordinated_workers/test_worker.py b/tests/test_coordinated_workers/test_worker.py index d38cf1c..51af37f 100644 --- a/tests/test_coordinated_workers/test_worker.py +++ b/tests/test_coordinated_workers/test_worker.py @@ -599,3 +599,61 @@ def test_worker_certs_update(restart_mock, tmp_path): # AND the worker restarts the workload assert restart_mock.call_count == 1 + + +@patch.object(Worker, "_update_worker_config", MagicMock(return_value=False)) +@patch.object(Worker, "_set_pebble_layer", MagicMock(return_value=False)) +@patch.object(Worker, "restart") +@pytest.mark.parametrize("s3_ca_on_disk", (True, False)) +def test_worker_certs_update_only_s3(restart_mock, tmp_path, s3_ca_on_disk): + # GIVEN a worker with a tls-encrypted s3 bucket + ctx = Context( + MyCharm, + meta={ + "name": "foo", + "requires": {"cluster": {"interface": "cluster"}}, + "containers": {"foo": {"type": "oci-image"}}, + }, + config={"options": {"role-all": {"type": "boolean", "default": True}}}, + ) + relation = Relation( + "cluster", + remote_app_data={ + "worker_config": json.dumps("some: yaml"), + "s3_tls_ca_chain": json.dumps("s3_ca"), + }, + ) + + cert = tmp_path / "cert.cert" + key = tmp_path / "key.key" + client_ca = tmp_path / "client_ca.cert" + s3_ca_chain = tmp_path / "s3_ca_chain.cert" + if s3_ca_on_disk: + s3_ca_chain.write_text("s3_ca") + + container = Container( + "foo", + can_connect=True, + exec_mock={("update-ca-certificates", "--fresh"): ExecOutput()}, + mounts={ + "cert": Mount(CERT_FILE, cert), + "key": Mount(KEY_FILE, key), + "client_ca": Mount(CLIENT_CA_FILE, client_ca), + "s3_ca_chain": Mount(S3_TLS_CA_CHAIN_FILE, s3_ca_chain), + }, + ) + + # WHEN the charm receives any event + ctx.run( + "update_status", + State(leader=True, containers=[container], relations=[relation]), + ) + + # THEN the worker writes all tls data to the right locations on the container filesystem + assert not cert.exists() + assert not key.exists() + assert not client_ca.exists() + assert s3_ca_chain.read_text() == "s3_ca" + + # AND the worker restarts the workload IF it was not on disk already + assert restart_mock.call_count == (0 if s3_ca_on_disk else 1) From dc37cf4e70b1239fa5de5e40fb8436de13306875 Mon Sep 17 00:00:00 2001 From: Pietro Pasotti Date: Tue, 1 Oct 2024 13:47:29 +0200 Subject: [PATCH 13/16] better status collection --- src/cosl/coordinated_workers/coordinator.py | 27 ++++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/cosl/coordinated_workers/coordinator.py b/src/cosl/coordinated_workers/coordinator.py index 72b4e95..fe4784b 100644 --- a/src/cosl/coordinated_workers/coordinator.py +++ b/src/cosl/coordinated_workers/coordinator.py @@ -638,21 +638,30 @@ def _on_config_changed(self, _: ops.ConfigChangedEvent): # keep this event handler at the bottom def _on_collect_unit_status(self, e: ops.CollectStatusEvent): # todo add [nginx.workload] statuses + statuses = [] if self.resources_patch and self.resources_patch.get_status().name != "active": - e.add_status(self.resources_patch.get_status()) + statuses.append(self.resources_patch.get_status()) if not self.cluster.has_workers: - e.add_status(ops.BlockedStatus("[consistency] Missing any worker relation.")) - if not self.is_coherent: - e.add_status(ops.BlockedStatus("[consistency] Cluster inconsistent.")) - if not self.s3_ready: - e.add_status(ops.BlockedStatus("[consistency] Missing S3 integration.")) + statuses.append(ops.BlockedStatus("[consistency] Missing any worker relation.")) + elif not self.is_coherent: + statuses.append(ops.BlockedStatus("[consistency] Cluster inconsistent.")) elif not self.is_recommended: # if is_recommended is None: it means we don't have a recommended deployment criterion. - e.add_status(ops.ActiveStatus("[coordinator] Degraded.")) - else: - e.add_status(ops.ActiveStatus()) + statuses.append(ops.ActiveStatus("Degraded.")) + + if not self.s3_requirer.relations: + statuses.append(ops.BlockedStatus("[s3] Missing S3 integration.")) + elif not self.s3_ready: + statuses.append(ops.BlockedStatus("[s3] S3 not ready (probably misconfigured).")) + + if not statuses: + statuses.append(ops.ActiveStatus()) + + for status in statuses: + e.add_status(status) + ################### # UTILITY METHODS # From 293d752a01df02777dcb5ca4f8e90098d99df497 Mon Sep 17 00:00:00 2001 From: Pietro Pasotti Date: Wed, 2 Oct 2024 08:54:31 +0200 Subject: [PATCH 14/16] pr comments and merge --- src/cosl/coordinated_workers/coordinator.py | 10 ++-------- src/cosl/coordinated_workers/worker.py | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/cosl/coordinated_workers/coordinator.py b/src/cosl/coordinated_workers/coordinator.py index fe4784b..0bc3bff 100644 --- a/src/cosl/coordinated_workers/coordinator.py +++ b/src/cosl/coordinated_workers/coordinator.py @@ -473,13 +473,8 @@ def _s3_config(self) -> Dict[str, Any]: "secret_access_key": s3_data.secret_key, "bucket_name": s3_data.bucket, "insecure": not s3_data.tls_ca_chain, - # FIXME: s3 gives us a cert chain. tempo's s3 config takes: - # tls_cert_path: Path to the client certificate file. - # tls_key_path: Path to the private client key file. - # tls_ca_path: Path to the CA certificate file. - # tls_server_name: Path to the CA certificate file. # not a typo: it's the same - # we send to the worker the chain itself, but the tempo config actually wants a path to a file - # the worker will be responsible for putting it there + # the tempo config wants a path to a file here. We pass the cert chain separately + # over the cluster relation; the worker will be responsible for writing the file to disk "tls_ca_path": worker.S3_TLS_CA_CHAIN_FILE if s3_data.tls_ca_chain else None, } @@ -662,7 +657,6 @@ def _on_collect_unit_status(self, e: ops.CollectStatusEvent): for status in statuses: e.add_status(status) - ################### # UTILITY METHODS # ################### diff --git a/src/cosl/coordinated_workers/worker.py b/src/cosl/coordinated_workers/worker.py index 4d841c6..f149e41 100644 --- a/src/cosl/coordinated_workers/worker.py +++ b/src/cosl/coordinated_workers/worker.py @@ -584,11 +584,11 @@ def _sync_tls_files(self, tls_data: TLSData): self._container.push(file, new_contents, make_dirs=True) # Save the cacert in the charm container for charm traces + # we do it unconditionally to avoid the extra complexity. if tls_data.ca_cert: ROOT_CA_CERT.write_text(tls_data.ca_cert) else: ROOT_CA_CERT.unlink(missing_ok=True) - return any_changes def _update_tls_certificates(self) -> bool: From f7cd3950c74dddb494f07597138074f74a715172 Mon Sep 17 00:00:00 2001 From: Pietro Pasotti Date: Wed, 2 Oct 2024 08:59:11 +0200 Subject: [PATCH 15/16] statuc --- src/cosl/coordinated_workers/coordinator.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/cosl/coordinated_workers/coordinator.py b/src/cosl/coordinated_workers/coordinator.py index 0bc3bff..61d84be 100644 --- a/src/cosl/coordinated_workers/coordinator.py +++ b/src/cosl/coordinated_workers/coordinator.py @@ -28,6 +28,7 @@ import ops import pydantic import yaml +from ops import StatusBase import cosl from cosl.coordinated_workers import worker @@ -452,7 +453,8 @@ def tls_available(self) -> bool: def s3_connection_info(self) -> S3ConnectionInfo: """Cast and validate the untyped s3 databag to something we can handle.""" try: - return S3ConnectionInfo(**self.s3_requirer.get_s3_connection_info()) + # we have to type-ignore here because the s3 lib's type annotation is wrong + return S3ConnectionInfo(**self.s3_requirer.get_s3_connection_info()) # type: ignore except pydantic.ValidationError: raise S3NotFoundError("s3 integration inactive or interface corrupt") @@ -633,7 +635,7 @@ def _on_config_changed(self, _: ops.ConfigChangedEvent): # keep this event handler at the bottom def _on_collect_unit_status(self, e: ops.CollectStatusEvent): # todo add [nginx.workload] statuses - statuses = [] + statuses: List[StatusBase] = [] if self.resources_patch and self.resources_patch.get_status().name != "active": statuses.append(self.resources_patch.get_status()) From f4d6b78111eb5785528852809fd986295f6c3fe5 Mon Sep 17 00:00:00 2001 From: Pietro Pasotti Date: Wed, 2 Oct 2024 08:59:48 +0200 Subject: [PATCH 16/16] unit --- tests/test_coordinated_workers/test_coordinator_status.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_coordinated_workers/test_coordinator_status.py b/tests/test_coordinated_workers/test_coordinator_status.py index de7f142..def1021 100644 --- a/tests/test_coordinated_workers/test_coordinator_status.py +++ b/tests/test_coordinated_workers/test_coordinator_status.py @@ -138,7 +138,7 @@ def test_status_check_no_s3(ctx, base_state, worker, caplog): state_out = ctx.run("config_changed", state) # THEN the charm sets blocked - assert state_out.unit_status == BlockedStatus("[consistency] Missing S3 integration.") + assert state_out.unit_status == BlockedStatus("[s3] Missing S3 integration.") @patch( @@ -188,4 +188,4 @@ def test_status_check_k8s_patch_success_after_retries( MagicMock(return_value=ActiveStatus("")), ): state_out = ctx.run("update_status", state_intermediate) - assert state_out.unit_status == ActiveStatus("[coordinator] Degraded.") + assert state_out.unit_status == ActiveStatus("Degraded.")