From 8e9d9eed50fcc093e43d0dea11d3079eb337890f Mon Sep 17 00:00:00 2001 From: Dragomir Penev Date: Mon, 24 Jun 2024 22:45:55 +0300 Subject: [PATCH 1/5] Suppress oversee users in standby clusters --- actions.yaml | 2 ++ src/relations/async_replication.py | 16 ++++++++++ src/relations/postgresql_provider.py | 4 +++ tests/unit/test_async_replication.py | 48 ++++++++++++++++++++++++++++ 4 files changed, 70 insertions(+) create mode 100644 tests/unit/test_async_replication.py diff --git a/actions.yaml b/actions.yaml index 6e3ced3d8b..9214dcd5bd 100644 --- a/actions.yaml +++ b/actions.yaml @@ -62,3 +62,5 @@ set-tls-private-key: private-key: type: string description: The content of private key for communications with clients. Content will be auto-generated if this option is not specified. +reenable-oversee-users: + description: Reenable purging of managed credentials after a standby cluster is promoted. diff --git a/src/relations/async_replication.py b/src/relations/async_replication.py index e2adab6d8e..3c84df48ae 100644 --- a/src/relations/async_replication.py +++ b/src/relations/async_replication.py @@ -108,6 +108,9 @@ def __init__(self, charm): self.framework.observe( self.charm.on.promote_to_primary_action, self._on_promote_to_primary ) + self.framework.observe( + self.charm.on.reenable_oversee_users_action, self._on_reenable_oversee_users + ) self.framework.observe(self.charm.on.secret_changed, self._on_secret_changed) @@ -193,6 +196,7 @@ def _configure_standby_cluster(self, event: RelationChangedEvent) -> bool: filename = f"{POSTGRESQL_DATA_PATH}-{str(datetime.now()).replace(' ', '-').replace(':', '-')}.tar.gz" subprocess.check_call(f"tar -zcf {filename} {POSTGRESQL_DATA_PATH}".split()) logger.warning("Please review the backup file %s and handle its removal", filename) + self.charm.app_peer_data["suppress-oversee-users"] = "true" return True def get_all_primary_cluster_endpoints(self) -> List[str]: @@ -599,6 +603,18 @@ def _on_promote_to_primary(self, event: 
ActionEvent) -> None: # Set the status. self.charm.unit.status = MaintenanceStatus("Creating replication...") + def _on_reenable_oversee_users(self, event: ActionEvent) -> None: + """Re-enable oversee users after cluster was promoted.""" + if not self.charm.unit.is_leader(): + event.fail("Unit is not leader") + return + + if "suppress-oversee-users" not in self.charm.app_peer_data: + event.fail("Oversee users is not suppressed") + return + + del self.charm.app_peer_data["suppress-oversee-users"] + def _on_secret_changed(self, event: SecretChangedEvent) -> None: """Update the internal secret when the relation secret changes.""" relation = self._relation diff --git a/src/relations/postgresql_provider.py b/src/relations/postgresql_provider.py index 326ad27698..d3e1c88adf 100644 --- a/src/relations/postgresql_provider.py +++ b/src/relations/postgresql_provider.py @@ -137,6 +137,10 @@ def oversee_users(self) -> None: if not self.charm.unit.is_leader(): return + if "suppress-oversee-users" in self.charm.app_peer_data: + logger.debug("Oversee users is suppressed by peer data") + return + # Retrieve database users. try: database_users = { diff --git a/tests/unit/test_async_replication.py b/tests/unit/test_async_replication.py new file mode 100644 index 0000000000..861323dee2 --- /dev/null +++ b/tests/unit/test_async_replication.py @@ -0,0 +1,48 @@ +# Copyright 2023 Canonical Ltd. +# See LICENSE file for licensing details. 
+from unittest.mock import Mock + +import pytest +from ops.testing import Harness + +from charm import PostgresqlOperatorCharm + + +@pytest.fixture(autouse=True) +def harness(): + """Set up the test.""" + harness = Harness(PostgresqlOperatorCharm) + harness.begin() + upgrade_relation_id = harness.add_relation("upgrade", "postgresql") + peer_relation_id = harness.add_relation("database-peers", "postgresql") + for rel_id in (upgrade_relation_id, peer_relation_id): + harness.add_relation_unit(rel_id, "postgresql/1") + with harness.hooks_disabled(): + harness.update_relation_data(upgrade_relation_id, "postgresql/1", {"state": "idle"}) + yield harness + harness.cleanup() + + +def test_on_reenable_oversee_users(harness): + # Fail if unit is not leader + event = Mock() + + harness.charm.async_replication._on_reenable_oversee_users(event) + + event.fail.assert_called_once_with("Unit is not leader") + event.fail.reset_mock() + + # Fail if peer data is not set + with harness.hooks_disabled(): + harness.set_leader() + + harness.charm.async_replication._on_reenable_oversee_users(event) + + event.fail.assert_called_once_with("Oversee users is not suppressed") + event.fail.reset_mock() + + with harness.hooks_disabled(): + harness.charm._peers.data[harness.charm.app].update({"suppress-oversee-users": "true"}) + + harness.charm.async_replication._on_reenable_oversee_users(event) + assert harness.charm._peers.data[harness.charm.app] == {} From ba1a2600f76cf39bfce36f5165d6bb4f3f34af2f Mon Sep 17 00:00:00 2001 From: Dragomir Penev Date: Tue, 25 Jun 2024 01:48:45 +0300 Subject: [PATCH 2/5] Add suppression test --- src/relations/async_replication.py | 2 +- .../ha_tests/test_async_replication.py | 64 ++++++++++++++++++- 2 files changed, 63 insertions(+), 3 deletions(-) diff --git a/src/relations/async_replication.py b/src/relations/async_replication.py index 3c84df48ae..9aa04982cc 100644 --- a/src/relations/async_replication.py +++ b/src/relations/async_replication.py @@ -485,7 +485,7 
@@ def is_primary_cluster(self) -> bool: return self.charm.app == self._get_primary_cluster() def _on_async_relation_broken(self, _) -> None: - if "departing" in self.charm._peers.data[self.charm.unit]: + if not self.charm._peers or "departing" in self.charm._peers.data[self.charm.unit]: logger.debug("Early exit on_async_relation_broken: Skipping departing unit.") return diff --git a/tests/integration/ha_tests/test_async_replication.py b/tests/integration/ha_tests/test_async_replication.py index 96bdb3afa4..5faf249308 100644 --- a/tests/integration/ha_tests/test_async_replication.py +++ b/tests/integration/ha_tests/test_async_replication.py @@ -4,7 +4,7 @@ import contextlib import logging import subprocess -from asyncio import gather +from asyncio import gather, sleep from typing import Optional import psycopg2 @@ -41,6 +41,8 @@ IDLE_PERIOD = 5 TIMEOUT = 2000 +DATA_INTEGRATOR_APP_NAME = "data-integrator" + @contextlib.asynccontextmanager async def fast_forward( @@ -115,6 +117,14 @@ async def test_deploy_async_replication_setup( num_units=CLUSTER_SIZE, config={"profile": "testing"}, ) + if not await app_name(ops_test, DATA_INTEGRATOR_APP_NAME): + await ops_test.model.deploy( + DATA_INTEGRATOR_APP_NAME, + num_units=1, + channel="latest/edge", + config={"database-name": "testdb"}, + ) + await ops_test.model.relate(DATABASE_APP_NAME, DATA_INTEGRATOR_APP_NAME) if not await app_name(ops_test, model=second_model): charm = await ops_test.build_charm(".") await second_model.deploy( @@ -128,7 +138,7 @@ async def test_deploy_async_replication_setup( async with ops_test.fast_forward(), fast_forward(second_model): await gather( first_model.wait_for_idle( - apps=[DATABASE_APP_NAME, APPLICATION_NAME], + apps=[DATABASE_APP_NAME, APPLICATION_NAME, DATA_INTEGRATOR_APP_NAME], status="active", timeout=TIMEOUT, ), @@ -218,6 +228,19 @@ async def test_async_replication( await check_writes(ops_test, extra_model=second_model) +@pytest.mark.group(1) +@markers.juju3 
+@pytest.mark.abort_on_fail +async def test_get_data_integrator_credentials( + ops_test: OpsTest, +): + unit = ops_test.model.applications[DATA_INTEGRATOR_APP_NAME].units[0] + action = await unit.run_action(action_name="get-credentials") + result = await action.wait() + global data_integrator_credentials + data_integrator_credentials = result.results + + @pytest.mark.group(1) @markers.juju3 @pytest.mark.abort_on_fail @@ -273,6 +296,43 @@ async def test_switchover( await are_writes_increasing(ops_test, extra_model=second_model) +@pytest.mark.group(1) +@markers.juju3 +@pytest.mark.abort_on_fail +async def test_data_integrator_creds_keep_on_working( + ops_test: OpsTest, + second_model: Model, +) -> None: + user = data_integrator_credentials["postgresql"]["username"] + password = data_integrator_credentials["postgresql"]["password"] + database = data_integrator_credentials["postgresql"]["database"] + + any_unit = second_model.applications[DATABASE_APP_NAME].units[0].name + primary = await get_primary(ops_test, any_unit) + address = get_unit_address(ops_test, primary) + + connstr = f"dbname='{database}' user='{user}' host='{address}' port='5432' password='{password}' connect_timeout=1" + try: + with psycopg2.connect(connstr) as connection: + pass + finally: + connection.close() + + logger.info("Re-enable oversee users") + action = await primary.run_action(action_name="reenable-oversee-users") + await action.wait() + + async with ops_test.fast_forward(): + await sleep(20) + try: + with psycopg2.connect(connstr) as connection: + assert False + except psycopg2.errors.InsufficientPrivilege: + logger.info("Data integrator creds purged") + finally: + connection.close() + + @pytest.mark.group(1) @markers.juju3 @pytest.mark.abort_on_fail From d0d956a8b4cb3c4b3467fab135433a73f8337083 Mon Sep 17 00:00:00 2001 From: Dragomir Penev Date: Tue, 25 Jun 2024 02:13:00 +0300 Subject: [PATCH 3/5] Wait for model to idle --- tests/integration/ha_tests/test_async_replication.py | 5 +++++ 1 
file changed, 5 insertions(+) diff --git a/tests/integration/ha_tests/test_async_replication.py b/tests/integration/ha_tests/test_async_replication.py index 5faf249308..6f3f4b13cd 100644 --- a/tests/integration/ha_tests/test_async_replication.py +++ b/tests/integration/ha_tests/test_async_replication.py @@ -324,6 +324,11 @@ async def test_data_integrator_creds_keep_on_working( async with ops_test.fast_forward(): await sleep(20) + second_model.wait_for_idle( + apps=[DATABASE_APP_NAME], + status="active", + timeout=TIMEOUT, + ) try: with psycopg2.connect(connstr) as connection: assert False From 9bafcf6ca8294d5b6ecb866d59a28f5c74ea66e4 Mon Sep 17 00:00:00 2001 From: Dragomir Penev Date: Tue, 25 Jun 2024 04:53:17 +0300 Subject: [PATCH 4/5] Test fixes --- tests/integration/ha_tests/test_async_replication.py | 9 +++++---- tests/integration/helpers.py | 7 +++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/integration/ha_tests/test_async_replication.py b/tests/integration/ha_tests/test_async_replication.py index 6f3f4b13cd..8adfa53651 100644 --- a/tests/integration/ha_tests/test_async_replication.py +++ b/tests/integration/ha_tests/test_async_replication.py @@ -308,8 +308,8 @@ async def test_data_integrator_creds_keep_on_working( database = data_integrator_credentials["postgresql"]["database"] any_unit = second_model.applications[DATABASE_APP_NAME].units[0].name - primary = await get_primary(ops_test, any_unit) - address = get_unit_address(ops_test, primary) + primary = await get_primary(ops_test, any_unit, second_model) + address = second_model.units.get(primary).public_address connstr = f"dbname='{database}' user='{user}' host='{address}' port='5432' password='{password}' connect_timeout=1" try: @@ -319,12 +319,13 @@ async def test_data_integrator_creds_keep_on_working( connection.close() logger.info("Re-enable oversee users") - action = await primary.run_action(action_name="reenable-oversee-users") + leader_unit = await 
get_leader_unit(ops_test, DATABASE_APP_NAME, model=second_model) + action = await leader_unit.run_action(action_name="reenable-oversee-users") await action.wait() async with ops_test.fast_forward(): await sleep(20) - second_model.wait_for_idle( + await second_model.wait_for_idle( apps=[DATABASE_APP_NAME], status="active", timeout=TIMEOUT, diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py index 8a2ad24aa3..67fa1b59f2 100644 --- a/tests/integration/helpers.py +++ b/tests/integration/helpers.py @@ -621,17 +621,20 @@ async def get_password(ops_test: OpsTest, unit_name: str, username: str = "opera stop=stop_after_attempt(10), wait=wait_exponential(multiplier=1, min=2, max=30), ) -async def get_primary(ops_test: OpsTest, unit_name: str) -> str: +async def get_primary(ops_test: OpsTest, unit_name: str, model=None) -> str: """Get the primary unit. Args: ops_test: ops_test instance. unit_name: the name of the unit. + model: Model to use. Returns: the current primary unit. """ - action = await ops_test.model.units.get(unit_name).run_action("get-primary") + if not model: + model = ops_test.model + action = await model.units.get(unit_name).run_action("get-primary") action = await action.wait() return action.results["primary"] From 66521ab054c7f6b5b7918badd1e0732c6ca4f015 Mon Sep 17 00:00:00 2001 From: Dragomir Penev Date: Tue, 25 Jun 2024 12:35:04 +0300 Subject: [PATCH 5/5] Fix test --- tests/integration/ha_tests/test_async_replication.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/ha_tests/test_async_replication.py b/tests/integration/ha_tests/test_async_replication.py index 8adfa53651..0eef1518a7 100644 --- a/tests/integration/ha_tests/test_async_replication.py +++ b/tests/integration/ha_tests/test_async_replication.py @@ -323,7 +323,7 @@ async def test_data_integrator_creds_keep_on_working( action = await leader_unit.run_action(action_name="reenable-oversee-users") await action.wait() - async with 
ops_test.fast_forward(): + async with fast_forward(second_model, FAST_INTERVAL): await sleep(20) await second_model.wait_for_idle( apps=[DATABASE_APP_NAME], @@ -333,7 +333,7 @@ async def test_data_integrator_creds_keep_on_working( try: with psycopg2.connect(connstr) as connection: assert False - except psycopg2.errors.InsufficientPrivilege: + except psycopg2.OperationalError: logger.info("Data integrator creds purged") finally: connection.close()