From 713f6e47d6ce8de5271ce89301f372129639f4cd Mon Sep 17 00:00:00 2001 From: Dragomir Penev <6687393+dragomirp@users.noreply.github.com> Date: Tue, 25 Jun 2024 20:17:52 +0300 Subject: [PATCH] [MISC] Suppress oversee users in standby clusters (#507) * Suppress oversee users in standby clusters * Add suppression test * Wait for model to idle * Test fixes * Fix test * Remove action --- src/relations/async_replication.py | 3 +- src/relations/postgresql_provider.py | 19 +++++--- .../ha_tests/test_async_replication.py | 48 ++++++++++++++++++- tests/integration/helpers.py | 7 ++- 4 files changed, 66 insertions(+), 11 deletions(-) diff --git a/src/relations/async_replication.py b/src/relations/async_replication.py index e2adab6d8e..49eb672a9e 100644 --- a/src/relations/async_replication.py +++ b/src/relations/async_replication.py @@ -193,6 +193,7 @@ def _configure_standby_cluster(self, event: RelationChangedEvent) -> bool: filename = f"{POSTGRESQL_DATA_PATH}-{str(datetime.now()).replace(' ', '-').replace(':', '-')}.tar.gz" subprocess.check_call(f"tar -zcf {filename} {POSTGRESQL_DATA_PATH}".split()) logger.warning("Please review the backup file %s and handle its removal", filename) + self.charm.app_peer_data["suppress-oversee-users"] = "true" return True def get_all_primary_cluster_endpoints(self) -> List[str]: @@ -481,7 +482,7 @@ def is_primary_cluster(self) -> bool: return self.charm.app == self._get_primary_cluster() def _on_async_relation_broken(self, _) -> None: - if "departing" in self.charm._peers.data[self.charm.unit]: + if not self.charm._peers or "departing" in self.charm._peers.data[self.charm.unit]: logger.debug("Early exit on_async_relation_broken: Skipping departing unit.") return diff --git a/src/relations/postgresql_provider.py b/src/relations/postgresql_provider.py index 326ad27698..6b105fa602 100644 --- a/src/relations/postgresql_provider.py +++ b/src/relations/postgresql_provider.py @@ -137,6 +137,8 @@ def oversee_users(self) -> None: if not self.charm.unit.is_leader(): return + delete_user = "suppress-oversee-users" not in self.charm.app_peer_data + # Retrieve database users. try: database_users = { @@ -159,13 +161,16 @@ def oversee_users(self) -> None: # Delete that users that exist in the database but not in the active relations. for user in database_users - relation_users: - try: - logger.info("Remove relation user: %s", user) - self.charm.set_secret(APP_SCOPE, user, None) - self.charm.set_secret(APP_SCOPE, f"{user}-database", None) - self.charm.postgresql.delete_user(user) - except PostgreSQLDeleteUserError: - logger.error(f"Failed to delete user {user}") + if delete_user: + try: + logger.info("Remove relation user: %s", user) + self.charm.set_secret(APP_SCOPE, user, None) + self.charm.set_secret(APP_SCOPE, f"{user}-database", None) + self.charm.postgresql.delete_user(user) + except PostgreSQLDeleteUserError: + logger.error("Failed to delete user %s", user) + else: + logger.info("Stale relation user detected: %s", user) def update_endpoints(self, event: DatabaseRequestedEvent = None) -> None: """Set the read/write and read-only endpoints.""" diff --git a/tests/integration/ha_tests/test_async_replication.py b/tests/integration/ha_tests/test_async_replication.py index 96bdb3afa4..3d95292229 100644 --- a/tests/integration/ha_tests/test_async_replication.py +++ b/tests/integration/ha_tests/test_async_replication.py @@ -41,6 +41,8 @@ IDLE_PERIOD = 5 TIMEOUT = 2000 +DATA_INTEGRATOR_APP_NAME = "data-integrator" + @contextlib.asynccontextmanager async def fast_forward( @@ -115,6 +117,14 @@ async def test_deploy_async_replication_setup( num_units=CLUSTER_SIZE, config={"profile": "testing"}, ) + if not await app_name(ops_test, DATA_INTEGRATOR_APP_NAME): + await ops_test.model.deploy( + DATA_INTEGRATOR_APP_NAME, + num_units=1, + channel="latest/edge", + config={"database-name": "testdb"}, + ) + await ops_test.model.relate(DATABASE_APP_NAME, DATA_INTEGRATOR_APP_NAME) if not await app_name(ops_test, model=second_model): charm = await ops_test.build_charm(".") await second_model.deploy( @@ -128,7 +138,7 @@ async def test_deploy_async_replication_setup( async with ops_test.fast_forward(), fast_forward(second_model): await gather( first_model.wait_for_idle( - apps=[DATABASE_APP_NAME, APPLICATION_NAME], + apps=[DATABASE_APP_NAME, APPLICATION_NAME, DATA_INTEGRATOR_APP_NAME], status="active", timeout=TIMEOUT, ), @@ -218,6 +228,19 @@ async def test_async_replication( await check_writes(ops_test, extra_model=second_model) +@pytest.mark.group(1) +@markers.juju3 +@pytest.mark.abort_on_fail +async def test_get_data_integrator_credentials( + ops_test: OpsTest, +): + unit = ops_test.model.applications[DATA_INTEGRATOR_APP_NAME].units[0] + action = await unit.run_action(action_name="get-credentials") + result = await action.wait() + global data_integrator_credentials + data_integrator_credentials = result.results + + @pytest.mark.group(1) @markers.juju3 @pytest.mark.abort_on_fail @@ -273,6 +296,29 @@ async def test_switchover( await are_writes_increasing(ops_test, extra_model=second_model) +@pytest.mark.group(1) +@markers.juju3 +@pytest.mark.abort_on_fail +async def test_data_integrator_creds_keep_on_working( + ops_test: OpsTest, + second_model: Model, +) -> None: + user = data_integrator_credentials["postgresql"]["username"] + password = data_integrator_credentials["postgresql"]["password"] + database = data_integrator_credentials["postgresql"]["database"] + + any_unit = second_model.applications[DATABASE_APP_NAME].units[0].name + primary = await get_primary(ops_test, any_unit, second_model) + address = second_model.units.get(primary).public_address + + connstr = f"dbname='{database}' user='{user}' host='{address}' port='5432' password='{password}' connect_timeout=1" + try: + with psycopg2.connect(connstr) as connection: + pass + finally: + connection.close() + + @pytest.mark.group(1) @markers.juju3 @pytest.mark.abort_on_fail diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py index 8a2ad24aa3..67fa1b59f2 100644 --- a/tests/integration/helpers.py +++ b/tests/integration/helpers.py @@ -621,17 +621,20 @@ async def get_password(ops_test: OpsTest, unit_name: str, username: str = "opera stop=stop_after_attempt(10), wait=wait_exponential(multiplier=1, min=2, max=30), ) -async def get_primary(ops_test: OpsTest, unit_name: str) -> str: +async def get_primary(ops_test: OpsTest, unit_name: str, model=None) -> str: """Get the primary unit. Args: ops_test: ops_test instance. unit_name: the name of the unit. + model: Model to use. Returns: the current primary unit. """ - action = await ops_test.model.units.get(unit_name).run_action("get-primary") + if not model: + model = ops_test.model + action = await model.units.get(unit_name).run_action("get-primary") action = await action.wait() return action.results["primary"]