From 3209fc155b7688b9b29e4510017aea6a64ffa431 Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Mon, 27 May 2024 17:09:11 -0300 Subject: [PATCH 01/13] Syncing the UX with MySQL Signed-off-by: Marcelo Henrique Neppel --- actions.yaml | 9 +- metadata.yaml | 8 +- src/relations/async_replication.py | 130 +++++++++++------- .../ha_tests/test_async_replication.py | 24 ++-- 4 files changed, 103 insertions(+), 68 deletions(-) diff --git a/actions.yaml b/actions.yaml index 7364321fa7..f5daf6944d 100644 --- a/actions.yaml +++ b/actions.yaml @@ -3,6 +3,13 @@ create-backup: description: Creates a backup to s3 storage. +create-replication: + description: Set up asynchronous replication between two clusters. + params: + name: + type: string + description: The name of the replication (defaults to 'default'). + default: default get-primary: description: Get the unit which is the primary/leader in the replication. get-password: @@ -17,7 +24,7 @@ list-backups: description: Lists backups in s3 storage. pre-upgrade-check: description: Run necessary pre-upgrade checks and preparations before executing a charm refresh. -promote-cluster: +promote-to-primary: description: Promotes the cluster of choice to a primary cluster. Must be ran against the leader unit. 
params: force-promotion: diff --git a/metadata.yaml b/metadata.yaml index cd29daf381..57addc1e9b 100644 --- a/metadata.yaml +++ b/metadata.yaml @@ -26,8 +26,8 @@ peers: interface: upgrade provides: - async-primary: - interface: async_replication + replication-offer: + interface: postgresql_async limit: 1 optional: true database: @@ -41,8 +41,8 @@ provides: limit: 1 requires: - async-replica: - interface: async_replication + replication: + interface: postgresql_async limit: 1 optional: true certificates: diff --git a/src/relations/async_replication.py b/src/relations/async_replication.py index 1e4df96c88..1328e55879 100644 --- a/src/relations/async_replication.py +++ b/src/relations/async_replication.py @@ -51,9 +51,9 @@ logger = logging.getLogger(__name__) -ASYNC_PRIMARY_RELATION = "async-primary" -ASYNC_REPLICA_RELATION = "async-replica" READ_ONLY_MODE_BLOCKING_MESSAGE = "Cluster in read-only mode" +REPLICATION_CONSUMER_RELATION = "replication" +REPLICATION_OFFER_RELATION = "replication-offer" class PostgreSQLAsyncReplication(Object): @@ -64,36 +64,47 @@ def __init__(self, charm): super().__init__(charm, "postgresql") self.charm = charm self.framework.observe( - self.charm.on[ASYNC_PRIMARY_RELATION].relation_joined, self._on_async_relation_joined + self.charm.on[REPLICATION_OFFER_RELATION].relation_joined, + self._on_async_relation_joined, ) self.framework.observe( - self.charm.on[ASYNC_REPLICA_RELATION].relation_joined, self._on_async_relation_joined + self.charm.on[REPLICATION_CONSUMER_RELATION].relation_joined, + self._on_async_relation_joined, ) self.framework.observe( - self.charm.on[ASYNC_PRIMARY_RELATION].relation_changed, self._on_async_relation_changed + self.charm.on[REPLICATION_OFFER_RELATION].relation_changed, + self._on_async_relation_changed, ) self.framework.observe( - self.charm.on[ASYNC_REPLICA_RELATION].relation_changed, self._on_async_relation_changed + self.charm.on[REPLICATION_CONSUMER_RELATION].relation_changed, + 
self._on_async_relation_changed, ) # Departure events self.framework.observe( - self.charm.on[ASYNC_PRIMARY_RELATION].relation_departed, + self.charm.on[REPLICATION_OFFER_RELATION].relation_departed, self._on_async_relation_departed, ) self.framework.observe( - self.charm.on[ASYNC_REPLICA_RELATION].relation_departed, + self.charm.on[REPLICATION_CONSUMER_RELATION].relation_departed, self._on_async_relation_departed, ) self.framework.observe( - self.charm.on[ASYNC_PRIMARY_RELATION].relation_broken, self._on_async_relation_broken + self.charm.on[REPLICATION_OFFER_RELATION].relation_broken, + self._on_async_relation_broken, ) self.framework.observe( - self.charm.on[ASYNC_REPLICA_RELATION].relation_broken, self._on_async_relation_broken + self.charm.on[REPLICATION_CONSUMER_RELATION].relation_broken, + self._on_async_relation_broken, ) # Actions - self.framework.observe(self.charm.on.promote_cluster_action, self._on_promote_cluster) + self.framework.observe( + self.charm.on.create_replication_action, self._on_create_replication + ) + self.framework.observe( + self.charm.on.promote_to_primary_action, self._on_promote_to_primary + ) def _can_promote_cluster(self, event: ActionEvent) -> bool: """Check if the cluster can be promoted.""" @@ -180,7 +191,7 @@ def _configure_primary_cluster( def _configure_standby_cluster(self, event: RelationChangedEvent) -> bool: """Configure the standby cluster.""" relation = self._relation - if relation.name == ASYNC_REPLICA_RELATION: + if relation.name == REPLICATION_CONSUMER_RELATION: # Update the secrets between the clusters. 
primary_cluster_info = relation.data[relation.app].get("primary-cluster-data") secret_id = ( @@ -214,8 +225,8 @@ def _get_highest_promoted_cluster_counter_value(self) -> str: """Return the highest promoted cluster counter.""" promoted_cluster_counter = "0" for async_relation in [ - self.model.get_relation(ASYNC_PRIMARY_RELATION), - self.model.get_relation(ASYNC_REPLICA_RELATION), + self.model.get_relation(REPLICATION_OFFER_RELATION), + self.model.get_relation(REPLICATION_CONSUMER_RELATION), ]: if async_relation is None: continue @@ -249,8 +260,8 @@ def _get_primary_cluster(self) -> Optional[Application]: primary_cluster = None promoted_cluster_counter = "0" for async_relation in [ - self.model.get_relation(ASYNC_PRIMARY_RELATION), - self.model.get_relation(ASYNC_REPLICA_RELATION), + self.model.get_relation(REPLICATION_OFFER_RELATION), + self.model.get_relation(REPLICATION_CONSUMER_RELATION), ]: if async_relation is None: continue @@ -307,8 +318,8 @@ def get_standby_endpoints(self) -> List[str]: return [ relation.data[unit].get("unit-address") for relation in [ - self.model.get_relation(ASYNC_PRIMARY_RELATION), - self.model.get_relation(ASYNC_REPLICA_RELATION), + self.model.get_relation(REPLICATION_OFFER_RELATION), + self.model.get_relation(REPLICATION_CONSUMER_RELATION), ] if relation is not None for unit in relation.units @@ -408,6 +419,42 @@ def handle_read_only_mode(self) -> None: ): self.charm.unit.status = BlockedStatus(READ_ONLY_MODE_BLOCKING_MESSAGE) + def _handle_replication_change(self, event: ActionEvent) -> bool: + if not self._can_promote_cluster(event): + return False + + relation = self._relation + + # Check if all units from the other cluster published their pod IPs in the relation data. + # If not, fail the action telling that all units must publish their pod addresses in the + # relation data. 
+ for unit in relation.units: + if "unit-address" not in relation.data[unit]: + event.fail( + "All units from the other cluster must publish their pod addresses in the relation data." + ) + return False + + system_identifier, error = self.get_system_identifier() + if error is not None: + logger.exception(error) + event.fail("Failed to get system identifier") + return False + + # Increment the current cluster counter in this application side based on the highest counter value. + promoted_cluster_counter = int(self._get_highest_promoted_cluster_counter_value()) + promoted_cluster_counter += 1 + logger.debug("Promoted cluster counter: %s", promoted_cluster_counter) + + self._update_primary_cluster_data(promoted_cluster_counter, system_identifier) + + # Emit an async replication changed event for this unit (to promote this cluster before demoting the + # other if this one is a standby cluster, which is needed to correctly set up the async replication + # when performing a switchover). + self._re_emit_async_relation_changed_event() + + return True + def _is_following_promoted_cluster(self) -> bool: """Return True if this unit is following the promoted cluster.""" if self._get_primary_cluster() is None: @@ -501,43 +548,24 @@ def _on_async_relation_joined(self, _) -> None: "unit-promoted-cluster-counter": highest_promoted_cluster_counter }) - def _on_promote_cluster(self, event: ActionEvent) -> None: - """Promote this cluster to the primary cluster.""" - if not self._can_promote_cluster(event): + def _on_create_replication(self, event: ActionEvent) -> None: + """Set up asynchronous replication between two clusters.""" + if not self._handle_replication_change(event): return - relation = self._relation + # Set the replication name in the relation data. + self._relation.data[self.charm.app].update({"name": event.params["name"]}) - # Check if all units from the other cluster published their pod IPs in the relation data. 
- # If not, fail the action telling that all units must publish their pod addresses in the - # relation data. - for unit in relation.units: - if "unit-address" not in relation.data[unit]: - event.fail( - "All units from the other cluster must publish their pod addresses in the relation data." - ) - return + # Set the status. + self.charm.unit.status = MaintenanceStatus("Creating replication...") - system_identifier, error = self.get_system_identifier() - if error is not None: - logger.exception(error) - event.fail("Failed to get system identifier") + def _on_promote_to_primary(self, event: ActionEvent) -> None: + """Promote this cluster to the primary cluster.""" + if not self._handle_replication_change(event): return - # Increment the current cluster counter in this application side based on the highest counter value. - promoted_cluster_counter = int(self._get_highest_promoted_cluster_counter_value()) - promoted_cluster_counter += 1 - logger.debug("Promoted cluster counter: %s", promoted_cluster_counter) - - self._update_primary_cluster_data(promoted_cluster_counter, system_identifier) - - # Emit an async replication changed event for this unit (to promote this cluster before demoting the - # other if this one is a standby cluster, which is needed to correctly setup the async replication - # when performing a switchover). - self._re_emit_async_relation_changed_event() - # Set the status. 
- self.charm.unit.status = MaintenanceStatus("Promoting cluster...") + self.charm.unit.status = MaintenanceStatus("Creating replication...") @property def _primary_cluster_endpoint(self) -> str: @@ -576,8 +604,8 @@ def _reinitialise_pgdata(self) -> None: def _relation(self) -> Relation: """Return the relation object.""" for relation in [ - self.model.get_relation(ASYNC_PRIMARY_RELATION), - self.model.get_relation(ASYNC_REPLICA_RELATION), + self.model.get_relation(REPLICATION_OFFER_RELATION), + self.model.get_relation(REPLICATION_CONSUMER_RELATION), ]: if relation is not None: return relation @@ -661,7 +689,7 @@ def _update_primary_cluster_data( primary_cluster_data = {"endpoint": self._primary_cluster_endpoint} # Retrieve the secrets that will be shared between the clusters. - if async_relation.name == ASYNC_PRIMARY_RELATION: + if async_relation.name == REPLICATION_OFFER_RELATION: secret = self._get_secret() secret.grant(async_relation) primary_cluster_data["secret-id"] = secret.id diff --git a/tests/integration/ha_tests/test_async_replication.py b/tests/integration/ha_tests/test_async_replication.py index 5f0c27dea8..52aebf0210 100644 --- a/tests/integration/ha_tests/test_async_replication.py +++ b/tests/integration/ha_tests/test_async_replication.py @@ -150,10 +150,10 @@ async def test_async_replication( logger.info("checking whether writes are increasing") await are_writes_increasing(ops_test) - first_offer_command = f"offer {DATABASE_APP_NAME}:async-primary async-primary" + first_offer_command = f"offer {DATABASE_APP_NAME}:replication-offer replication-offer" await ops_test.juju(*first_offer_command.split()) first_consume_command = ( - f"consume -m {second_model.info.name} admin/{first_model.info.name}.async-primary" + f"consume -m {second_model.info.name} admin/{first_model.info.name}.replication-offer" ) await ops_test.juju(*first_consume_command.split()) @@ -167,7 +167,7 @@ async def test_async_replication( ), ) - await second_model.relate(DATABASE_APP_NAME, 
"async-primary") + await second_model.relate(DATABASE_APP_NAME, "replication-offer") async with ops_test.fast_forward(FAST_INTERVAL), fast_forward(second_model, FAST_INTERVAL): await gather( @@ -187,7 +187,7 @@ async def test_async_replication( leader_unit = await get_leader_unit(ops_test, DATABASE_APP_NAME) assert leader_unit is not None, "No leader unit found" logger.info("promoting the first cluster") - run_action = await leader_unit.run_action("promote-cluster") + run_action = await leader_unit.run_action("create-replication") await run_action.wait() assert (run_action.results.get("return-code", None) == 0) or ( run_action.results.get("Code", None) == "0" @@ -222,10 +222,10 @@ async def test_switchover( second_model_continuous_writes, ): """Test switching over to the second cluster.""" - second_offer_command = f"offer {DATABASE_APP_NAME}:async-replica async-replica" + second_offer_command = f"offer {DATABASE_APP_NAME}:replication replication" await ops_test.juju(*second_offer_command.split()) second_consume_command = ( - f"consume -m {second_model.info.name} admin/{first_model.info.name}.async-replica" + f"consume -m {second_model.info.name} admin/{first_model.info.name}.replication" ) await ops_test.juju(*second_consume_command.split()) @@ -244,7 +244,7 @@ async def test_switchover( leader_unit = await get_leader_unit(ops_test, DATABASE_APP_NAME, model=second_model) assert leader_unit is not None, "No leader unit found" logger.info("promoting the second cluster") - run_action = await leader_unit.run_action("promote-cluster", **{"force-promotion": True}) + run_action = await leader_unit.run_action("promote-to-primary", **{"force-promotion": True}) await run_action.wait() assert (run_action.results.get("return-code", None) == 0) or ( run_action.results.get("Code", None) == "0" @@ -282,9 +282,9 @@ async def test_promote_standby( "database", f"{APPLICATION_NAME}:first-database" ) await second_model.applications[DATABASE_APP_NAME].remove_relation( - 
"async-replica", "async-primary" + "replication", "replication-offer" ) - wait_for_relation_removed_between(ops_test, "async-primary", "async-replica", second_model) + wait_for_relation_removed_between(ops_test, "replication-offer", "replication", second_model) async with ops_test.fast_forward(FAST_INTERVAL), fast_forward(second_model, FAST_INTERVAL): await gather( first_model.wait_for_idle( @@ -302,7 +302,7 @@ async def test_promote_standby( leader_unit = await get_leader_unit(ops_test, DATABASE_APP_NAME) assert leader_unit is not None, "No leader unit found" logger.info("promoting the first cluster") - run_action = await leader_unit.run_action("promote-cluster") + run_action = await leader_unit.run_action("promote-to-primary") await run_action.wait() assert (run_action.results.get("return-code", None) == 0) or ( run_action.results.get("Code", None) == "0" @@ -359,7 +359,7 @@ async def test_reestablish_relation( await are_writes_increasing(ops_test) logger.info("reestablishing the relation") - await second_model.relate(DATABASE_APP_NAME, "async-primary") + await second_model.relate(DATABASE_APP_NAME, "replication-offer") async with ops_test.fast_forward(FAST_INTERVAL), fast_forward(second_model, FAST_INTERVAL): await gather( first_model.wait_for_idle( @@ -378,7 +378,7 @@ async def test_reestablish_relation( leader_unit = await get_leader_unit(ops_test, DATABASE_APP_NAME) assert leader_unit is not None, "No leader unit found" logger.info("promoting the first cluster") - run_action = await leader_unit.run_action("promote-cluster") + run_action = await leader_unit.run_action("promote-to-primary") await run_action.wait() assert (run_action.results.get("return-code", None) == 0) or ( run_action.results.get("Code", None) == "0" From 7ac4f3f0992566a0f287646644302cf32f61974e Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Wed, 29 May 2024 16:50:41 -0300 Subject: [PATCH 02/13] Fix failover and set-secret behaviour Signed-off-by: Marcelo Henrique Neppel --- 
actions.yaml | 2 +- lib/charms/postgresql_k8s/v0/postgresql.py | 26 +-- src/charm.py | 48 ++++- src/cluster.py | 13 +- src/relations/async_replication.py | 173 +++++++++++++----- .../ha_tests/test_async_replication.py | 2 +- 6 files changed, 191 insertions(+), 73 deletions(-) diff --git a/actions.yaml b/actions.yaml index f5daf6944d..02f5c525f7 100644 --- a/actions.yaml +++ b/actions.yaml @@ -27,7 +27,7 @@ pre-upgrade-check: promote-to-primary: description: Promotes the cluster of choice to a primary cluster. Must be ran against the leader unit. params: - force-promotion: + force: type: boolean description: Force the promotion of a cluster when there is already a primary cluster. restore: diff --git a/lib/charms/postgresql_k8s/v0/postgresql.py b/lib/charms/postgresql_k8s/v0/postgresql.py index 8783f76814..ffddc66360 100644 --- a/lib/charms/postgresql_k8s/v0/postgresql.py +++ b/lib/charms/postgresql_k8s/v0/postgresql.py @@ -36,7 +36,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 26 +LIBPATCH = 27 INVALID_EXTRA_USER_ROLE_BLOCKING_MESSAGE = "invalid role(s) for extra user roles" @@ -111,20 +111,19 @@ def __init__( self.system_users = system_users def _connect_to_database( - self, database: str = None, connect_to_current_host: bool = False + self, database: str = None, database_host: str = None ) -> psycopg2.extensions.connection: """Creates a connection to the database. Args: database: database to connect to (defaults to the database provided when the object for this class was created). - connect_to_current_host: whether to connect to the current host - instead of the primary host. + database_host: host to connect to instead of the primary host. Returns: psycopg2 connection object. 
""" - host = self.current_host if connect_to_current_host else self.primary_host + host = database_host if database_host is not None else self.primary_host connection = psycopg2.connect( f"dbname='{database if database else self.database}' user='{self.user}' host='{host}'" f"password='{self.password}' connect_timeout=1" @@ -388,7 +387,7 @@ def get_postgresql_text_search_configs(self) -> Set[str]: Set of PostgreSQL text search configs. """ with self._connect_to_database( - connect_to_current_host=True + database_host=self.current_host ) as connection, connection.cursor() as cursor: cursor.execute("SELECT CONCAT('pg_catalog.', cfgname) FROM pg_ts_config;") text_search_configs = cursor.fetchall() @@ -401,7 +400,7 @@ def get_postgresql_timezones(self) -> Set[str]: Set of PostgreSQL timezones. """ with self._connect_to_database( - connect_to_current_host=True + database_host=self.current_host ) as connection, connection.cursor() as cursor: cursor.execute("SELECT name FROM pg_timezone_names;") timezones = cursor.fetchall() @@ -434,7 +433,7 @@ def is_tls_enabled(self, check_current_host: bool = False) -> bool: """ try: with self._connect_to_database( - connect_to_current_host=check_current_host + database_host=self.current_host if check_current_host else None ) as connection, connection.cursor() as cursor: cursor.execute("SHOW ssl;") return "on" in cursor.fetchone()[0] @@ -502,19 +501,24 @@ def set_up_database(self) -> None: if connection is not None: connection.close() - def update_user_password(self, username: str, password: str) -> None: + def update_user_password( + self, username: str, password: str, database_host: str = None + ) -> None: """Update a user password. Args: username: the user to update the password. password: the new password for the user. + database_host: the host to connect to. Raises: PostgreSQLUpdateUserPasswordError if the password couldn't be changed. 
""" connection = None try: - with self._connect_to_database() as connection, connection.cursor() as cursor: + with self._connect_to_database( + database_host=database_host + ) as connection, connection.cursor() as cursor: cursor.execute( sql.SQL("ALTER USER {} WITH ENCRYPTED PASSWORD '" + password + "';").format( sql.Identifier(username) @@ -610,7 +614,7 @@ def validate_date_style(self, date_style: str) -> bool: """ try: with self._connect_to_database( - connect_to_current_host=True + database_host=self.current_host ) as connection, connection.cursor() as cursor: cursor.execute( sql.SQL( diff --git a/src/charm.py b/src/charm.py index 99592539af..bcf31ff165 100755 --- a/src/charm.py +++ b/src/charm.py @@ -86,7 +86,11 @@ USER, USER_PASSWORD_KEY, ) -from relations.async_replication import PostgreSQLAsyncReplication +from relations.async_replication import ( + REPLICATION_CONSUMER_RELATION, + REPLICATION_OFFER_RELATION, + PostgreSQLAsyncReplication, +) from relations.db import EXTENSIONS_BLOCKING_MESSAGE, DbProvides from relations.postgresql_provider import PostgreSQLProvider from upgrade import PostgreSQLUpgrade, get_postgresql_dependencies_model @@ -1194,15 +1198,42 @@ def _on_set_password(self, event: ActionEvent) -> None: ) return - # Update the password in the PostgreSQL instance. - try: - self.postgresql.update_user_password(username, password) - except PostgreSQLUpdateUserPasswordError as e: - logger.exception(e) + replication_offer_relation = self.model.get_relation(REPLICATION_OFFER_RELATION) + if ( + replication_offer_relation is not None + and not self.async_replication.is_primary_cluster() + ): + # Update the password in the other cluster PostgreSQL primary instance. 
+ other_cluster_endpoints = self.async_replication.get_all_primary_cluster_endpoints() + other_cluster_primary = self._patroni.get_primary( + alternative_endpoints=other_cluster_endpoints + ) + other_cluster_primary_ip = [ + replication_offer_relation.data[unit].get("private-address") + for unit in replication_offer_relation.units + if unit.name.replace("/", "-") == other_cluster_primary + ][0] + try: + self.postgresql.update_user_password( + username, password, database_host=other_cluster_primary_ip + ) + except PostgreSQLUpdateUserPasswordError as e: + logger.exception(e) + event.fail("Failed changing the password.") + return + elif self.model.get_relation(REPLICATION_CONSUMER_RELATION) is not None: event.fail( - "Failed changing the password: Not all members healthy or finished initial sync." + "Failed changing the password: This action can be ran only in the cluster from the offer side." ) return + else: + # Update the password in this cluster PostgreSQL primary instance. + try: + self.postgresql.update_user_password(username, password) + except PostgreSQLUpdateUserPasswordError as e: + logger.exception(e) + event.fail("Failed changing the password.") + return # Update the password in the secret store. self.set_secret(APP_SCOPE, f"{username}-password", password) @@ -1211,9 +1242,6 @@ def _on_set_password(self, event: ActionEvent) -> None: # Other units Patroni configuration will be reloaded in the peer relation changed event. self.update_config() - # Update the password in the async replication data. 
- self.async_replication.update_async_replication_data() - event.set_results({"password": password}) def _on_update_status(self, _) -> None: diff --git a/src/cluster.py b/src/cluster.py index 4b08ff5a58..29a42e87b5 100644 --- a/src/cluster.py +++ b/src/cluster.py @@ -230,11 +230,12 @@ def get_member_status(self, member_name: str) -> str: return member["state"] return "" - def get_primary(self, unit_name_pattern=False) -> str: + def get_primary(self, unit_name_pattern=False, alternative_endpoints: List[str] = None) -> str: """Get primary instance. Args: unit_name_pattern: whether to convert pod name to unit name + alternative_endpoints: list of alternative endpoints to check for the primary. Returns: primary pod or unit name. @@ -242,7 +243,7 @@ def get_primary(self, unit_name_pattern=False) -> str: # Request info from cluster endpoint (which returns all members of the cluster). for attempt in Retrying(stop=stop_after_attempt(2 * len(self.peers_ips) + 1)): with attempt: - url = self._get_alternative_patroni_url(attempt) + url = self._get_alternative_patroni_url(attempt, alternative_endpoints) cluster_status = requests.get( f"{url}/{PATRONI_CLUSTER_STATUS_ENDPOINT}", verify=self.verify, @@ -301,12 +302,18 @@ def get_sync_standby_names(self) -> List[str]: sync_standbys.append("/".join(member["name"].rsplit("-", 1))) return sync_standbys - def _get_alternative_patroni_url(self, attempt: AttemptManager) -> str: + def _get_alternative_patroni_url( + self, attempt: AttemptManager, alternative_endpoints: List[str] = None + ) -> str: """Get an alternative REST API URL from another member each time. When the Patroni process is not running in the current unit it's needed to use a URL from another cluster member REST API to do some operations. 
""" + if alternative_endpoints is not None: + return self._patroni_url.replace( + self.unit_ip, alternative_endpoints[attempt.retry_state.attempt_number - 1] + ) attempt_number = attempt.retry_state.attempt_number if attempt_number > 1: url = self._patroni_url diff --git a/src/relations/async_replication.py b/src/relations/async_replication.py index 1328e55879..3b7ffb9f57 100644 --- a/src/relations/async_replication.py +++ b/src/relations/async_replication.py @@ -35,6 +35,7 @@ RelationChangedEvent, RelationDepartedEvent, Secret, + SecretChangedEvent, SecretNotFoundError, WaitingStatus, ) @@ -54,6 +55,7 @@ READ_ONLY_MODE_BLOCKING_MESSAGE = "Cluster in read-only mode" REPLICATION_CONSUMER_RELATION = "replication" REPLICATION_OFFER_RELATION = "replication-offer" +SECRET_LABEL = "async-replication-secret" class PostgreSQLAsyncReplication(Object): @@ -106,6 +108,8 @@ def __init__(self, charm): self.charm.on.promote_to_primary_action, self._on_promote_to_primary ) + self.framework.observe(self.charm.on.secret_changed, self._on_secret_changed) + def _can_promote_cluster(self, event: ActionEvent) -> bool: """Check if the cluster can be promoted.""" if not self.charm.is_cluster_initialised: @@ -141,22 +145,7 @@ def _can_promote_cluster(self, event: ActionEvent) -> bool: event.fail("This cluster is already the primary cluster.") return False - # To promote the other cluster if there is already a primary cluster, the action must be called with - # `force-promotion=true`. If not, fail the action telling that the other cluster is already the primary. - if relation.app == primary_cluster: - if not event.params.get("force-promotion"): - event.fail( - f"{relation.app.name} is already the primary cluster. Pass `force-promotion=true` to promote anyway." - ) - return False - else: - logger.warning( - "%s is already the primary cluster. 
Forcing promotion of %s to primary cluster due to `force-promotion=true`.", - relation.app.name, - self.charm.app.name, - ) - - return True + return self._handle_forceful_promotion(event) def _configure_primary_cluster( self, primary_cluster: Application, event: RelationChangedEvent @@ -164,7 +153,7 @@ def _configure_primary_cluster( """Configure the primary cluster.""" if self.charm.app == primary_cluster: self.charm.update_config() - if self._is_primary_cluster() and self.charm.unit.is_leader(): + if self.is_primary_cluster() and self.charm.unit.is_leader(): self._update_primary_cluster_data() # If this is a standby cluster, remove the information from DCS to make it # a normal cluster. @@ -192,24 +181,10 @@ def _configure_standby_cluster(self, event: RelationChangedEvent) -> bool: """Configure the standby cluster.""" relation = self._relation if relation.name == REPLICATION_CONSUMER_RELATION: - # Update the secrets between the clusters. - primary_cluster_info = relation.data[relation.app].get("primary-cluster-data") - secret_id = ( - None - if primary_cluster_info is None - else json.loads(primary_cluster_info).get("secret-id") - ) - try: - secret = self.charm.model.get_secret(id=secret_id, label=self._secret_label) - except SecretNotFoundError: + if not self._update_internal_secret(): logger.debug("Secret not found, deferring event") event.defer() return False - credentials = secret.peek_content() - for key, password in credentials.items(): - user = key.split("-password")[0] - self.charm.set_secret(APP_SCOPE, key, password) - logger.debug("Synced %s password", user) system_identifier, error = self.get_system_identifier() if error is not None: raise Exception(error) @@ -221,6 +196,24 @@ def _configure_standby_cluster(self, event: RelationChangedEvent) -> bool: logger.warning("Please review the backup file %s and handle its removal", filename) return True + def get_all_primary_cluster_endpoints(self) -> List[str]: + """Return all the primary cluster endpoints.""" 
+ relation = self._relation + primary_cluster = self._get_primary_cluster() + # List the primary endpoints only for the standby cluster. + if relation is None or primary_cluster is None or self.charm.app == primary_cluster: + return [] + return [ + relation.data[unit].get("unit-address") + for relation in [ + self.model.get_relation(REPLICATION_OFFER_RELATION), + self.model.get_relation(REPLICATION_CONSUMER_RELATION), + ] + if relation is not None + for unit in relation.units + if relation.data[unit].get("unit-address") is not None + ] + def _get_highest_promoted_cluster_counter_value(self) -> str: """Return the highest promoted cluster counter.""" promoted_cluster_counter = "0" @@ -289,24 +282,27 @@ def get_primary_cluster_endpoint(self) -> Optional[str]: def _get_secret(self) -> Secret: """Return async replication necessary secrets.""" + app_secret = self.charm.model.get_secret(label=f"{PEER}.{self.model.app.name}.app") + content = app_secret.peek_content() + + # Filter out unnecessary secrets. + shared_content = dict(filter(lambda x: "password" in x[0], content.items())) + try: # Avoid recreating the secret. - secret = self.charm.model.get_secret(label=self._secret_label) + secret = self.charm.model.get_secret(label=SECRET_LABEL) if not secret.id: # Workaround for the secret id not being set with model uuid. secret._id = f"secret://{self.model.uuid}/{secret.get_info().id.split(':')[1]}" + if secret.peek_content() != shared_content: + logger.info("Updating outdated secret content") + secret.set_content(shared_content) return secret except SecretNotFoundError: logger.debug("Secret not found, creating a new one") pass - app_secret = self.charm.model.get_secret(label=f"{PEER}.{self.model.app.name}.app") - content = app_secret.peek_content() - - # Filter out unnecessary secrets. 
- shared_content = dict(filter(lambda x: "password" in x[0], content.items())) - - return self.charm.model.app.add_secret(content=shared_content, label=self._secret_label) + return self.charm.model.app.add_secret(content=shared_content, label=SECRET_LABEL) def get_standby_endpoints(self) -> List[str]: """Return the standby endpoints.""" @@ -403,6 +399,31 @@ def _handle_database_start(self, event: RelationChangedEvent) -> None: logger.debug("Deferring on_async_relation_changed: database hasn't started yet.") event.defer() + def _handle_forceful_promotion(self, event: ActionEvent) -> bool: + if not event.params.get("force"): + all_primary_cluster_endpoints = self.get_all_primary_cluster_endpoints() + if len(all_primary_cluster_endpoints) > 0: + primary_cluster_reachable = False + try: + primary = self.charm._patroni.get_primary( + alternative_endpoints=all_primary_cluster_endpoints + ) + if primary is not None: + primary_cluster_reachable = True + except RetryError: + pass + if not primary_cluster_reachable: + event.fail( + f"{self._relation.app.name} isn't reachable. Pass `force=true` to promote anyway." 
+ ) + return False + else: + logger.warning( + "Forcing promotion of %s to primary cluster due to `force=true`.", + self.charm.app.name, + ) + return True + def handle_read_only_mode(self) -> None: """Handle read-only mode (standby cluster that lost the relation with the primary cluster).""" promoted_cluster_counter = self.charm._peers.data[self.charm.app].get( @@ -464,7 +485,7 @@ def _is_following_promoted_cluster(self) -> bool: == self._get_highest_promoted_cluster_counter_value() ) - def _is_primary_cluster(self) -> bool: + def is_primary_cluster(self) -> bool: """Return the primary cluster name.""" return self.charm.app == self._get_primary_cluster() @@ -550,6 +571,14 @@ def _on_async_relation_joined(self, _) -> None: def _on_create_replication(self, event: ActionEvent) -> None: """Set up asynchronous replication between two clusters.""" + if self._get_primary_cluster() is not None: + event.fail("There is already a replication set up.") + return + + if self._relation.name == REPLICATION_CONSUMER_RELATION: + event.fail("This action must be run in the cluster where the offer was created.") + return + if not self._handle_replication_change(event): return @@ -561,12 +590,47 @@ def _on_create_replication(self, event: ActionEvent) -> None: def _on_promote_to_primary(self, event: ActionEvent) -> None: """Promote this cluster to the primary cluster.""" + if self._get_primary_cluster() is None: + event.fail( + "No primary cluster found. Run `create-replication` action in the cluster where the offer was created." + ) + return + if not self._handle_replication_change(event): return # Set the status. 
self.charm.unit.status = MaintenanceStatus("Creating replication...") + def _on_secret_changed(self, event: SecretChangedEvent) -> None: + """Update the internal secret when the relation secret changes.""" + relation = self._relation + if relation is None: + logger.debug("Early exit on_secret_changed: No relation found.") + return + + if ( + relation.name == REPLICATION_OFFER_RELATION + and event.secret.label == f"{PEER}.{self.model.app.name}.app" + ): + logger.info("Internal secret changed, updating relation secret") + secret = self._get_secret() + secret.grant(relation) + primary_cluster_data = { + "endpoint": self._primary_cluster_endpoint, + "secret-id": secret.id, + } + relation.data[self.charm.app]["primary-cluster-data"] = json.dumps( + primary_cluster_data + ) + return + + if relation.name == REPLICATION_CONSUMER_RELATION and event.secret.label == SECRET_LABEL: + logger.info("Relation secret changed, updating internal secret") + if not self._update_internal_secret(): + logger.debug("Secret not found, deferring event") + event.defer() + @property def _primary_cluster_endpoint(self) -> str: """Return the endpoint from one of the sync-standbys, or from the primary if there is no sync-standby.""" @@ -610,11 +674,6 @@ def _relation(self) -> Relation: if relation is not None: return relation - @property - def _secret_label(self) -> str: - """Return the secret label.""" - return f"async-replication-secret-{self._get_highest_promoted_cluster_counter_value()}" - def _stop_database(self, event: RelationChangedEvent) -> bool: """Stop the database.""" if ( @@ -670,9 +729,29 @@ def update_async_replication_data(self) -> None: if relation is None: return relation.data[self.charm.unit].update({"unit-address": self.charm._unit_ip}) - if self._is_primary_cluster() and self.charm.unit.is_leader(): + if self.is_primary_cluster() and self.charm.unit.is_leader(): self._update_primary_cluster_data() + def _update_internal_secret(self) -> bool: + # Update the secrets between the 
clusters. + relation = self._relation + primary_cluster_info = relation.data[relation.app].get("primary-cluster-data") + secret_id = ( + None + if primary_cluster_info is None + else json.loads(primary_cluster_info).get("secret-id") + ) + try: + secret = self.charm.model.get_secret(id=secret_id, label=SECRET_LABEL) + except SecretNotFoundError: + return False + credentials = secret.peek_content() + for key, password in credentials.items(): + user = key.split("-password")[0] + self.charm.set_secret(APP_SCOPE, key, password) + logger.debug("Synced %s password", user) + return True + def _update_primary_cluster_data( self, promoted_cluster_counter: int = None, system_identifier: str = None ) -> None: diff --git a/tests/integration/ha_tests/test_async_replication.py b/tests/integration/ha_tests/test_async_replication.py index 52aebf0210..0967f7b5ff 100644 --- a/tests/integration/ha_tests/test_async_replication.py +++ b/tests/integration/ha_tests/test_async_replication.py @@ -244,7 +244,7 @@ async def test_switchover( leader_unit = await get_leader_unit(ops_test, DATABASE_APP_NAME, model=second_model) assert leader_unit is not None, "No leader unit found" logger.info("promoting the second cluster") - run_action = await leader_unit.run_action("promote-to-primary", **{"force-promotion": True}) + run_action = await leader_unit.run_action("promote-to-primary", **{"force": True}) await run_action.wait() assert (run_action.results.get("return-code", None) == 0) or ( run_action.results.get("Code", None) == "0" From 02a584ce5116e4f9e0f2b718fccb2277669d221f Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Fri, 31 May 2024 15:01:57 -0300 Subject: [PATCH 03/13] Improve statuses Signed-off-by: Marcelo Henrique Neppel --- src/charm.py | 2 +- src/relations/async_replication.py | 51 ++++++++++++------- .../ha_tests/test_async_replication.py | 10 ++-- tests/unit/test_charm.py | 4 +- 4 files changed, 39 insertions(+), 28 deletions(-) diff --git a/src/charm.py b/src/charm.py 
index bcf31ff165..c8203426e9 100755 --- a/src/charm.py +++ b/src/charm.py @@ -1357,7 +1357,7 @@ def _set_primary_status_message(self) -> None: if self._patroni.get_primary(unit_name_pattern=True) == self.unit.name: self.unit.status = ActiveStatus("Primary") elif self.is_standby_leader: - self.unit.status = ActiveStatus("Standby Leader") + self.unit.status = ActiveStatus("Standby") elif self._patroni.member_started: self.unit.status = ActiveStatus() except (RetryError, ConnectionError) as e: diff --git a/src/relations/async_replication.py b/src/relations/async_replication.py index 3b7ffb9f57..072a1e51df 100644 --- a/src/relations/async_replication.py +++ b/src/relations/async_replication.py @@ -27,6 +27,7 @@ from ops import ( ActionEvent, + ActiveStatus, Application, BlockedStatus, MaintenanceStatus, @@ -52,7 +53,7 @@ logger = logging.getLogger(__name__) -READ_ONLY_MODE_BLOCKING_MESSAGE = "Cluster in read-only mode" +READ_ONLY_MODE_BLOCKING_MESSAGE = "Standalone read-only cluster" REPLICATION_CONSUMER_RELATION = "replication" REPLICATION_OFFER_RELATION = "replication-offer" SECRET_LABEL = "async-replication-secret" @@ -124,13 +125,11 @@ def _can_promote_cluster(self, event: ActionEvent) -> bool: if standby_leader is not None: try: self.charm._patroni.promote_standby_cluster() - if ( - self.charm.is_blocked - and self.charm.unit.status.message == READ_ONLY_MODE_BLOCKING_MESSAGE - ): + if self.charm.app.status.message == READ_ONLY_MODE_BLOCKING_MESSAGE: self.charm._peers.data[self.charm.app].update({ "promoted-cluster-counter": "" }) + self._set_app_status() self.charm._set_primary_status_message() except (StandbyClusterAlreadyPromotedError, ClusterNotPromotedError) as e: event.fail(str(e)) @@ -426,19 +425,11 @@ def _handle_forceful_promotion(self, event: ActionEvent) -> bool: def handle_read_only_mode(self) -> None: """Handle read-only mode (standby cluster that lost the relation with the primary cluster).""" - promoted_cluster_counter = 
self.charm._peers.data[self.charm.app].get( - "promoted-cluster-counter", "" - ) - if not self.charm.is_blocked or ( - promoted_cluster_counter != "0" - and self.charm.unit.status.message == READ_ONLY_MODE_BLOCKING_MESSAGE - ): + if not self.charm.is_blocked: self.charm._set_primary_status_message() - if ( - promoted_cluster_counter == "0" - and self.charm.unit.status.message != READ_ONLY_MODE_BLOCKING_MESSAGE - ): - self.charm.unit.status = BlockedStatus(READ_ONLY_MODE_BLOCKING_MESSAGE) + + if self.charm.unit.is_leader(): + self._set_app_status() def _handle_replication_change(self, event: ActionEvent) -> bool: if not self._can_promote_cluster(event): @@ -504,7 +495,7 @@ def _on_async_relation_broken(self, _) -> None: if self.charm._patroni.get_standby_leader() is not None: if self.charm.unit.is_leader(): self.charm._peers.data[self.charm.app].update({"promoted-cluster-counter": "0"}) - self.charm.unit.status = BlockedStatus(READ_ONLY_MODE_BLOCKING_MESSAGE) + self._set_app_status() else: if self.charm.unit.is_leader(): self.charm._peers.data[self.charm.app].update({"promoted-cluster-counter": ""}) @@ -512,6 +503,9 @@ def _on_async_relation_broken(self, _) -> None: def _on_async_relation_changed(self, event: RelationChangedEvent) -> None: """Update the Patroni configuration if one of the clusters was already promoted.""" + if self.charm.unit.is_leader(): + self._set_app_status() + primary_cluster = self._get_primary_cluster() logger.debug("Primary cluster: %s", primary_cluster) if primary_cluster is None: @@ -590,7 +584,10 @@ def _on_create_replication(self, event: ActionEvent) -> None: def _on_promote_to_primary(self, event: ActionEvent) -> None: """Promote this cluster to the primary cluster.""" - if self._get_primary_cluster() is None: + if ( + self.charm.app.status.message != READ_ONLY_MODE_BLOCKING_MESSAGE + and self._get_primary_cluster() is None + ): event.fail( "No primary cluster found. 
Run `create-replication` action in the cluster where the offer was created." ) @@ -674,6 +671,22 @@ def _relation(self) -> Relation: if relation is not None: return relation + def _set_app_status(self) -> None: + """Set the app status.""" + if self.charm._peers.data[self.charm.app].get("promoted-cluster-counter") == "0": + self.charm.app.status = BlockedStatus(READ_ONLY_MODE_BLOCKING_MESSAGE) + return + if self._relation is None: + self.charm.app.status = ActiveStatus() + return + primary_cluster = self._get_primary_cluster() + if primary_cluster is None: + self.charm.app.status = ActiveStatus() + else: + self.charm.app.status = ActiveStatus( + "Primary" if self.charm.app == primary_cluster else "Standby" + ) + def _stop_database(self, event: RelationChangedEvent) -> bool: """Stop the database.""" if ( diff --git a/tests/integration/ha_tests/test_async_replication.py b/tests/integration/ha_tests/test_async_replication.py index 0967f7b5ff..c5344d5a4d 100644 --- a/tests/integration/ha_tests/test_async_replication.py +++ b/tests/integration/ha_tests/test_async_replication.py @@ -288,10 +288,10 @@ async def test_promote_standby( async with ops_test.fast_forward(FAST_INTERVAL), fast_forward(second_model, FAST_INTERVAL): await gather( first_model.wait_for_idle( - apps=[DATABASE_APP_NAME], - status="blocked", - idle_period=IDLE_PERIOD, - timeout=TIMEOUT, + apps=[DATABASE_APP_NAME], idle_period=IDLE_PERIOD, timeout=TIMEOUT + ), + first_model.block_until( + lambda: first_model.applications[DATABASE_APP_NAME].status == "blocked", ), second_model.wait_for_idle( apps=[DATABASE_APP_NAME], status="active", idle_period=IDLE_PERIOD, timeout=TIMEOUT @@ -378,7 +378,7 @@ async def test_reestablish_relation( leader_unit = await get_leader_unit(ops_test, DATABASE_APP_NAME) assert leader_unit is not None, "No leader unit found" logger.info("promoting the first cluster") - run_action = await leader_unit.run_action("promote-to-primary") + run_action = await 
leader_unit.run_action("create-replication") await run_action.wait() assert (run_action.results.get("return-code", None) == 0) or ( run_action.results.get("Code", None) == "0" diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index 48907eb4d0..c88181015d 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -2295,9 +2295,7 @@ def test_set_active_status(self, _get_primary, _is_standby_leader, _member_start self.charm.unit.status.message, "Primary" if values[0] == self.charm.unit.name - else ( - "Standby Leader" if values[1] else ("" if values[2] else "fake status") - ), + else ("Standby" if values[1] else ("" if values[2] else "fake status")), ) else: _get_primary.side_effect = values[0] From 1c000c5cddbc106489d15f403ef5234d7fe3716c Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Fri, 31 May 2024 16:50:58 -0300 Subject: [PATCH 04/13] Fix app status set Signed-off-by: Marcelo Henrique Neppel --- src/relations/async_replication.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/relations/async_replication.py b/src/relations/async_replication.py index 072a1e51df..006966ddc7 100644 --- a/src/relations/async_replication.py +++ b/src/relations/async_replication.py @@ -495,7 +495,7 @@ def _on_async_relation_broken(self, _) -> None: if self.charm._patroni.get_standby_leader() is not None: if self.charm.unit.is_leader(): self.charm._peers.data[self.charm.app].update({"promoted-cluster-counter": "0"}) - self._set_app_status() + self._set_app_status() else: if self.charm.unit.is_leader(): self.charm._peers.data[self.charm.app].update({"promoted-cluster-counter": ""}) From ba84e96ed1d3fa98d9fa7d0e38543a06677b00b5 Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Fri, 31 May 2024 16:52:52 -0300 Subject: [PATCH 05/13] Fix model switch Signed-off-by: Marcelo Henrique Neppel --- tests/integration/ha_tests/test_async_replication.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/tests/integration/ha_tests/test_async_replication.py b/tests/integration/ha_tests/test_async_replication.py index 7b3de97680..96bdb3afa4 100644 --- a/tests/integration/ha_tests/test_async_replication.py +++ b/tests/integration/ha_tests/test_async_replication.py @@ -75,6 +75,7 @@ async def second_model(ops_test: OpsTest, first_model, request) -> Model: subprocess.run( ["juju", "set-model-constraints", f"arch={architecture.architecture}"], check=True ) + subprocess.run(["juju", "switch", first_model.info.name], check=True) second_model = Model() await second_model.connect(model_name=second_model_name) yield second_model From 8afc8030697ad5e5bca78a4769da940051966f89 Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Mon, 10 Jun 2024 15:34:42 -0300 Subject: [PATCH 06/13] Fix config integration test Signed-off-by: Marcelo Henrique Neppel --- tests/integration/test_config.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_config.py b/tests/integration/test_config.py index c75731053d..25878e8253 100644 --- a/tests/integration/test_config.py +++ b/tests/integration/test_config.py @@ -96,11 +96,16 @@ async def test_config_parameters(ops_test: OpsTest) -> None: logger.info(k) charm_config[k] = v[0] await ops_test.model.applications[DATABASE_APP_NAME].set_config(charm_config) - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="blocked", timeout=100 + await ops_test.model.block_until( + lambda: ops_test.model.units[f"{DATABASE_APP_NAME}/0"].workload_status + == "blocked", + timeout=100, ) assert "Configuration Error" in leader_unit.workload_status_message charm_config[k] = v[1] await ops_test.model.applications[DATABASE_APP_NAME].set_config(charm_config) - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=100) + await ops_test.model.block_until( + lambda: ops_test.model.units[f"{DATABASE_APP_NAME}/0"].workload_status == "active", + timeout=100, + ) From 
936b115314863bf5267b351fcde576e5aa41a702 Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Tue, 11 Jun 2024 17:54:35 -0300 Subject: [PATCH 07/13] Fix backups integration test Signed-off-by: Marcelo Henrique Neppel --- tests/integration/helpers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py index 2fe0b2d086..8a2ad24aa3 100644 --- a/tests/integration/helpers.py +++ b/tests/integration/helpers.py @@ -1036,10 +1036,10 @@ async def wait_for_idle_on_blocked( unit = ops_test.model.units.get(f"{database_app_name}/{unit_number}") await asyncio.gather( ops_test.model.wait_for_idle(apps=[other_app_name], status="active"), - ops_test.model.wait_for_idle( - apps=[database_app_name], status="blocked", raise_on_blocked=False + ops_test.model.block_until( + lambda: unit.workload_status == "blocked" + and unit.workload_status_message == status_message ), - ops_test.model.block_until(lambda: unit.workload_status_message == status_message), ) From 3f6ff31daa5935f15f4410fdb38861c8b53355c0 Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Mon, 17 Jun 2024 17:16:30 -0300 Subject: [PATCH 08/13] Test status and logs from multiple models Signed-off-by: Marcelo Henrique Neppel --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 5d71d94128..301f01bd27 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -70,7 +70,7 @@ jobs: - lint - unit-test - build - uses: canonical/data-platform-workflows/.github/workflows/integration_test_charm.yaml@v14.0.0 + uses: marceloneppel/data-platform-workflows/.github/workflows/integration_test_charm.yaml@dpe-4685-multiple-models-status-and-logs with: artifact-prefix: ${{ needs.build.outputs.artifact-prefix }} architecture: ${{ matrix.architecture }} From 4d817aa183480931f2cee2252cecd8ff25dad32a Mon Sep 17 00:00:00 2001 From: 
Marcelo Henrique Neppel Date: Mon, 17 Jun 2024 17:38:47 -0300 Subject: [PATCH 09/13] Fix organisation Signed-off-by: Marcelo Henrique Neppel --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 301f01bd27..b838841f3d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -70,7 +70,7 @@ jobs: - lint - unit-test - build - uses: marceloneppel/data-platform-workflows/.github/workflows/integration_test_charm.yaml@dpe-4685-multiple-models-status-and-logs + uses: canonical/data-platform-workflows/.github/workflows/integration_test_charm.yaml@dpe-4685-multiple-models-status-and-logs with: artifact-prefix: ${{ needs.build.outputs.artifact-prefix }} architecture: ${{ matrix.architecture }} From 946edbc9c136537090a18d0d1a790426ce632baa Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Mon, 17 Jun 2024 18:13:47 -0300 Subject: [PATCH 10/13] Remove unnecessary tests Signed-off-by: Marcelo Henrique Neppel --- .../integration/ha_tests/test_replication.py | 155 ----- .../ha_tests/test_restore_cluster.py | 130 ----- .../integration/ha_tests/test_self_healing.py | 545 ------------------ tests/integration/ha_tests/test_smoke.py | 221 ------- tests/integration/ha_tests/test_upgrade.py | 211 ------- .../ha_tests/test_upgrade_from_stable.py | 153 ----- tests/integration/test_backups.py | 516 ----------------- tests/integration/test_charm.py | 333 ----------- tests/integration/test_db.py | 358 ------------ tests/integration/test_db_admin.py | 177 ------ tests/integration/test_password_rotation.py | 196 ------- tests/integration/test_plugins.py | 210 ------- tests/integration/test_subordinates.py | 76 --- tests/integration/test_tls.py | 223 ------- 14 files changed, 3504 deletions(-) delete mode 100644 tests/integration/ha_tests/test_replication.py delete mode 100644 tests/integration/ha_tests/test_restore_cluster.py delete mode 100644 
tests/integration/ha_tests/test_self_healing.py delete mode 100644 tests/integration/ha_tests/test_smoke.py delete mode 100644 tests/integration/ha_tests/test_upgrade.py delete mode 100644 tests/integration/ha_tests/test_upgrade_from_stable.py delete mode 100644 tests/integration/test_backups.py delete mode 100644 tests/integration/test_charm.py delete mode 100644 tests/integration/test_db.py delete mode 100644 tests/integration/test_db_admin.py delete mode 100644 tests/integration/test_password_rotation.py delete mode 100644 tests/integration/test_plugins.py delete mode 100644 tests/integration/test_subordinates.py delete mode 100644 tests/integration/test_tls.py diff --git a/tests/integration/ha_tests/test_replication.py b/tests/integration/ha_tests/test_replication.py deleted file mode 100644 index 600e2997d4..0000000000 --- a/tests/integration/ha_tests/test_replication.py +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2023 Canonical Ltd. -# See LICENSE file for licensing details. - -import pytest -from pytest_operator.plugin import OpsTest -from tenacity import Retrying, stop_after_delay, wait_fixed - -from ..helpers import APPLICATION_NAME, CHARM_SERIES, db_connect, scale_application -from .helpers import ( - app_name, - are_writes_increasing, - check_writes, - fetch_cluster_members, - get_password, - get_primary, - start_continuous_writes, -) - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_build_and_deploy(ops_test: OpsTest) -> None: - """Build and deploy three unit of PostgreSQL.""" - wait_for_apps = False - # It is possible for users to provide their own cluster for HA testing. Hence, check if there - # is a pre-existing cluster. 
- if not await app_name(ops_test): - wait_for_apps = True - charm = await ops_test.build_charm(".") - async with ops_test.fast_forward(): - await ops_test.model.deploy( - charm, - num_units=3, - series=CHARM_SERIES, - config={"profile": "testing"}, - ) - # Deploy the continuous writes application charm if it wasn't already deployed. - if not await app_name(ops_test, APPLICATION_NAME): - wait_for_apps = True - async with ops_test.fast_forward(): - await ops_test.model.deploy( - APPLICATION_NAME, - application_name=APPLICATION_NAME, - series=CHARM_SERIES, - channel="edge", - ) - - if wait_for_apps: - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle(status="active", timeout=1500) - - -@pytest.mark.group(1) -async def test_reelection(ops_test: OpsTest, continuous_writes, primary_start_timeout) -> None: - """Kill primary unit, check reelection.""" - app = await app_name(ops_test) - if len(ops_test.model.applications[app].units) < 2: - await scale_application(ops_test, app, 2) - - # Start an application that continuously writes data to the database. - await start_continuous_writes(ops_test, app) - - # Remove the primary unit. - primary_name = await get_primary(ops_test, app) - await ops_test.model.destroy_units( - primary_name, - ) - - # Wait and get the primary again (which can be any unit, including the previous primary). - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle(apps=[app], status="active") - - await are_writes_increasing(ops_test, primary_name) - - # Verify that a new primary gets elected (ie old primary is secondary). - for attempt in Retrying(stop=stop_after_delay(60), wait=wait_fixed(3)): - with attempt: - new_primary_name = await get_primary(ops_test, app) - assert new_primary_name != primary_name, "primary reelection hasn't happened" - - # Verify that all units are part of the same cluster. 
- member_ips = await fetch_cluster_members(ops_test) - app = primary_name.split("/")[0] - ip_addresses = [unit.public_address for unit in ops_test.model.applications[app].units] - assert set(member_ips) == set(ip_addresses), "not all units are part of the same cluster." - - # Verify that no writes to the database were missed after stopping the writes. - await check_writes(ops_test) - - -@pytest.mark.group(1) -async def test_consistency(ops_test: OpsTest, continuous_writes) -> None: - """Write to primary, read data from secondaries (check consistency).""" - # Locate primary unit. - app = await app_name(ops_test) - primary_name = await get_primary(ops_test, app) - - # Start an application that continuously writes data to the database. - await start_continuous_writes(ops_test, app) - - await are_writes_increasing(ops_test, primary_name) - - # Verify that no writes to the database were missed after stopping the writes - # (check that all the units have all the writes). - await check_writes(ops_test) - - -@pytest.mark.group(1) -async def test_no_data_replicated_between_clusters(ops_test: OpsTest, continuous_writes) -> None: - """Check that writes in one cluster are not replicated to another cluster.""" - # Locate primary unit. - app = await app_name(ops_test) - primary_name = await get_primary(ops_test, app) - - # Deploy another cluster. - new_cluster_app = f"second-{app}" - if not await app_name(ops_test, new_cluster_app): - charm = await ops_test.build_charm(".") - async with ops_test.fast_forward(): - await ops_test.model.deploy( - charm, - application_name=new_cluster_app, - num_units=2, - series=CHARM_SERIES, - config={"profile": "testing"}, - ) - await ops_test.model.wait_for_idle( - apps=[new_cluster_app], status="active", timeout=1500 - ) - - # Start an application that continuously writes data to the database. 
- await start_continuous_writes(ops_test, app) - - await are_writes_increasing(ops_test, primary_name) - - # Verify that no writes to the first cluster were missed after stopping the writes. - await check_writes(ops_test) - - # Verify that the data from the first cluster wasn't replicated to the second cluster. - password = await get_password(ops_test, app=new_cluster_app) - for unit in ops_test.model.applications[new_cluster_app].units: - try: - with db_connect( - host=unit.public_address, password=password - ) as connection, connection.cursor() as cursor: - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'continuous_writes');" - ) - assert not cursor.fetchone()[ - 0 - ], "table 'continuous_writes' was replicated to the second cluster" - finally: - connection.close() diff --git a/tests/integration/ha_tests/test_restore_cluster.py b/tests/integration/ha_tests/test_restore_cluster.py deleted file mode 100644 index d6af07e251..0000000000 --- a/tests/integration/ha_tests/test_restore_cluster.py +++ /dev/null @@ -1,130 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2023 Canonical Ltd. -# See LICENSE file for licensing details. 
-import logging - -import pytest -from pytest_operator.plugin import OpsTest - -from ..helpers import ( - CHARM_SERIES, - db_connect, - get_password, - get_patroni_cluster, - get_primary, - get_unit_address, - set_password, -) -from .helpers import ( - add_unit_with_storage, - reused_full_cluster_recovery_storage, - storage_id, -) - -FIRST_APPLICATION = "first-cluster" -SECOND_APPLICATION = "second-cluster" - -logger = logging.getLogger(__name__) - -charm = None - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_build_and_deploy(ops_test: OpsTest) -> None: - """Build and deploy two PostgreSQL clusters.""" - # This is a potentially destructive test, so it shouldn't be run against existing clusters - charm = await ops_test.build_charm(".") - async with ops_test.fast_forward(): - # Deploy the first cluster with reusable storage - await ops_test.model.deploy( - charm, - application_name=FIRST_APPLICATION, - num_units=3, - series=CHARM_SERIES, - storage={"pgdata": {"pool": "lxd-btrfs", "size": 2048}}, - config={"profile": "testing"}, - ) - - # Deploy the second cluster - await ops_test.model.deploy( - charm, - application_name=SECOND_APPLICATION, - num_units=1, - series=CHARM_SERIES, - config={"profile": "testing"}, - ) - - await ops_test.model.wait_for_idle(status="active", timeout=1500) - - # TODO have a better way to bootstrap clusters with existing storage - primary = await get_primary( - ops_test, ops_test.model.applications[FIRST_APPLICATION].units[0].name - ) - for user in ["monitoring", "operator", "replication", "rewind"]: - password = await get_password(ops_test, primary, user) - second_primary = ops_test.model.applications[SECOND_APPLICATION].units[0].name - await set_password(ops_test, second_primary, user, password) - await ops_test.model.destroy_unit(second_primary) - - -@pytest.mark.group(1) -async def test_cluster_restore(ops_test): - """Recreates the cluster from storage volumes.""" - # Write some data. 
- primary = await get_primary( - ops_test, ops_test.model.applications[FIRST_APPLICATION].units[0].name - ) - password = await get_password(ops_test, primary) - address = get_unit_address(ops_test, primary) - logger.info("creating a table in the database") - with db_connect(host=address, password=password) as connection: - connection.autocommit = True - connection.cursor().execute( - "CREATE TABLE IF NOT EXISTS restore_table_1 (test_collumn INT );" - ) - connection.close() - - logger.info("Downscaling the existing cluster") - storages = [] - for unit in ops_test.model.applications[FIRST_APPLICATION].units: - storages.append(storage_id(ops_test, unit.name)) - await ops_test.model.destroy_unit(unit.name) - - await ops_test.model.remove_application(FIRST_APPLICATION, block_until_done=True) - - # Recreate cluster - logger.info("Upscaling the second cluster with the old data") - for storage in storages: - unit = await add_unit_with_storage(ops_test, SECOND_APPLICATION, storage) - assert await reused_full_cluster_recovery_storage( - ops_test, unit.name - ), "attached storage not properly re-used by Postgresql." 
- - primary = await get_primary( - ops_test, ops_test.model.applications[SECOND_APPLICATION].units[0].name - ) - address = get_unit_address(ops_test, primary) - logger.info("checking that data was persisted") - with db_connect(host=address, password=password) as connection, connection.cursor() as cursor: - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'restore_table_1');" - ) - assert cursor.fetchone()[ - 0 - ], "data wasn't correctly restored: table 'restore_table_1' doesn't exist" - connection.close() - - # check that there is only one primary - cluster = get_patroni_cluster( - ops_test.model.applications[SECOND_APPLICATION].units[0].public_address - ) - primaries = [member for member in cluster["members"] if member["role"] == "leader"] - assert len(primaries) == 1, "There isn't just a single primary" - - # check that all units are member of the new cluster - members = [member["name"] for member in cluster["members"]] - for unit in ops_test.model.applications[SECOND_APPLICATION].units: - assert unit.name.replace("/", "-") in members, "Unit missing from cluster" - assert len(members) == len(storages), "Number of restored units and reused storages diverge" diff --git a/tests/integration/ha_tests/test_self_healing.py b/tests/integration/ha_tests/test_self_healing.py deleted file mode 100644 index 63d5b5abaa..0000000000 --- a/tests/integration/ha_tests/test_self_healing.py +++ /dev/null @@ -1,545 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2022 Canonical Ltd. -# See LICENSE file for licensing details. 
-import asyncio -import logging - -import pytest -from pytest_operator.plugin import OpsTest -from tenacity import Retrying, stop_after_delay, wait_fixed - -from ..helpers import ( - CHARM_SERIES, - db_connect, - get_machine_from_unit, - get_password, - get_unit_address, - run_command_on_unit, -) -from .conftest import APPLICATION_NAME -from .helpers import ( - METADATA, - ORIGINAL_RESTART_CONDITION, - add_unit_with_storage, - app_name, - are_all_db_processes_down, - are_writes_increasing, - change_patroni_setting, - change_wal_settings, - check_writes, - cut_network_from_unit, - cut_network_from_unit_without_ip_change, - fetch_cluster_members, - get_controller_machine, - get_patroni_setting, - get_primary, - get_unit_ip, - is_cluster_updated, - is_connection_possible, - is_machine_reachable_from, - is_postgresql_ready, - is_replica, - is_secondary_up_to_date, - list_wal_files, - restore_network_for_unit, - restore_network_for_unit_without_ip_change, - reused_replica_storage, - send_signal_to_process, - start_continuous_writes, - storage_id, - storage_type, - update_restart_condition, - wait_network_restore, -) - -logger = logging.getLogger(__name__) - -APP_NAME = METADATA["name"] -PATRONI_PROCESS = "/snap/charmed-postgresql/[0-9]*/usr/bin/patroni" -POSTGRESQL_PROCESS = "postgres" -DB_PROCESSES = [POSTGRESQL_PROCESS, PATRONI_PROCESS] - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_build_and_deploy(ops_test: OpsTest) -> None: - """Build and deploy three unit of PostgreSQL.""" - wait_for_apps = False - # It is possible for users to provide their own cluster for HA testing. Hence, check if there - # is a pre-existing cluster. 
- if not await app_name(ops_test): - wait_for_apps = True - charm = await ops_test.build_charm(".") - async with ops_test.fast_forward(): - await ops_test.model.deploy( - charm, - num_units=3, - series=CHARM_SERIES, - storage={"pgdata": {"pool": "lxd-btrfs", "size": 2048}}, - config={"profile": "testing"}, - ) - # Deploy the continuous writes application charm if it wasn't already deployed. - if not await app_name(ops_test, APPLICATION_NAME): - wait_for_apps = True - async with ops_test.fast_forward(): - await ops_test.model.deploy( - APPLICATION_NAME, - application_name=APPLICATION_NAME, - series=CHARM_SERIES, - channel="edge", - ) - - if wait_for_apps: - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle(status="active", timeout=1500) - - -@pytest.mark.group(1) -async def test_storage_re_use(ops_test, continuous_writes): - """Verifies that database units with attached storage correctly repurpose storage. - - It is not enough to verify that Juju attaches the storage. Hence test checks that the - postgresql properly uses the storage that was provided. (ie. doesn't just re-sync everything - from primary, but instead computes a diff between current storage and primary storage.) - """ - app = await app_name(ops_test) - if storage_type(ops_test, app) == "rootfs": - pytest.skip( - "reuse of storage can only be used on deployments with persistent storage not on rootfs deployments" - ) - - # removing the only replica can be disastrous - if len(ops_test.model.applications[app].units) < 2: - await ops_test.model.applications[app].add_unit(count=1) - await ops_test.model.wait_for_idle(apps=[app], status="active", timeout=1500) - - # Start an application that continuously writes data to the database. 
- await start_continuous_writes(ops_test, app) - - # remove a unit and attach it's storage to a new unit - for unit in ops_test.model.applications[app].units: - if await is_replica(ops_test, unit.name): - break - unit_storage_id = storage_id(ops_test, unit.name) - expected_units = len(ops_test.model.applications[app].units) - 1 - await ops_test.model.destroy_unit(unit.name) - await ops_test.model.wait_for_idle( - apps=[app], status="active", timeout=1000, wait_for_exact_units=expected_units - ) - new_unit = await add_unit_with_storage(ops_test, app, unit_storage_id) - - assert await reused_replica_storage( - ops_test, new_unit.name - ), "attached storage not properly re-used by Postgresql." - - # Verify that no writes to the database were missed after stopping the writes. - total_expected_writes = await check_writes(ops_test) - - # Verify that new instance is up-to-date. - assert await is_secondary_up_to_date( - ops_test, new_unit.name, total_expected_writes - ), "new instance not up to date." - - -@pytest.mark.group(1) -@pytest.mark.parametrize("process", DB_PROCESSES) -async def test_kill_db_process( - ops_test: OpsTest, process: str, continuous_writes, primary_start_timeout -) -> None: - # Locate primary unit. - app = await app_name(ops_test) - primary_name = await get_primary(ops_test, app) - - # Start an application that continuously writes data to the database. - await start_continuous_writes(ops_test, app) - - # Kill the database process. - await send_signal_to_process(ops_test, primary_name, process, "SIGKILL") - - async with ops_test.fast_forward(): - await are_writes_increasing(ops_test, primary_name) - - # Verify that the database service got restarted and is ready in the old primary. - assert await is_postgresql_ready(ops_test, primary_name) - - # Verify that a new primary gets elected (ie old primary is secondary). 
- new_primary_name = await get_primary(ops_test, app) - assert new_primary_name != primary_name - - await is_cluster_updated(ops_test, primary_name) - - -@pytest.mark.group(1) -@pytest.mark.parametrize("process", DB_PROCESSES) -async def test_freeze_db_process( - ops_test: OpsTest, process: str, continuous_writes, primary_start_timeout -) -> None: - # Locate primary unit. - app = await app_name(ops_test) - primary_name = await get_primary(ops_test, app) - - # Start an application that continuously writes data to the database. - await start_continuous_writes(ops_test, app) - - # Freeze the database process. - await send_signal_to_process(ops_test, primary_name, process, "SIGSTOP") - - async with ops_test.fast_forward(): - # Verify new writes are continuing by counting the number of writes before and after a - # 3 minutes wait (this is a little more than the loop wait configuration, that is - # considered to trigger a fail-over after primary_start_timeout is changed, and also - # when freezing the DB process it take some more time to trigger the fail-over). - try: - await are_writes_increasing(ops_test, primary_name) - - # Verify that a new primary gets elected (ie old primary is secondary). - for attempt in Retrying(stop=stop_after_delay(60 * 3), wait=wait_fixed(3)): - with attempt: - new_primary_name = await get_primary(ops_test, app, down_unit=primary_name) - assert new_primary_name != primary_name - finally: - # Un-freeze the old primary. - await send_signal_to_process(ops_test, primary_name, process, "SIGCONT") - - # Verify that the database service got restarted and is ready in the old primary. - assert await is_postgresql_ready(ops_test, primary_name) - - await is_cluster_updated(ops_test, primary_name) - - -@pytest.mark.group(1) -@pytest.mark.parametrize("process", DB_PROCESSES) -async def test_restart_db_process( - ops_test: OpsTest, process: str, continuous_writes, primary_start_timeout -) -> None: - # Locate primary unit. 
- app = await app_name(ops_test) - primary_name = await get_primary(ops_test, app) - - # Start an application that continuously writes data to the database. - await start_continuous_writes(ops_test, app) - - # Restart the database process. - await send_signal_to_process(ops_test, primary_name, process, "SIGTERM") - - async with ops_test.fast_forward(): - await are_writes_increasing(ops_test, primary_name) - - # Verify that the database service got restarted and is ready in the old primary. - assert await is_postgresql_ready(ops_test, primary_name) - - # Verify that a new primary gets elected (ie old primary is secondary). - new_primary_name = await get_primary(ops_test, app) - assert new_primary_name != primary_name - - await is_cluster_updated(ops_test, primary_name) - - -@pytest.mark.group(1) -@pytest.mark.parametrize("process", DB_PROCESSES) -@pytest.mark.parametrize("signal", ["SIGTERM", "SIGKILL"]) -async def test_full_cluster_restart( - ops_test: OpsTest, - process: str, - signal: str, - continuous_writes, - reset_restart_condition, - loop_wait, -) -> None: - """This tests checks that a cluster recovers from a full cluster restart. - - The test can be called a full cluster crash when the signal sent to the OS process - is SIGKILL. - """ - # Locate primary unit. - app = await app_name(ops_test) - - # Change the loop wait setting to make Patroni wait more time before restarting PostgreSQL. - initial_loop_wait = await get_patroni_setting(ops_test, "loop_wait") - await change_patroni_setting(ops_test, "loop_wait", 300, use_random_unit=True) - - # Start an application that continuously writes data to the database. - await start_continuous_writes(ops_test, app) - - # Restart all units "simultaneously". 
- await asyncio.gather(*[ - send_signal_to_process(ops_test, unit.name, process, signal) - for unit in ops_test.model.applications[app].units - ]) - - # This test serves to verify behavior when all replicas are down at the same time that when - # they come back online they operate as expected. This check verifies that we meet the criteria - # of all replicas being down at the same time. - try: - assert await are_all_db_processes_down( - ops_test, process - ), "Not all units down at the same time." - finally: - if process == PATRONI_PROCESS: - awaits = [] - for unit in ops_test.model.applications[app].units: - awaits.append(update_restart_condition(ops_test, unit, ORIGINAL_RESTART_CONDITION)) - await asyncio.gather(*awaits) - await change_patroni_setting( - ops_test, "loop_wait", initial_loop_wait, use_random_unit=True - ) - - # Verify all units are up and running. - for unit in ops_test.model.applications[app].units: - assert await is_postgresql_ready( - ops_test, unit.name - ), f"unit {unit.name} not restarted after cluster restart." - - async with ops_test.fast_forward(): - await are_writes_increasing(ops_test) - - # Verify that all units are part of the same cluster. - member_ips = await fetch_cluster_members(ops_test) - ip_addresses = [unit.public_address for unit in ops_test.model.applications[app].units] - assert set(member_ips) == set(ip_addresses), "not all units are part of the same cluster." - - # Verify that no writes to the database were missed after stopping the writes. 
- async with ops_test.fast_forward(): - await check_writes(ops_test) - - -@pytest.mark.group(1) -@pytest.mark.unstable -async def test_forceful_restart_without_data_and_transaction_logs( - ops_test: OpsTest, - continuous_writes, - primary_start_timeout, - wal_settings, -) -> None: - """A forceful restart with deleted data and without transaction logs (forced clone).""" - app = await app_name(ops_test) - primary_name = await get_primary(ops_test, app) - - # Copy data dir content removal script. - await ops_test.juju( - "scp", "tests/integration/ha_tests/clean-data-dir.sh", f"{primary_name}:/tmp" - ) - - # Start an application that continuously writes data to the database. - await start_continuous_writes(ops_test, app) - - # Stop the systemd service on the primary unit. - await run_command_on_unit(ops_test, primary_name, "snap stop charmed-postgresql.patroni") - - # Data removal runs within a script, so it allows `*` expansion. - return_code, _, _ = await ops_test.juju( - "ssh", - primary_name, - "sudo", - "/tmp/clean-data-dir.sh", - ) - assert return_code == 0, "Failed to remove data directory" - - async with ops_test.fast_forward(): - # Verify that a new primary gets elected (ie old primary is secondary). - for attempt in Retrying(stop=stop_after_delay(60), wait=wait_fixed(3)): - with attempt: - new_primary_name = await get_primary(ops_test, app) - assert new_primary_name is not None - assert new_primary_name != primary_name - - await are_writes_increasing(ops_test, primary_name) - - # Change some settings to enable WAL rotation. - for unit in ops_test.model.applications[app].units: - if unit.name == primary_name: - continue - await change_wal_settings(ops_test, unit.name, 32, 32, 1) - - # Rotate the WAL segments. 
- files = await list_wal_files(ops_test, app) - host = get_unit_address(ops_test, new_primary_name) - password = await get_password(ops_test, new_primary_name) - with db_connect(host, password) as connection: - connection.autocommit = True - with connection.cursor() as cursor: - # Run some commands to make PostgreSQL do WAL rotation. - cursor.execute("SELECT pg_switch_wal();") - cursor.execute("CHECKPOINT;") - cursor.execute("SELECT pg_switch_wal();") - connection.close() - new_files = await list_wal_files(ops_test, app) - # Check that the WAL was correctly rotated. - for unit_name in files: - assert not files[unit_name].intersection( - new_files - ), "WAL segments weren't correctly rotated" - - # Start the systemd service in the old primary. - await run_command_on_unit(ops_test, primary_name, "snap start charmed-postgresql.patroni") - - # Verify that the database service got restarted and is ready in the old primary. - assert await is_postgresql_ready(ops_test, primary_name) - - await is_cluster_updated(ops_test, primary_name) - - -@pytest.mark.group(1) -async def test_network_cut(ops_test: OpsTest, continuous_writes, primary_start_timeout): - """Completely cut and restore network.""" - # Locate primary unit. - app = await app_name(ops_test) - primary_name = await get_primary(ops_test, app) - - # Start an application that continuously writes data to the database. - await start_continuous_writes(ops_test, app) - - # Get unit hostname and IP. - primary_hostname = await get_machine_from_unit(ops_test, primary_name) - primary_ip = await get_unit_ip(ops_test, primary_name) - - # Verify that connection is possible. - logger.info("checking whether the connectivity to the database is working") - assert await is_connection_possible( - ops_test, primary_name - ), f"Connection {primary_name} is not possible" - - logger.info(f"Cutting network for {primary_name}") - cut_network_from_unit(primary_hostname) - - # Verify machine is not reachable from peer units. 
- all_units_names = [unit.name for unit in ops_test.model.applications[app].units] - for unit_name in set(all_units_names) - {primary_name}: - logger.info(f"checking for no connectivity between {primary_name} and {unit_name}") - hostname = await get_machine_from_unit(ops_test, unit_name) - assert not is_machine_reachable_from( - hostname, primary_hostname - ), "unit is reachable from peer" - - # Verify machine is not reachable from controller. - logger.info(f"checking for no connectivity between {primary_name} and the controller") - controller = await get_controller_machine(ops_test) - assert not is_machine_reachable_from( - controller, primary_hostname - ), "unit is reachable from controller" - - # Verify that connection is not possible. - logger.info("checking whether the connectivity to the database is not working") - assert not await is_connection_possible( - ops_test, primary_name - ), "Connection is possible after network cut" - - async with ops_test.fast_forward(): - logger.info("checking whether writes are increasing") - await are_writes_increasing(ops_test, primary_name) - - logger.info("checking whether a new primary was elected") - # Verify that a new primary gets elected (ie old primary is secondary). - for attempt in Retrying(stop=stop_after_delay(60), wait=wait_fixed(3)): - with attempt: - new_primary_name = await get_primary(ops_test, app, down_unit=primary_name) - assert new_primary_name != primary_name - - logger.info(f"Restoring network for {primary_name}") - restore_network_for_unit(primary_hostname) - - # Wait until the cluster becomes idle (some operations like updating the member - # IP are made). - logger.info("waiting for cluster to become idle after updating member IP") - async with ops_test.fast_forward(fast_interval="60s"): - await ops_test.model.wait_for_idle( - apps=[app], - status="active", - raise_on_blocked=True, - timeout=1000, - idle_period=30, - ) - - # Wait the LXD unit has its IP updated. 
- logger.info("waiting for IP address to be updated on Juju unit") - await wait_network_restore(ops_test, primary_name, primary_ip) - - # Verify that the database service got restarted and is ready in the old primary. - logger.info(f"waiting for the database service to be ready on {primary_name}") - assert await is_postgresql_ready(ops_test, primary_name, use_ip_from_inside=True) - - # Verify that connection is possible. - logger.info("checking whether the connectivity to the database is working") - assert await is_connection_possible( - ops_test, primary_name, use_ip_from_inside=True - ), "Connection is not possible after network restore" - - await is_cluster_updated(ops_test, primary_name, use_ip_from_inside=True) - - -@pytest.mark.group(1) -async def test_network_cut_without_ip_change( - ops_test: OpsTest, continuous_writes, primary_start_timeout -): - """Completely cut and restore network (situation when the unit IP doesn't change).""" - # Locate primary unit. - app = await app_name(ops_test) - primary_name = await get_primary(ops_test, app) - - # Start an application that continuously writes data to the database. - await start_continuous_writes(ops_test, app) - - # Get unit hostname and IP. - primary_hostname = await get_machine_from_unit(ops_test, primary_name) - - # Verify that connection is possible. - logger.info("checking whether the connectivity to the database is working") - assert await is_connection_possible( - ops_test, primary_name - ), f"Connection {primary_name} is not possible" - - logger.info(f"Cutting network for {primary_name}") - cut_network_from_unit_without_ip_change(primary_hostname) - - # Verify machine is not reachable from peer units. 
- all_units_names = [unit.name for unit in ops_test.model.applications[app].units] - for unit_name in set(all_units_names) - {primary_name}: - logger.info(f"checking for no connectivity between {primary_name} and {unit_name}") - hostname = await get_machine_from_unit(ops_test, unit_name) - assert not is_machine_reachable_from( - hostname, primary_hostname - ), "unit is reachable from peer" - - # Verify machine is not reachable from controller. - logger.info(f"checking for no connectivity between {primary_name} and the controller") - controller = await get_controller_machine(ops_test) - assert not is_machine_reachable_from( - controller, primary_hostname - ), "unit is reachable from controller" - - # Verify that connection is not possible. - logger.info("checking whether the connectivity to the database is not working") - assert not await is_connection_possible( - ops_test, primary_name - ), "Connection is possible after network cut" - - async with ops_test.fast_forward(): - logger.info("checking whether writes are increasing") - await are_writes_increasing(ops_test, primary_name, use_ip_from_inside=True) - - logger.info("checking whether a new primary was elected") - # Verify that a new primary gets elected (ie old primary is secondary). - for attempt in Retrying(stop=stop_after_delay(60), wait=wait_fixed(3)): - with attempt: - new_primary_name = await get_primary(ops_test, app, down_unit=primary_name) - assert new_primary_name != primary_name - - logger.info(f"Restoring network for {primary_name}") - restore_network_for_unit_without_ip_change(primary_hostname) - - # Wait until the cluster becomes idle. - logger.info("waiting for cluster to become idle") - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle(apps=[app], status="active") - - # Verify that the database service got restarted and is ready in the old primary. 
- logger.info(f"waiting for the database service to be ready on {primary_name}") - assert await is_postgresql_ready(ops_test, primary_name) - - # Verify that connection is possible. - logger.info("checking whether the connectivity to the database is working") - assert await is_connection_possible( - ops_test, primary_name - ), "Connection is not possible after network restore" - - await is_cluster_updated(ops_test, primary_name, use_ip_from_inside=True) diff --git a/tests/integration/ha_tests/test_smoke.py b/tests/integration/ha_tests/test_smoke.py deleted file mode 100644 index 8da83ce3eb..0000000000 --- a/tests/integration/ha_tests/test_smoke.py +++ /dev/null @@ -1,221 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Canonical Ltd. -# See LICENSE file for licensing details. - -import logging -from asyncio import TimeoutError - -import pytest -from juju import tag -from pytest_operator.plugin import OpsTest -from tenacity import Retrying, stop_after_delay, wait_fixed - -from ..helpers import ( - APPLICATION_NAME, - CHARM_SERIES, -) -from ..juju_ import juju_major_version -from .helpers import ( - add_unit_with_storage, - check_db, - check_password_auth, - create_db, - get_any_deatached_storage, - is_postgresql_ready, - is_storage_exists, - remove_unit_force, - storage_id, -) - -TEST_DATABASE_NAME = "test_database" -DUP_APPLICATION_NAME = "postgres-test-dup" - -logger = logging.getLogger(__name__) - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_app_force_removal(ops_test: OpsTest, charm: str): - """Remove unit with force while storage is alive.""" - async with ops_test.fast_forward(): - # Deploy the charm. 
- logger.info("deploying charm") - await ops_test.model.deploy( - charm, - application_name=APPLICATION_NAME, - num_units=1, - series=CHARM_SERIES, - storage={"pgdata": {"pool": "lxd-btrfs", "size": 8046}}, - config={"profile": "testing"}, - ) - - logger.info("waiting for idle") - await ops_test.model.wait_for_idle(apps=[APPLICATION_NAME], status="active", timeout=1500) - assert ops_test.model.applications[APPLICATION_NAME].units[0].workload_status == "active" - - primary_name = ops_test.model.applications[APPLICATION_NAME].units[0].name - - logger.info("waiting for postgresql") - for attempt in Retrying(stop=stop_after_delay(15 * 3), wait=wait_fixed(3), reraise=True): - with attempt: - assert await is_postgresql_ready(ops_test, primary_name) - - logger.info("getting storage id") - storage_id_str = storage_id(ops_test, primary_name) - - # Check if storage exists after application deployed - logger.info("werifing is storage exists") - for attempt in Retrying(stop=stop_after_delay(15 * 3), wait=wait_fixed(3), reraise=True): - with attempt: - assert await is_storage_exists(ops_test, storage_id_str) - - # Create test database to check there is no resources conflicts - logger.info("creating db") - await create_db(ops_test, APPLICATION_NAME, TEST_DATABASE_NAME) - - # Check that test database is not exists for new unit - logger.info("checking db") - assert await check_db(ops_test, APPLICATION_NAME, TEST_DATABASE_NAME) - - # Destroy charm - logger.info("force removing charm") - if juju_major_version == 2: - await remove_unit_force(ops_test, primary_name) - else: - await ops_test.model.destroy_unit( - primary_name, force=True, destroy_storage=False, max_wait=1500 - ) - - # Storage should remain - logger.info("werifing is storage exists") - for attempt in Retrying(stop=stop_after_delay(15 * 3), wait=wait_fixed(3), reraise=True): - with attempt: - assert await is_storage_exists(ops_test, storage_id_str) - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def 
test_charm_garbage_ignorance(ops_test: OpsTest, charm: str): - """Test charm deploy in dirty environment with garbage storage.""" - async with ops_test.fast_forward(): - logger.info("checking garbage storage") - garbage_storage = None - for attempt in Retrying(stop=stop_after_delay(30 * 3), wait=wait_fixed(3), reraise=True): - with attempt: - garbage_storage = await get_any_deatached_storage(ops_test) - - logger.info("add unit with attached storage") - await add_unit_with_storage(ops_test, APPLICATION_NAME, garbage_storage) - - primary_name = ops_test.model.applications[APPLICATION_NAME].units[0].name - - logger.info("waiting for postgresql") - for attempt in Retrying(stop=stop_after_delay(15 * 3), wait=wait_fixed(3), reraise=True): - with attempt: - assert await is_postgresql_ready(ops_test, primary_name) - - logger.info("getting storage id") - storage_id_str = storage_id(ops_test, primary_name) - - assert storage_id_str == garbage_storage - - # Check if storage exists after application deployed - logger.info("werifing is storage exists") - for attempt in Retrying(stop=stop_after_delay(15 * 3), wait=wait_fixed(3), reraise=True): - with attempt: - assert await is_storage_exists(ops_test, storage_id_str) - - # Check that test database exists for new unit - logger.info("checking db") - assert await check_db(ops_test, APPLICATION_NAME, TEST_DATABASE_NAME) - - logger.info("removing charm") - await ops_test.model.destroy_unit(primary_name) - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.skipif(juju_major_version < 3, reason="Requires juju 3 or higher") -async def test_app_resources_conflicts_v3(ops_test: OpsTest, charm: str): - """Test application deploy in dirty environment with garbage storage from another application.""" - async with ops_test.fast_forward(): - logger.info("checking garbage storage") - garbage_storage = None - for attempt in Retrying(stop=stop_after_delay(30 * 3), wait=wait_fixed(3), reraise=True): - with attempt: - 
garbage_storage = await get_any_deatached_storage(ops_test) - - logger.info("deploying duplicate application with attached storage") - await ops_test.model.deploy( - charm, - application_name=DUP_APPLICATION_NAME, - num_units=1, - series=CHARM_SERIES, - attach_storage=[tag.storage(garbage_storage)], - config={"profile": "testing"}, - ) - - # Reducing the update status frequency to speed up the triggering of deferred events. - await ops_test.model.set_config({"update-status-hook-interval": "10s"}) - - logger.info("waiting for duplicate application to be blocked") - try: - await ops_test.model.wait_for_idle( - apps=[DUP_APPLICATION_NAME], timeout=1000, status="blocked" - ) - except TimeoutError: - logger.info("Application is not in blocked state. Checking logs...") - - # Since application have postgresql db in storage from external application it should not be able to connect due to new password - logger.info("checking operator password auth") - assert not await check_password_auth( - ops_test, ops_test.model.applications[DUP_APPLICATION_NAME].units[0].name - ) - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.skipif(juju_major_version != 2, reason="Requires juju 2") -async def test_app_resources_conflicts_v2(ops_test: OpsTest, charm: str): - """Test application deploy in dirty environment with garbage storage from another application.""" - async with ops_test.fast_forward(): - logger.info("checking garbage storage") - garbage_storage = None - for attempt in Retrying(stop=stop_after_delay(30 * 3), wait=wait_fixed(3), reraise=True): - with attempt: - garbage_storage = await get_any_deatached_storage(ops_test) - - # Deploy duplicaate charm - logger.info("deploying duplicate application") - await ops_test.model.deploy( - charm, - application_name=DUP_APPLICATION_NAME, - num_units=1, - series=CHARM_SERIES, - config={"profile": "testing"}, - ) - - logger.info("force removing charm") - await remove_unit_force( - ops_test, 
ops_test.model.applications[DUP_APPLICATION_NAME].units[0].name - ) - - # Add unit with garbage storage - logger.info("adding charm with attached storage") - add_unit_cmd = f"add-unit {DUP_APPLICATION_NAME} --model={ops_test.model.info.name} --attach-storage={garbage_storage}".split() - return_code, _, _ = await ops_test.juju(*add_unit_cmd) - assert return_code == 0, "Failed to add unit with storage" - - logger.info("waiting for duplicate application to be blocked") - try: - await ops_test.model.wait_for_idle( - apps=[DUP_APPLICATION_NAME], timeout=1000, status="blocked" - ) - except TimeoutError: - logger.info("Application is not in blocked state. Checking logs...") - - # Since application have postgresql db in storage from external application it should not be able to connect due to new password - logger.info("checking operator password auth") - assert not await check_password_auth( - ops_test, ops_test.model.applications[DUP_APPLICATION_NAME].units[0].name - ) diff --git a/tests/integration/ha_tests/test_upgrade.py b/tests/integration/ha_tests/test_upgrade.py deleted file mode 100644 index 608986eeca..0000000000 --- a/tests/integration/ha_tests/test_upgrade.py +++ /dev/null @@ -1,211 +0,0 @@ -# Copyright 2023 Canonical Ltd. -# See LICENSE file for licensing details. 
- -import json -import logging -import shutil -import zipfile -from pathlib import Path -from typing import Union - -import pytest -from pytest_operator.plugin import OpsTest - -from ..helpers import ( - APPLICATION_NAME, - DATABASE_APP_NAME, - count_switchovers, - get_leader_unit, - get_primary, -) -from ..new_relations.helpers import get_application_relation_data -from .helpers import ( - are_writes_increasing, - check_writes, - start_continuous_writes, -) - -logger = logging.getLogger(__name__) - -TIMEOUT = 600 - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_deploy_latest(ops_test: OpsTest) -> None: - """Simple test to ensure that the PostgreSQL and application charms get deployed.""" - await ops_test.model.deploy( - DATABASE_APP_NAME, - num_units=3, - channel="14/edge", - config={"profile": "testing"}, - ) - await ops_test.model.deploy( - APPLICATION_NAME, - num_units=1, - channel="latest/edge", - ) - logger.info("Wait for applications to become active") - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME, APPLICATION_NAME], status="active", timeout=1500 - ) - assert len(ops_test.model.applications[DATABASE_APP_NAME].units) == 3 - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_pre_upgrade_check(ops_test: OpsTest) -> None: - """Test that the pre-upgrade-check action runs successfully.""" - logger.info("Get leader unit") - leader_unit = await get_leader_unit(ops_test, DATABASE_APP_NAME) - assert leader_unit is not None, "No leader unit found" - - logger.info("Run pre-upgrade-check action") - action = await leader_unit.run_action("pre-upgrade-check") - await action.wait() - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_upgrade_from_edge(ops_test: OpsTest, continuous_writes) -> None: - # Start an application that continuously writes data to the database. 
- logger.info("starting continuous writes to the database") - await start_continuous_writes(ops_test, DATABASE_APP_NAME) - - # Check whether writes are increasing. - logger.info("checking whether writes are increasing") - await are_writes_increasing(ops_test) - - primary_name = await get_primary(ops_test, f"{DATABASE_APP_NAME}/0") - initial_number_of_switchovers = count_switchovers(ops_test, primary_name) - - application = ops_test.model.applications[DATABASE_APP_NAME] - - logger.info("Build charm locally") - charm = await ops_test.build_charm(".") - - logger.info("Refresh the charm") - await application.refresh(path=charm) - - logger.info("Wait for upgrade to start") - await ops_test.model.block_until( - lambda: "waiting" in {unit.workload_status for unit in application.units}, - timeout=TIMEOUT, - ) - - logger.info("Wait for upgrade to complete") - async with ops_test.fast_forward("60s"): - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="active", idle_period=30, timeout=TIMEOUT - ) - - # Check whether writes are increasing. - logger.info("checking whether writes are increasing") - await are_writes_increasing(ops_test) - - # Verify that no writes to the database were missed after stopping the writes - # (check that all the units have all the writes). - logger.info("checking whether no writes were lost") - await check_writes(ops_test) - - logger.info("checking the number of switchovers") - final_number_of_switchovers = count_switchovers(ops_test, primary_name) - assert ( - final_number_of_switchovers - initial_number_of_switchovers - ) <= 2, "Number of switchovers is greater than 2" - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_fail_and_rollback(ops_test, continuous_writes) -> None: - # Start an application that continuously writes data to the database. - logger.info("starting continuous writes to the database") - await start_continuous_writes(ops_test, DATABASE_APP_NAME) - - # Check whether writes are increasing. 
- logger.info("checking whether writes are increasing") - await are_writes_increasing(ops_test) - - logger.info("Get leader unit") - leader_unit = await get_leader_unit(ops_test, DATABASE_APP_NAME) - assert leader_unit is not None, "No leader unit found" - - logger.info("Run pre-upgrade-check action") - action = await leader_unit.run_action("pre-upgrade-check") - await action.wait() - - local_charm = await ops_test.build_charm(".") - if isinstance(local_charm, str): - filename = local_charm.split("/")[-1] - else: - filename = local_charm.name - fault_charm = Path("/tmp/", filename) - shutil.copy(local_charm, fault_charm) - - logger.info("Inject dependency fault") - await inject_dependency_fault(ops_test, DATABASE_APP_NAME, fault_charm) - - application = ops_test.model.applications[DATABASE_APP_NAME] - - logger.info("Refresh the charm") - await application.refresh(path=fault_charm) - - logger.info("Wait for upgrade to fail") - async with ops_test.fast_forward("60s"): - await ops_test.model.block_until( - lambda: "blocked" in {unit.workload_status for unit in application.units}, - timeout=TIMEOUT, - ) - - logger.info("Ensure continuous_writes while in failure state on remaining units") - await are_writes_increasing(ops_test) - - logger.info("Re-run pre-upgrade-check action") - action = await leader_unit.run_action("pre-upgrade-check") - await action.wait() - - logger.info("Re-refresh the charm") - await application.refresh(path=local_charm) - - logger.info("Wait for upgrade to start") - await ops_test.model.block_until( - lambda: "waiting" in {unit.workload_status for unit in application.units}, - timeout=TIMEOUT, - ) - - logger.info("Wait for application to recover") - async with ops_test.fast_forward("60s"): - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="active", timeout=TIMEOUT - ) - - logger.info("Ensure continuous_writes after rollback procedure") - await are_writes_increasing(ops_test) - - # Verify that no writes to the database were 
missed after stopping the writes - # (check that all the units have all the writes). - logger.info("Checking whether no writes were lost") - await check_writes(ops_test) - - # Remove fault charm file. - fault_charm.unlink() - - -async def inject_dependency_fault( - ops_test: OpsTest, application_name: str, charm_file: Union[str, Path] -) -> None: - """Inject a dependency fault into the PostgreSQL charm.""" - # Query running dependency to overwrite with incompatible version. - dependencies = await get_application_relation_data( - ops_test, application_name, "upgrade", "dependencies" - ) - loaded_dependency_dict = json.loads(dependencies) - if "snap" not in loaded_dependency_dict: - loaded_dependency_dict["snap"] = {"dependencies": {}, "name": "charmed-postgresql"} - loaded_dependency_dict["snap"]["upgrade_supported"] = "^15" - loaded_dependency_dict["snap"]["version"] = "15.0" - - # Overwrite dependency.json with incompatible version. - with zipfile.ZipFile(charm_file, mode="a") as charm_zip: - charm_zip.writestr("src/dependency.json", json.dumps(loaded_dependency_dict)) diff --git a/tests/integration/ha_tests/test_upgrade_from_stable.py b/tests/integration/ha_tests/test_upgrade_from_stable.py deleted file mode 100644 index c1584c6a6c..0000000000 --- a/tests/integration/ha_tests/test_upgrade_from_stable.py +++ /dev/null @@ -1,153 +0,0 @@ -# Copyright 2023 Canonical Ltd. -# See LICENSE file for licensing details. -import json -import logging - -import pytest -from pytest_operator.plugin import OpsTest - -from .. 
import markers -from ..helpers import ( - APPLICATION_NAME, - DATABASE_APP_NAME, - count_switchovers, - get_leader_unit, - get_primary, - remove_chown_workaround, -) -from .helpers import ( - are_writes_increasing, - check_writes, - start_continuous_writes, -) - -logger = logging.getLogger(__name__) - -TIMEOUT = 600 - - -@pytest.mark.group(1) -@markers.amd64_only # TODO: remove after arm64 stable release -@pytest.mark.abort_on_fail -async def test_deploy_stable(ops_test: OpsTest) -> None: - """Simple test to ensure that the PostgreSQL and application charms get deployed.""" - return_code, charm_info, stderr = await ops_test.juju("info", "postgresql", "--format=json") - if return_code != 0: - raise Exception(f"failed to get charm info with error: {stderr}") - # Revisions lower than 315 have a currently broken workaround for chown. - parsed_charm_info = json.loads(charm_info) - revision = ( - parsed_charm_info["channels"]["14"]["stable"][0]["revision"] - if "channels" in parsed_charm_info - else parsed_charm_info["channel-map"]["14/stable"]["revision"] - ) - logger.info(f"14/stable revision: {revision}") - if int(revision) < 315: - original_charm_name = "./postgresql.charm" - return_code, _, stderr = await ops_test.juju( - "download", - "postgresql", - "--channel=14/stable", - f"--filepath={original_charm_name}", - ) - if return_code != 0: - raise Exception( - f"failed to download charm from 14/stable channel with error: {stderr}" - ) - patched_charm_name = "./modified_postgresql.charm" - remove_chown_workaround(original_charm_name, patched_charm_name) - return_code, _, stderr = await ops_test.juju("deploy", patched_charm_name, "-n", "3") - if return_code != 0: - raise Exception(f"failed to deploy charm from 14/stable channel with error: {stderr}") - else: - await ops_test.model.deploy( - DATABASE_APP_NAME, - num_units=3, - channel="14/stable", - ) - await ops_test.model.deploy( - APPLICATION_NAME, - num_units=1, - channel="latest/edge", - ) - logger.info("Wait for 
applications to become active") - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME, APPLICATION_NAME], status="active", timeout=(20 * 60) - ) - assert len(ops_test.model.applications[DATABASE_APP_NAME].units) == 3 - - -@pytest.mark.group(1) -@markers.amd64_only # TODO: remove after arm64 stable release -@pytest.mark.abort_on_fail -async def test_pre_upgrade_check(ops_test: OpsTest) -> None: - """Test that the pre-upgrade-check action runs successfully.""" - application = ops_test.model.applications[DATABASE_APP_NAME] - if "pre-upgrade-check" not in await application.get_actions(): - logger.info("skipping the test because the charm from 14/stable doesn't support upgrade") - return - - logger.info("Get leader unit") - leader_unit = await get_leader_unit(ops_test, DATABASE_APP_NAME) - assert leader_unit is not None, "No leader unit found" - - logger.info("Run pre-upgrade-check action") - action = await leader_unit.run_action("pre-upgrade-check") - await action.wait() - - -@pytest.mark.group(1) -@markers.amd64_only # TODO: remove after arm64 stable release -@pytest.mark.abort_on_fail -async def test_upgrade_from_stable(ops_test: OpsTest): - """Test updating from stable channel.""" - # Start an application that continuously writes data to the database. - logger.info("starting continuous writes to the database") - await start_continuous_writes(ops_test, DATABASE_APP_NAME) - - # Check whether writes are increasing. 
- logger.info("checking whether writes are increasing") - await are_writes_increasing(ops_test) - - primary_name = await get_primary(ops_test, f"{DATABASE_APP_NAME}/0") - initial_number_of_switchovers = count_switchovers(ops_test, primary_name) - - application = ops_test.model.applications[DATABASE_APP_NAME] - actions = await application.get_actions() - - logger.info("Build charm locally") - charm = await ops_test.build_charm(".") - - logger.info("Refresh the charm") - await application.refresh(path=charm) - - logger.info("Wait for upgrade to start") - await ops_test.model.block_until( - lambda: ("waiting" if "pre-upgrade-check" in actions else "maintenance") - in {unit.workload_status for unit in application.units}, - timeout=TIMEOUT, - ) - - logger.info("Wait for upgrade to complete") - async with ops_test.fast_forward("60s"): - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="active", idle_period=30, timeout=TIMEOUT - ) - - # Check whether writes are increasing. - logger.info("checking whether writes are increasing") - await are_writes_increasing(ops_test) - - # Verify that no writes to the database were missed after stopping the writes - # (check that all the units have all the writes). - logger.info("checking whether no writes were lost") - await check_writes(ops_test) - - # Check the number of switchovers. - if "pre-upgrade-check" in actions: - logger.info("checking the number of switchovers") - final_number_of_switchovers = count_switchovers(ops_test, primary_name) - assert ( - final_number_of_switchovers - initial_number_of_switchovers - ) <= 2, "Number of switchovers is greater than 2" diff --git a/tests/integration/test_backups.py b/tests/integration/test_backups.py deleted file mode 100644 index 887db7fd5b..0000000000 --- a/tests/integration/test_backups.py +++ /dev/null @@ -1,516 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2023 Canonical Ltd. -# See LICENSE file for licensing details. 
-import logging -import uuid -from typing import Dict, Tuple - -import boto3 -import pytest as pytest -from pytest_operator.plugin import OpsTest -from tenacity import Retrying, stop_after_attempt, wait_exponential - -from . import architecture -from .helpers import ( - CHARM_SERIES, - DATABASE_APP_NAME, - construct_endpoint, - db_connect, - get_password, - get_primary, - get_unit_address, - scale_application, - switchover, - wait_for_idle_on_blocked, -) -from .juju_ import juju_major_version - -ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE = "the S3 repository has backups from another cluster" -FAILED_TO_ACCESS_CREATE_BUCKET_ERROR_MESSAGE = ( - "failed to access/create the bucket, check your S3 settings" -) -FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE = "failed to initialize stanza, check your S3 settings" -S3_INTEGRATOR_APP_NAME = "s3-integrator" -if juju_major_version < 3: - tls_certificates_app_name = "tls-certificates-operator" - if architecture.architecture == "arm64": - tls_channel = "legacy/edge" - else: - tls_channel = "legacy/stable" - tls_config = {"generate-self-signed-certificates": "true", "ca-common-name": "Test CA"} -else: - tls_certificates_app_name = "self-signed-certificates" - if architecture.architecture == "arm64": - tls_channel = "latest/edge" - else: - tls_channel = "latest/stable" - tls_config = {"ca-common-name": "Test CA"} - -logger = logging.getLogger(__name__) - -AWS = "AWS" -GCP = "GCP" - - -@pytest.fixture(scope="module") -async def cloud_configs(ops_test: OpsTest, github_secrets) -> None: - # Define some configurations and credentials. 
- configs = { - AWS: { - "endpoint": "https://s3.amazonaws.com", - "bucket": "data-charms-testing", - "path": f"/postgresql-vm/{uuid.uuid1()}", - "region": "us-east-1", - }, - GCP: { - "endpoint": "https://storage.googleapis.com", - "bucket": "data-charms-testing", - "path": f"/postgresql-vm/{uuid.uuid1()}", - "region": "", - }, - } - credentials = { - AWS: { - "access-key": github_secrets["AWS_ACCESS_KEY"], - "secret-key": github_secrets["AWS_SECRET_KEY"], - }, - GCP: { - "access-key": github_secrets["GCP_ACCESS_KEY"], - "secret-key": github_secrets["GCP_SECRET_KEY"], - }, - } - yield configs, credentials - # Delete the previously created objects. - logger.info("deleting the previously created backups") - for cloud, config in configs.items(): - session = boto3.session.Session( - aws_access_key_id=credentials[cloud]["access-key"], - aws_secret_access_key=credentials[cloud]["secret-key"], - region_name=config["region"], - ) - s3 = session.resource( - "s3", endpoint_url=construct_endpoint(config["endpoint"], config["region"]) - ) - bucket = s3.Bucket(config["bucket"]) - # GCS doesn't support batch delete operation, so delete the objects one by one. - for bucket_object in bucket.objects.filter(Prefix=config["path"].lstrip("/")): - bucket_object.delete() - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_backup(ops_test: OpsTest, cloud_configs: Tuple[Dict, Dict], charm) -> None: - """Build and deploy two units of PostgreSQL and then test the backup and restore actions.""" - # Deploy S3 Integrator and TLS Certificates Operator. 
- await ops_test.model.deploy(S3_INTEGRATOR_APP_NAME) - await ops_test.model.deploy(tls_certificates_app_name, config=tls_config, channel=tls_channel) - - for cloud, config in cloud_configs[0].items(): - # Deploy and relate PostgreSQL to S3 integrator (one database app for each cloud for now - # as archive_mode is disabled after restoring the backup) and to TLS Certificates Operator - # (to be able to create backups from replicas). - database_app_name = f"{DATABASE_APP_NAME}-{cloud.lower()}" - await ops_test.model.deploy( - charm, - application_name=database_app_name, - num_units=2, - series=CHARM_SERIES, - config={"profile": "testing"}, - ) - await ops_test.model.relate(database_app_name, S3_INTEGRATOR_APP_NAME) - await ops_test.model.relate(database_app_name, tls_certificates_app_name) - - # Configure and set access and secret keys. - logger.info(f"configuring S3 integrator for {cloud}") - await ops_test.model.applications[S3_INTEGRATOR_APP_NAME].set_config(config) - action = await ops_test.model.units.get(f"{S3_INTEGRATOR_APP_NAME}/0").run_action( - "sync-s3-credentials", - **cloud_configs[1][cloud], - ) - await action.wait() - async with ops_test.fast_forward(fast_interval="60s"): - await ops_test.model.wait_for_idle( - apps=[database_app_name, S3_INTEGRATOR_APP_NAME], status="active", timeout=1500 - ) - - primary = await get_primary(ops_test, f"{database_app_name}/0") - for unit in ops_test.model.applications[database_app_name].units: - if unit.name != primary: - replica = unit.name - break - - # Write some data. - password = await get_password(ops_test, primary) - address = get_unit_address(ops_test, primary) - logger.info("creating a table in the database") - with db_connect(host=address, password=password) as connection: - connection.autocommit = True - connection.cursor().execute( - "CREATE TABLE IF NOT EXISTS backup_table_1 (test_collumn INT );" - ) - connection.close() - - # Run the "create backup" action. 
- logger.info("creating a backup") - action = await ops_test.model.units.get(replica).run_action("create-backup") - await action.wait() - backup_status = action.results.get("backup-status") - assert backup_status, "backup hasn't succeeded" - await ops_test.model.wait_for_idle( - apps=[database_app_name, S3_INTEGRATOR_APP_NAME], status="active", timeout=1000 - ) - - # Run the "list backups" action. - logger.info("listing the available backups") - action = await ops_test.model.units.get(replica).run_action("list-backups") - await action.wait() - backups = action.results.get("backups") - # 2 lines for header output, 1 backup line ==> 3 total lines - assert len(backups.split("\n")) == 3, "full backup is not outputted" - await ops_test.model.wait_for_idle(status="active", timeout=1000) - - # Write some data. - logger.info("creating a second table in the database") - with db_connect(host=address, password=password) as connection: - connection.autocommit = True - connection.cursor().execute("CREATE TABLE backup_table_2 (test_collumn INT );") - connection.close() - - # Run the "create backup" action. - logger.info("creating a backup") - action = await ops_test.model.units.get(replica).run_action( - "create-backup", **{"type": "differential"} - ) - await action.wait() - backup_status = action.results.get("backup-status") - assert backup_status, "backup hasn't succeeded" - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle(status="active", timeout=1000) - - # Run the "list backups" action. - logger.info("listing the available backups") - action = await ops_test.model.units.get(replica).run_action("list-backups") - await action.wait() - backups = action.results.get("backups") - # 2 lines for header output, 2 backup lines ==> 4 total lines - assert len(backups.split("\n")) == 4, "differential backup is not outputted" - await ops_test.model.wait_for_idle(status="active", timeout=1000) - - # Write some data. 
- logger.info("creating a second table in the database") - with db_connect(host=address, password=password) as connection: - connection.autocommit = True - connection.cursor().execute("CREATE TABLE backup_table_3 (test_collumn INT );") - connection.close() - # Scale down to be able to restore. - async with ops_test.fast_forward(): - await ops_test.model.destroy_unit(replica) - await ops_test.model.block_until( - lambda: len(ops_test.model.applications[database_app_name].units) == 1 - ) - - for unit in ops_test.model.applications[database_app_name].units: - remaining_unit = unit - break - - # Run the "restore backup" action for differential backup. - for attempt in Retrying( - stop=stop_after_attempt(10), wait=wait_exponential(multiplier=1, min=2, max=30) - ): - with attempt: - logger.info("restoring the backup") - last_diff_backup = backups.split("\n")[-1] - backup_id = last_diff_backup.split()[0] - action = await remaining_unit.run_action("restore", **{"backup-id": backup_id}) - await action.wait() - restore_status = action.results.get("restore-status") - assert restore_status, "restore hasn't succeeded" - - # Wait for the restore to complete. - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle(status="active", timeout=1000) - - # Check that the backup was correctly restored by having only the first created table. 
- logger.info("checking that the backup was correctly restored") - primary = await get_primary(ops_test, remaining_unit.name) - address = get_unit_address(ops_test, primary) - with db_connect( - host=address, password=password - ) as connection, connection.cursor() as cursor: - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_1');" - ) - assert cursor.fetchone()[ - 0 - ], "backup wasn't correctly restored: table 'backup_table_1' doesn't exist" - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_2');" - ) - assert cursor.fetchone()[ - 0 - ], "backup wasn't correctly restored: table 'backup_table_2' doesn't exist" - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_3');" - ) - assert not cursor.fetchone()[ - 0 - ], "backup wasn't correctly restored: table 'backup_table_3' exists" - connection.close() - - # Run the "restore backup" action for full backup. - for attempt in Retrying( - stop=stop_after_attempt(10), wait=wait_exponential(multiplier=1, min=2, max=30) - ): - with attempt: - logger.info("restoring the backup") - last_full_backup = backups.split("\n")[-2] - backup_id = last_full_backup.split()[0] - action = await remaining_unit.run_action("restore", **{"backup-id": backup_id}) - await action.wait() - restore_status = action.results.get("restore-status") - assert restore_status, "restore hasn't succeeded" - - # Wait for the restore to complete. - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle(status="active", timeout=1000) - - # Check that the backup was correctly restored by having only the first created table. 
- primary = await get_primary(ops_test, remaining_unit.name) - address = get_unit_address(ops_test, primary) - logger.info("checking that the backup was correctly restored") - with db_connect( - host=address, password=password - ) as connection, connection.cursor() as cursor: - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_1');" - ) - assert cursor.fetchone()[ - 0 - ], "backup wasn't correctly restored: table 'backup_table_1' doesn't exist" - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_2');" - ) - assert not cursor.fetchone()[ - 0 - ], "backup wasn't correctly restored: table 'backup_table_2' exists" - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_3');" - ) - assert not cursor.fetchone()[ - 0 - ], "backup wasn't correctly restored: table 'backup_table_3' exists" - connection.close() - - # Run the following steps only in one cloud (it's enough for those checks). - if cloud == list(cloud_configs[0].keys())[0]: - # Remove the relation to the TLS certificates operator. - await ops_test.model.applications[database_app_name].remove_relation( - f"{database_app_name}:certificates", f"{tls_certificates_app_name}:certificates" - ) - await ops_test.model.wait_for_idle( - apps=[database_app_name], status="active", timeout=1000 - ) - - # Scale up to be able to test primary and leader being different. - async with ops_test.fast_forward(): - await scale_application(ops_test, database_app_name, 2) - - # Ensure replication is working correctly. 
- new_unit_name = f"{database_app_name}/2" - address = get_unit_address(ops_test, new_unit_name) - with db_connect( - host=address, password=password - ) as connection, connection.cursor() as cursor: - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_1');" - ) - assert cursor.fetchone()[ - 0 - ], f"replication isn't working correctly: table 'backup_table_1' doesn't exist in {new_unit_name}" - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_2');" - ) - assert not cursor.fetchone()[ - 0 - ], f"replication isn't working correctly: table 'backup_table_2' exists in {new_unit_name}" - connection.close() - - switchover(ops_test, primary, new_unit_name) - - # Get the new primary unit. - primary = await get_primary(ops_test, new_unit_name) - # Check that the primary changed. - for attempt in Retrying( - stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=30) - ): - with attempt: - assert primary == new_unit_name - - # Ensure stanza is working correctly. - logger.info("listing the available backups") - action = await ops_test.model.units.get(new_unit_name).run_action("list-backups") - await action.wait() - backups = action.results.get("backups") - assert backups, "backups not outputted" - await ops_test.model.wait_for_idle(status="active", timeout=1000) - - # Remove the database app. - await ops_test.model.remove_application(database_app_name, block_until_done=True) - - # Remove the TLS operator. 
- await ops_test.model.remove_application(tls_certificates_app_name, block_until_done=True) - - -@pytest.mark.group(1) -async def test_restore_on_new_cluster(ops_test: OpsTest, github_secrets, charm) -> None: - """Test that is possible to restore a backup to another PostgreSQL cluster.""" - previous_database_app_name = f"{DATABASE_APP_NAME}-gcp" - database_app_name = f"new-{DATABASE_APP_NAME}" - await ops_test.model.deploy(charm, application_name=previous_database_app_name) - await ops_test.model.deploy( - charm, - application_name=database_app_name, - series=CHARM_SERIES, - ) - await ops_test.model.relate(previous_database_app_name, S3_INTEGRATOR_APP_NAME) - await ops_test.model.relate(database_app_name, S3_INTEGRATOR_APP_NAME) - async with ops_test.fast_forward(): - logger.info( - "waiting for the database charm to become blocked due to existing backups from another cluster in the repository" - ) - await wait_for_idle_on_blocked( - ops_test, - previous_database_app_name, - 2, - S3_INTEGRATOR_APP_NAME, - ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE, - ) - logger.info( - "waiting for the database charm to become blocked due to existing backups from another cluster in the repository" - ) - await wait_for_idle_on_blocked( - ops_test, - database_app_name, - 0, - S3_INTEGRATOR_APP_NAME, - ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE, - ) - - # Remove the database app with the same name as the previous one (that was used only to test - # that the cluster becomes blocked). - await ops_test.model.remove_application(previous_database_app_name, block_until_done=True) - - # Run the "list backups" action. 
- unit_name = f"{database_app_name}/0" - logger.info("listing the available backups") - action = await ops_test.model.units.get(unit_name).run_action("list-backups") - await action.wait() - backups = action.results.get("backups") - assert backups, "backups not outputted" - await wait_for_idle_on_blocked( - ops_test, - database_app_name, - 0, - S3_INTEGRATOR_APP_NAME, - ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE, - ) - - # Run the "restore backup" action. - for attempt in Retrying( - stop=stop_after_attempt(10), wait=wait_exponential(multiplier=1, min=2, max=30) - ): - with attempt: - logger.info("restoring the backup") - most_recent_backup = backups.split("\n")[-1] - backup_id = most_recent_backup.split()[0] - action = await ops_test.model.units.get(unit_name).run_action( - "restore", **{"backup-id": backup_id} - ) - await action.wait() - restore_status = action.results.get("restore-status") - assert restore_status, "restore hasn't succeeded" - - # Wait for the restore to complete. - async with ops_test.fast_forward(): - unit = ops_test.model.units.get(f"{database_app_name}/0") - await ops_test.model.block_until( - lambda: unit.workload_status_message == ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE - ) - - # Check that the backup was correctly restored by having only the first created table. 
- logger.info("checking that the backup was correctly restored") - password = await get_password(ops_test, unit_name) - address = get_unit_address(ops_test, unit_name) - with db_connect(host=address, password=password) as connection, connection.cursor() as cursor: - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_1');" - ) - assert cursor.fetchone()[ - 0 - ], "backup wasn't correctly restored: table 'backup_table_1' doesn't exist" - connection.close() - - -@pytest.mark.group(1) -async def test_invalid_config_and_recovery_after_fixing_it( - ops_test: OpsTest, cloud_configs: Tuple[Dict, Dict] -) -> None: - """Test that the charm can handle invalid and valid backup configurations.""" - database_app_name = f"new-{DATABASE_APP_NAME}" - - # Provide invalid backup configurations. - logger.info("configuring S3 integrator for an invalid cloud") - await ops_test.model.applications[S3_INTEGRATOR_APP_NAME].set_config({ - "endpoint": "endpoint", - "bucket": "bucket", - "path": "path", - "region": "region", - }) - action = await ops_test.model.units.get(f"{S3_INTEGRATOR_APP_NAME}/0").run_action( - "sync-s3-credentials", - **{ - "access-key": "access-key", - "secret-key": "secret-key", - }, - ) - await action.wait() - logger.info("waiting for the database charm to become blocked") - unit = ops_test.model.units.get(f"{database_app_name}/0") - await ops_test.model.block_until( - lambda: unit.workload_status_message == FAILED_TO_ACCESS_CREATE_BUCKET_ERROR_MESSAGE - ) - - # Provide valid backup configurations, but from another cluster repository. 
- logger.info( - "configuring S3 integrator for a valid cloud, but with the path of another cluster repository" - ) - await ops_test.model.applications[S3_INTEGRATOR_APP_NAME].set_config(cloud_configs[0][AWS]) - action = await ops_test.model.units.get(f"{S3_INTEGRATOR_APP_NAME}/0").run_action( - "sync-s3-credentials", - **cloud_configs[1][AWS], - ) - await action.wait() - logger.info("waiting for the database charm to become blocked") - unit = ops_test.model.units.get(f"{database_app_name}/0") - await ops_test.model.block_until( - lambda: unit.workload_status_message == ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE - ) - - # Provide valid backup configurations, with another path in the S3 bucket. - logger.info("configuring S3 integrator for a valid cloud") - config = cloud_configs[0][AWS].copy() - config["path"] = f"/postgresql/{uuid.uuid1()}" - await ops_test.model.applications[S3_INTEGRATOR_APP_NAME].set_config(config) - logger.info("waiting for the database charm to become active") - await ops_test.model.wait_for_idle( - apps=[database_app_name, S3_INTEGRATOR_APP_NAME], status="active" - ) diff --git a/tests/integration/test_charm.py b/tests/integration/test_charm.py deleted file mode 100644 index 12ac5c5a46..0000000000 --- a/tests/integration/test_charm.py +++ /dev/null @@ -1,333 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Canonical Ltd. -# See LICENSE file for licensing details. 
- - -import logging - -import psycopg2 -import pytest -import requests -from psycopg2 import sql -from pytest_operator.plugin import OpsTest -from tenacity import Retrying, stop_after_attempt, wait_exponential, wait_fixed - -from .helpers import ( - CHARM_SERIES, - DATABASE_APP_NAME, - STORAGE_PATH, - check_cluster_members, - convert_records_to_dict, - db_connect, - find_unit, - get_password, - get_primary, - get_unit_address, - scale_application, - switchover, -) - -logger = logging.getLogger(__name__) - -UNIT_IDS = [0, 1, 2] - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_deploy(ops_test: OpsTest, charm: str): - """Deploy the charm-under-test. - - Assert on the unit status before any relations/configurations take place. - """ - # Deploy the charm with Patroni resource. - await ops_test.model.deploy( - charm, - application_name=DATABASE_APP_NAME, - num_units=3, - series=CHARM_SERIES, - config={"profile": "testing"}, - ) - - # Reducing the update status frequency to speed up the triggering of deferred events. - await ops_test.model.set_config({"update-status-hook-interval": "10s"}) - - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=1500) - assert ops_test.model.applications[DATABASE_APP_NAME].units[0].workload_status == "active" - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.parametrize("unit_id", UNIT_IDS) -async def test_database_is_up(ops_test: OpsTest, unit_id: int): - # Query Patroni REST API and check the status that indicates - # both Patroni and PostgreSQL are up and running. 
- host = get_unit_address(ops_test, f"{DATABASE_APP_NAME}/{unit_id}") - result = requests.get(f"http://{host}:8008/health") - assert result.status_code == 200 - - -@pytest.mark.group(1) -@pytest.mark.parametrize("unit_id", UNIT_IDS) -async def test_exporter_is_up(ops_test: OpsTest, unit_id: int): - # Query Patroni REST API and check the status that indicates - # both Patroni and PostgreSQL are up and running. - host = get_unit_address(ops_test, f"{DATABASE_APP_NAME}/{unit_id}") - result = requests.get(f"http://{host}:9187/metrics") - assert result.status_code == 200 - assert "pg_exporter_last_scrape_error 0" in result.content.decode( - "utf8" - ), "Scrape error in postgresql_prometheus_exporter" - - -@pytest.mark.group(1) -@pytest.mark.parametrize("unit_id", UNIT_IDS) -async def test_settings_are_correct(ops_test: OpsTest, unit_id: int): - # Connect to the PostgreSQL instance. - # Retrieving the operator user password using the action. - any_unit_name = ops_test.model.applications[DATABASE_APP_NAME].units[0].name - password = await get_password(ops_test, any_unit_name) - - # Connect to PostgreSQL. - host = get_unit_address(ops_test, f"{DATABASE_APP_NAME}/{unit_id}") - logger.info("connecting to the database host: %s", host) - with db_connect(host, password) as connection: - assert connection.status == psycopg2.extensions.STATUS_READY - - # Retrieve settings from PostgreSQL pg_settings table. - # Here the SQL query gets a key-value pair composed by the name of the setting - # and its value, filtering the retrieved data to return only the settings - # that were set by Patroni. 
- settings_names = [ - "archive_command", - "archive_mode", - "autovacuum", - "data_directory", - "cluster_name", - "data_checksums", - "fsync", - "full_page_writes", - "lc_messages", - "listen_addresses", - "log_autovacuum_min_duration", - "log_checkpoints", - "log_destination", - "log_temp_files", - "log_timezone", - "max_connections", - "wal_level", - ] - with connection.cursor() as cursor: - cursor.execute( - sql.SQL("SELECT name,setting FROM pg_settings WHERE name IN ({});").format( - sql.SQL(", ").join(sql.Placeholder() * len(settings_names)) - ), - settings_names, - ) - records = cursor.fetchall() - settings = convert_records_to_dict(records) - connection.close() - - # Validate each configuration set by Patroni on PostgreSQL. - assert settings["archive_command"] == "/bin/true" - assert settings["archive_mode"] == "on" - assert settings["autovacuum"] == "on" - assert settings["cluster_name"] == DATABASE_APP_NAME - assert settings["data_directory"] == f"{STORAGE_PATH}/var/lib/postgresql" - assert settings["data_checksums"] == "on" - assert settings["fsync"] == "on" - assert settings["full_page_writes"] == "on" - assert settings["lc_messages"] == "en_US.UTF8" - assert settings["listen_addresses"] == host - assert settings["log_autovacuum_min_duration"] == "60000" - assert settings["log_checkpoints"] == "on" - assert settings["log_destination"] == "stderr" - assert settings["log_temp_files"] == "1" - assert settings["log_timezone"] == "UTC" - assert settings["max_connections"] == "100" - assert settings["wal_level"] == "logical" - - # Retrieve settings from Patroni REST API. 
- result = requests.get(f"http://{host}:8008/config") - settings = result.json() - - # Validate each configuration related to Patroni - assert settings["postgresql"]["use_pg_rewind"] is True - assert settings["postgresql"]["remove_data_directory_on_rewind_failure"] is True - assert settings["postgresql"]["remove_data_directory_on_diverged_timelines"] is True - assert settings["loop_wait"] == 10 - assert settings["retry_timeout"] == 10 - assert settings["maximum_lag_on_failover"] == 1048576 - - logger.warning("Asserting port ranges") - unit = ops_test.model.applications[DATABASE_APP_NAME].units[unit_id] - assert unit.data["port-ranges"][0]["from-port"] == 5432 - assert unit.data["port-ranges"][0]["to-port"] == 5432 - assert unit.data["port-ranges"][0]["protocol"] == "tcp" - - -@pytest.mark.group(1) -async def test_postgresql_parameters_change(ops_test: OpsTest) -> None: - """Test that's possible to change PostgreSQL parameters.""" - await ops_test.model.applications[DATABASE_APP_NAME].set_config({ - "memory_max_prepared_transactions": "100", - "memory_shared_buffers": "128", - "response_lc_monetary": "en_GB.utf8", - "experimental_max_connections": "200", - }) - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", idle_period=30) - any_unit_name = ops_test.model.applications[DATABASE_APP_NAME].units[0].name - password = await get_password(ops_test, any_unit_name) - - # Connect to PostgreSQL. 
- for unit_id in UNIT_IDS: - host = get_unit_address(ops_test, f"{DATABASE_APP_NAME}/{unit_id}") - logger.info("connecting to the database host: %s", host) - try: - with psycopg2.connect( - f"dbname='postgres' user='operator' host='{host}' password='{password}' connect_timeout=1" - ) as connection, connection.cursor() as cursor: - settings_names = [ - "max_prepared_transactions", - "shared_buffers", - "lc_monetary", - "max_connections", - ] - cursor.execute( - sql.SQL("SELECT name,setting FROM pg_settings WHERE name IN ({});").format( - sql.SQL(", ").join(sql.Placeholder() * len(settings_names)) - ), - settings_names, - ) - records = cursor.fetchall() - settings = convert_records_to_dict(records) - - # Validate each configuration set by Patroni on PostgreSQL. - assert settings["max_prepared_transactions"] == "100" - assert settings["shared_buffers"] == "128" - assert settings["lc_monetary"] == "en_GB.utf8" - assert settings["max_connections"] == "200" - finally: - connection.close() - - -@pytest.mark.group(1) -async def test_scale_down_and_up(ops_test: OpsTest): - """Test data is replicated to new units after a scale up.""" - # Ensure the initial number of units in the application. - initial_scale = len(UNIT_IDS) - await scale_application(ops_test, DATABASE_APP_NAME, initial_scale) - - # Scale down the application. - await scale_application(ops_test, DATABASE_APP_NAME, initial_scale - 1) - - # Ensure the member was correctly removed from the cluster - # (by comparing the cluster members and the current units). - await check_cluster_members(ops_test, DATABASE_APP_NAME) - - # Scale up the application (2 more units than the current scale). - await scale_application(ops_test, DATABASE_APP_NAME, initial_scale + 1) - - # Assert the correct members are part of the cluster. - await check_cluster_members(ops_test, DATABASE_APP_NAME) - - # Test the deletion of the unit that is both the leader and the primary. 
- any_unit_name = ops_test.model.applications[DATABASE_APP_NAME].units[0].name - primary = await get_primary(ops_test, any_unit_name) - leader_unit = await find_unit(ops_test, leader=True, application=DATABASE_APP_NAME) - - # Trigger a switchover if the primary and the leader are not the same unit. - if primary != leader_unit.name: - switchover(ops_test, primary, leader_unit.name) - - # Get the new primary unit. - primary = await get_primary(ops_test, any_unit_name) - # Check that the primary changed. - for attempt in Retrying( - stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=30) - ): - with attempt: - assert primary == leader_unit.name - - await ops_test.model.applications[DATABASE_APP_NAME].destroy_units(leader_unit.name) - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="active", timeout=1000, wait_for_exact_units=initial_scale - ) - - # Assert the correct members are part of the cluster. - await check_cluster_members(ops_test, DATABASE_APP_NAME) - - # Scale up the application (2 more units than the current scale). - await scale_application(ops_test, DATABASE_APP_NAME, initial_scale + 2) - - # Test the deletion of both the unit that is the leader and the unit that is the primary. - any_unit_name = ops_test.model.applications[DATABASE_APP_NAME].units[0].name - primary = await get_primary(ops_test, any_unit_name) - leader_unit = await find_unit(ops_test, DATABASE_APP_NAME, True) - - # Trigger a switchover if the primary and the leader are the same unit. - if primary == leader_unit.name: - switchover(ops_test, primary) - - # Get the new primary unit. - primary = await get_primary(ops_test, any_unit_name) - # Check that the primary changed. 
- for attempt in Retrying( - stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=30) - ): - with attempt: - assert primary != leader_unit.name - - await ops_test.model.applications[DATABASE_APP_NAME].destroy_units(primary, leader_unit.name) - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], - status="active", - timeout=2000, - wait_for_exact_units=initial_scale, - ) - - # Assert the correct members are part of the cluster. - await check_cluster_members(ops_test, DATABASE_APP_NAME) - - # End with the cluster having the initial number of units. - await scale_application(ops_test, DATABASE_APP_NAME, initial_scale) - - -@pytest.mark.group(1) -async def test_persist_data_through_primary_deletion(ops_test: OpsTest): - """Test data persists through a primary deletion.""" - # Set a composite application name in order to test in more than one series at the same time. - any_unit_name = ops_test.model.applications[DATABASE_APP_NAME].units[0].name - for attempt in Retrying(stop=stop_after_attempt(3), wait=wait_fixed(5), reraise=True): - with attempt: - primary = await get_primary(ops_test, any_unit_name) - password = await get_password(ops_test, primary) - - # Write data to primary IP. - host = get_unit_address(ops_test, primary) - logger.info(f"connecting to primary {primary} on {host}") - with db_connect(host, password) as connection: - connection.autocommit = True - with connection.cursor() as cursor: - cursor.execute("CREATE TABLE primarydeletiontest (testcol INT);") - connection.close() - - # Remove one unit. - await ops_test.model.destroy_units( - primary, - ) - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=1500) - - # Add the unit again. 
- await ops_test.model.applications[DATABASE_APP_NAME].add_unit(count=1) - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=2000) - - # Testing write occurred to every postgres instance by reading from them - for unit in ops_test.model.applications[DATABASE_APP_NAME].units: - host = unit.public_address - logger.info("connecting to the database host: %s", host) - with db_connect(host, password) as connection: - with connection.cursor() as cursor: - # Ensure we can read from "primarydeletiontest" table - cursor.execute("SELECT * FROM primarydeletiontest;") - connection.close() diff --git a/tests/integration/test_db.py b/tests/integration/test_db.py deleted file mode 100644 index 5ea134700f..0000000000 --- a/tests/integration/test_db.py +++ /dev/null @@ -1,358 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2022 Canonical Ltd. -# See LICENSE file for licensing details. -import asyncio -import logging - -import psycopg2 as psycopg2 -import pytest as pytest -from juju.errors import JujuUnitError -from mailmanclient import Client -from pytest_operator.plugin import OpsTest -from tenacity import Retrying, stop_after_delay, wait_fixed - -from . 
import markers -from .helpers import ( - APPLICATION_NAME, - CHARM_SERIES, - DATABASE_APP_NAME, - assert_sync_standbys, - build_connection_string, - check_database_users_existence, - check_databases_creation, - deploy_and_relate_application_with_postgresql, - deploy_and_relate_bundle_with_postgresql, - find_unit, - get_leader_unit, - run_command_on_unit, -) - -logger = logging.getLogger(__name__) - -LIVEPATCH_APP_NAME = "livepatch" -MAILMAN3_CORE_APP_NAME = "mailman3-core" -APPLICATION_UNITS = 1 -DATABASE_UNITS = 2 -RELATION_NAME = "db" - -ROLES_BLOCKING_MESSAGE = ( - "roles requested through relation, use postgresql_client interface instead" -) - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_mailman3_core_db(ops_test: OpsTest, charm: str) -> None: - """Deploy Mailman3 Core to test the 'db' relation.""" - async with ops_test.fast_forward(): - await ops_test.model.deploy( - charm, - application_name=DATABASE_APP_NAME, - num_units=DATABASE_UNITS, - series=CHARM_SERIES, - config={"profile": "testing"}, - ) - - # Wait until the PostgreSQL charm is successfully deployed. - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], - status="active", - timeout=1500, - wait_for_exact_units=DATABASE_UNITS, - ) - - # Extra config option for Mailman3 Core. - config = {"hostname": "example.org"} - # Deploy and test the deployment of Mailman3 Core. - relation_id = await deploy_and_relate_application_with_postgresql( - ops_test, - "mailman3-core", - MAILMAN3_CORE_APP_NAME, - APPLICATION_UNITS, - config, - ) - await check_databases_creation(ops_test, ["mailman3"]) - - mailman3_core_users = [f"relation-{relation_id}"] - - await check_database_users_existence(ops_test, mailman3_core_users, []) - - # Assert Mailman3 Core is configured to use PostgreSQL instead of SQLite. 
- mailman_unit = ops_test.model.applications[MAILMAN3_CORE_APP_NAME].units[0] - result = await run_command_on_unit(ops_test, mailman_unit.name, "mailman info") - assert "db url: postgres://" in result - - # Do some CRUD operations using Mailman3 Core client. - domain_name = "canonical.com" - list_name = "postgresql-list" - credentials = ( - result.split("credentials: ")[1].strip().split(":") - ) # This outputs a list containing username and password. - client = Client( - f"http://{mailman_unit.public_address}:8001/3.1", credentials[0], credentials[1] - ) - - # Create a domain and list the domains to check that the new one is there. - domain = client.create_domain(domain_name) - assert domain_name in [domain.mail_host for domain in client.domains] - - # Update the domain by creating a mailing list into it. - mailing_list = domain.create_list(list_name) - assert mailing_list.fqdn_listname in [ - mailing_list.fqdn_listname for mailing_list in domain.lists - ] - - # Delete the domain and check that the change was persisted. - domain.delete() - assert domain_name not in [domain.mail_host for domain in client.domains] - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_relation_data_is_updated_correctly_when_scaling(ops_test: OpsTest): - """Test that relation data, like connection data, is updated correctly when scaling.""" - # Retrieve the list of current database unit names. - units_to_remove = [unit.name for unit in ops_test.model.applications[DATABASE_APP_NAME].units] - - async with ops_test.fast_forward(): - # Add two more units. - await ops_test.model.applications[DATABASE_APP_NAME].add_units(2) - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="active", timeout=1500, wait_for_exact_units=4 - ) - - assert_sync_standbys( - ops_test.model.applications[DATABASE_APP_NAME].units[0].public_address, 2 - ) - - # Remove the original units. 
- leader_unit = await get_leader_unit(ops_test, DATABASE_APP_NAME) - await ops_test.model.applications[DATABASE_APP_NAME].destroy_units(*[ - unit for unit in units_to_remove if unit != leader_unit.name - ]) - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="active", timeout=600, wait_for_exact_units=3 - ) - await ops_test.model.applications[DATABASE_APP_NAME].destroy_units(leader_unit.name) - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="active", timeout=600, wait_for_exact_units=2 - ) - - # Get the updated connection data and assert it can be used - # to write and read some data properly. - database_unit_name = ops_test.model.applications[DATABASE_APP_NAME].units[0].name - primary_connection_string = await build_connection_string( - ops_test, MAILMAN3_CORE_APP_NAME, RELATION_NAME, remote_unit_name=database_unit_name - ) - replica_connection_string = await build_connection_string( - ops_test, - MAILMAN3_CORE_APP_NAME, - RELATION_NAME, - read_only_endpoint=True, - remote_unit_name=database_unit_name, - ) - - # Connect to the database using the primary connection string. - with psycopg2.connect(primary_connection_string) as connection: - connection.autocommit = True - with connection.cursor() as cursor: - # Check that it's possible to write and read data from the database that - # was created for the application. - cursor.execute("DROP TABLE IF EXISTS test;") - cursor.execute("CREATE TABLE test(data TEXT);") - cursor.execute("INSERT INTO test(data) VALUES('some data');") - cursor.execute("SELECT data FROM test;") - data = cursor.fetchone() - assert data[0] == "some data" - connection.close() - - # Connect to the database using the replica endpoint. - with psycopg2.connect(replica_connection_string) as connection: - with connection.cursor() as cursor: - # Read some data. 
- cursor.execute("SELECT data FROM test;") - data = cursor.fetchone() - assert data[0] == "some data" - - # Try to alter some data in a read-only transaction. - with pytest.raises(psycopg2.errors.ReadOnlySqlTransaction): - cursor.execute("DROP TABLE test;") - connection.close() - - # Remove the relation and test that its user was deleted - # (by checking that the connection string doesn't work anymore). - async with ops_test.fast_forward(): - await ops_test.model.applications[DATABASE_APP_NAME].remove_relation( - f"{DATABASE_APP_NAME}:{RELATION_NAME}", f"{MAILMAN3_CORE_APP_NAME}:{RELATION_NAME}" - ) - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=1000) - for attempt in Retrying(stop=stop_after_delay(60 * 3), wait=wait_fixed(10)): - with attempt: - with pytest.raises(psycopg2.OperationalError): - psycopg2.connect(primary_connection_string) - - -@pytest.mark.group(1) -@markers.amd64_only # sentry snap not available for arm64 -async def test_sentry_db_blocked(ops_test: OpsTest, charm: str) -> None: - async with ops_test.fast_forward(): - # Deploy Sentry and its dependencies. 
- await asyncio.gather( - ops_test.model.deploy( - "omnivector-sentry", application_name="sentry1", series="bionic" - ), - ops_test.model.deploy("haproxy", series="focal"), - ops_test.model.deploy("omnivector-redis", application_name="redis", series="bionic"), - ) - await ops_test.model.wait_for_idle( - apps=["sentry1"], - status="blocked", - raise_on_blocked=False, - timeout=1000, - ) - await asyncio.gather( - ops_test.model.relate("sentry1", "redis"), - ops_test.model.relate("sentry1", f"{DATABASE_APP_NAME}:db"), - ops_test.model.relate("sentry1", "haproxy"), - ) - - # Only the leader will block - leader_unit = await find_unit(ops_test, DATABASE_APP_NAME, True) - - try: - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], - status="blocked", - raise_on_blocked=True, - timeout=1000, - ) - assert False, "Leader didn't block" - except JujuUnitError: - pass - - assert ( - leader_unit.workload_status_message - == "extensions requested through relation, enable them through config options" - ) - - # Verify that the charm unblocks when the extensions are enabled after being blocked - # due to disabled extensions. - logger.info("Verifying that the charm unblocks when the extensions are enabled") - config = {"plugin_citext_enable": "True"} - await ops_test.model.applications[DATABASE_APP_NAME].set_config(config) - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME, "sentry1"], - status="active", - raise_on_blocked=False, - idle_period=15, - ) - - # Verify that the charm doesn't block when the extensions are enabled - # (another sentry deployment is used because it doesn't request a database - # again after the relation with the PostgreSQL charm is destroyed and reestablished). 
- logger.info("Verifying that the charm doesn't block when the extensions are enabled") - await asyncio.gather( - ops_test.model.remove_application("sentry1", block_until_done=True), - ops_test.model.deploy( - "omnivector-sentry", application_name="sentry2", series="bionic" - ), - ) - await asyncio.gather( - ops_test.model.relate("sentry2", "redis"), - ops_test.model.relate("sentry2", f"{DATABASE_APP_NAME}:db"), - ops_test.model.relate("sentry2", "haproxy"), - ) - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME, "sentry2"], status="active", raise_on_blocked=False - ) - - await asyncio.gather( - ops_test.model.remove_application("redis", block_until_done=True), - ops_test.model.remove_application("sentry2", block_until_done=True), - ops_test.model.remove_application("haproxy", block_until_done=True), - ) - - -@pytest.mark.group(1) -async def test_roles_blocking(ops_test: OpsTest, charm: str) -> None: - await ops_test.model.deploy( - APPLICATION_NAME, - application_name=APPLICATION_NAME, - config={"legacy_roles": True}, - series=CHARM_SERIES, - channel="edge", - ) - await ops_test.model.deploy( - APPLICATION_NAME, - application_name=f"{APPLICATION_NAME}2", - config={"legacy_roles": True}, - series=CHARM_SERIES, - channel="edge", - ) - - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME, APPLICATION_NAME, f"{APPLICATION_NAME}2"], - status="active", - timeout=1000, - ) - - await asyncio.gather( - ops_test.model.relate(f"{DATABASE_APP_NAME}:db", f"{APPLICATION_NAME}:db"), - ops_test.model.relate(f"{DATABASE_APP_NAME}:db", f"{APPLICATION_NAME}2:db"), - ) - - leader_unit = await get_leader_unit(ops_test, DATABASE_APP_NAME) - await ops_test.model.block_until( - lambda: leader_unit.workload_status_message == ROLES_BLOCKING_MESSAGE, timeout=1000 - ) - - assert leader_unit.workload_status_message == ROLES_BLOCKING_MESSAGE - - logger.info("Verify that the charm remains blocked if there are other blocking relations") - await 
ops_test.model.applications[DATABASE_APP_NAME].destroy_relation( - f"{DATABASE_APP_NAME}:db", f"{APPLICATION_NAME}:db" - ) - - await ops_test.model.block_until( - lambda: leader_unit.workload_status_message == ROLES_BLOCKING_MESSAGE, timeout=1000 - ) - - assert leader_unit.workload_status_message == ROLES_BLOCKING_MESSAGE - - logger.info("Verify that active status is restored when all blocking relations are gone") - await ops_test.model.applications[DATABASE_APP_NAME].destroy_relation( - f"{DATABASE_APP_NAME}:db", f"{APPLICATION_NAME}2:db" - ) - - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], - status="active", - timeout=1000, - ) - - -@markers.juju2 -@pytest.mark.group(1) -@markers.amd64_only # canonical-livepatch-server charm (in bundle) not available for arm64 -async def test_canonical_livepatch_onprem_bundle_db(ops_test: OpsTest) -> None: - # Deploy and test the Livepatch onprem bundle (using this PostgreSQL charm - # and an overlay to make the Ubuntu Advantage charm work with PostgreSQL). - # We intentionally wait for the `✘ sync_token not set` status message as we - # aren't providing an Ubuntu Pro token (as this is just a test to ensure - # the database works in the context of the relation with the Livepatch charm). 
- overlay = { - "applications": {"ubuntu-advantage": {"charm": "ubuntu-advantage", "series": CHARM_SERIES}} - } - await deploy_and_relate_bundle_with_postgresql( - ops_test, - "canonical-livepatch-onprem", - LIVEPATCH_APP_NAME, - relation_name="db", - status="blocked", - status_message="✘ sync_token not set", - overlay=overlay, - ) - - action = await ops_test.model.units.get(f"{LIVEPATCH_APP_NAME}/0").run_action("schema-upgrade") - await action.wait() - assert action.results.get("Code") == "0", "schema-upgrade action hasn't succeeded" diff --git a/tests/integration/test_db_admin.py b/tests/integration/test_db_admin.py deleted file mode 100644 index b8ad190d34..0000000000 --- a/tests/integration/test_db_admin.py +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2022 Canonical Ltd. -# See LICENSE file for licensing details. -import json -import logging - -import psycopg2 -import pytest -from landscape_api.base import HTTPError, run_query -from pytest_operator.plugin import OpsTest -from tenacity import Retrying, stop_after_delay, wait_fixed - -from .helpers import ( - CHARM_SERIES, - DATABASE_APP_NAME, - build_connection_string, - check_database_users_existence, - check_databases_creation, - deploy_and_relate_bundle_with_postgresql, - ensure_correct_relation_data, - get_landscape_api_credentials, - get_machine_from_unit, - get_primary, - primary_changed, - start_machine, - stop_machine, - switchover, -) - -logger = logging.getLogger(__name__) - -HAPROXY_APP_NAME = "haproxy" -LANDSCAPE_APP_NAME = "landscape-server" -RABBITMQ_APP_NAME = "rabbitmq-server" -DATABASE_UNITS = 3 -RELATION_NAME = "db-admin" - - -@pytest.mark.group(1) -async def test_landscape_scalable_bundle_db(ops_test: OpsTest, charm: str) -> None: - """Deploy Landscape Scalable Bundle to test the 'db-admin' relation.""" - await ops_test.model.deploy( - charm, - application_name=DATABASE_APP_NAME, - num_units=DATABASE_UNITS, - series=CHARM_SERIES, - config={"profile": "testing", 
"plugin_plpython3u_enable": "True"}, - ) - - # Deploy and test the Landscape Scalable bundle (using this PostgreSQL charm). - relation_id = await deploy_and_relate_bundle_with_postgresql( - ops_test, - "ch:landscape-scalable", - LANDSCAPE_APP_NAME, - main_application_num_units=2, - relation_name=RELATION_NAME, - timeout=3000, - ) - await check_databases_creation( - ops_test, - [ - "landscape-standalone-account-1", - "landscape-standalone-knowledge", - "landscape-standalone-main", - "landscape-standalone-package", - "landscape-standalone-resource-1", - "landscape-standalone-session", - ], - ) - - landscape_users = [f"relation-{relation_id}"] - - await check_database_users_existence(ops_test, landscape_users, []) - - # Create the admin user on Landscape through configs. - await ops_test.model.applications["landscape-server"].set_config({ - "admin_email": "admin@canonical.com", - "admin_name": "Admin", - "admin_password": "test1234", - }) - await ops_test.model.wait_for_idle( - apps=["landscape-server", DATABASE_APP_NAME], - status="active", - timeout=1200, - ) - - # Connect to the Landscape API through HAProxy and do some CRUD calls (without the update). - key, secret = await get_landscape_api_credentials(ops_test) - haproxy_unit = ops_test.model.applications[HAPROXY_APP_NAME].units[0] - api_uri = f"https://{haproxy_unit.public_address}/api/" - - # Create a role and list the available roles later to check that the new one is there. - role_name = "User1" - run_query(key, secret, "CreateRole", {"name": role_name}, api_uri, False) - api_response = run_query(key, secret, "GetRoles", {}, api_uri, False) - assert role_name in [user["name"] for user in json.loads(api_response)] - - # Remove the role and assert it isn't part of the roles list anymore. 
- run_query(key, secret, "RemoveRole", {"name": role_name}, api_uri, False) - api_response = run_query(key, secret, "GetRoles", {}, api_uri, False) - assert role_name not in [user["name"] for user in json.loads(api_response)] - - await ensure_correct_relation_data(ops_test, DATABASE_UNITS, LANDSCAPE_APP_NAME, RELATION_NAME) - - # Enable automatically-retry-hooks due to https://bugs.launchpad.net/juju/+bug/1999758 - # (the implemented workaround restarts the unit in the middle of the start hook, - # so the hook fails, and it's not retried on CI). - await ops_test.model.set_config({"automatically-retry-hooks": "true"}) - - # Stop the primary unit machine. - logger.info("restarting primary") - former_primary = await get_primary(ops_test, f"{DATABASE_APP_NAME}/0") - former_primary_machine = await get_machine_from_unit(ops_test, former_primary) - await stop_machine(ops_test, former_primary_machine) - - # Await for a new primary to be elected. - assert await primary_changed(ops_test, former_primary) - - # Start the former primary unit machine again. - await start_machine(ops_test, former_primary_machine) - - # Wait for the unit to be ready again. Some errors in the start hook may happen due to - # rebooting the unit machine in the middle of a hook (what is needed when the issue from - # https://bugs.launchpad.net/juju/+bug/1999758 happens). - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="active", timeout=1500, raise_on_error=False - ) - - await ensure_correct_relation_data(ops_test, DATABASE_UNITS, LANDSCAPE_APP_NAME, RELATION_NAME) - - # Trigger a switchover. - logger.info("triggering a switchover") - primary = await get_primary(ops_test, f"{DATABASE_APP_NAME}/0") - switchover(ops_test, primary) - - # Await for a new primary to be elected. 
- assert await primary_changed(ops_test, primary) - - await ensure_correct_relation_data(ops_test, DATABASE_UNITS, LANDSCAPE_APP_NAME, RELATION_NAME) - - # Trigger a config change to start the Landscape API service again. - # The Landscape API was stopped after a new primary (postgresql) was elected. - await ops_test.model.applications["landscape-server"].set_config({ - "admin_name": "Admin 1", - }) - await ops_test.model.wait_for_idle( - apps=["landscape-server", DATABASE_APP_NAME], timeout=1500, status="active" - ) - - # Create a role and list the available roles later to check that the new one is there. - role_name = "User2" - try: - run_query(key, secret, "CreateRole", {"name": role_name}, api_uri, False) - except HTTPError as e: - assert False, f"error when trying to create role on Landscape: {e}" - - database_unit_name = ops_test.model.applications[DATABASE_APP_NAME].units[0].name - connection_string = await build_connection_string( - ops_test, LANDSCAPE_APP_NAME, RELATION_NAME, remote_unit_name=database_unit_name - ) - - # Remove the applications from the bundle. - await ops_test.model.remove_application(LANDSCAPE_APP_NAME, block_until_done=True) - await ops_test.model.remove_application(HAPROXY_APP_NAME, block_until_done=True) - await ops_test.model.remove_application(RABBITMQ_APP_NAME, block_until_done=True) - - # Remove the relation and test that its user was deleted - # (by checking that the connection string doesn't work anymore). - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=1000) - for attempt in Retrying(stop=stop_after_delay(60 * 3), wait=wait_fixed(10)): - with attempt: - with pytest.raises(psycopg2.OperationalError): - psycopg2.connect(connection_string) - - # Remove the PostgreSQL application. 
- await ops_test.model.remove_application(DATABASE_APP_NAME, block_until_done=True) diff --git a/tests/integration/test_password_rotation.py b/tests/integration/test_password_rotation.py deleted file mode 100644 index ffb4cca458..0000000000 --- a/tests/integration/test_password_rotation.py +++ /dev/null @@ -1,196 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2022 Canonical Ltd. -# See LICENSE file for licensing details. -import json -import time - -import psycopg2 -import pytest -from pytest_operator.plugin import OpsTest - -from . import markers -from .helpers import ( - CHARM_SERIES, - METADATA, - check_patroni, - db_connect, - get_leader_unit, - get_password, - get_primary, - get_unit_address, - restart_patroni, - run_command_on_unit, - set_password, -) - -APP_NAME = METADATA["name"] - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_deploy_active(ops_test: OpsTest): - """Build the charm and deploy it.""" - charm = await ops_test.build_charm(".") - async with ops_test.fast_forward(): - await ops_test.model.deploy( - charm, - application_name=APP_NAME, - num_units=3, - series=CHARM_SERIES, - config={"profile": "testing"}, - ) - await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1500) - - -@pytest.mark.group(1) -async def test_password_rotation(ops_test: OpsTest): - """Test password rotation action.""" - # Get the initial passwords set for the system users. - any_unit_name = ops_test.model.applications[APP_NAME].units[0].name - superuser_password = await get_password(ops_test, any_unit_name) - replication_password = await get_password(ops_test, any_unit_name, "replication") - monitoring_password = await get_password(ops_test, any_unit_name, "monitoring") - backup_password = await get_password(ops_test, any_unit_name, "backup") - rewind_password = await get_password(ops_test, any_unit_name, "rewind") - - # Get the leader unit name (because passwords can only be set through it). 
- leader = None - for unit in ops_test.model.applications[APP_NAME].units: - if await unit.is_leader_from_status(): - leader = unit.name - break - - # Change both passwords. - result = await set_password(ops_test, unit_name=leader) - assert "password" in result.keys() - await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1000) - - # For replication, generate a specific password and pass it to the action. - new_replication_password = "test-password" - result = await set_password( - ops_test, unit_name=leader, username="replication", password=new_replication_password - ) - assert "password" in result.keys() - await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1000) - - # For monitoring, generate a specific password and pass it to the action. - new_monitoring_password = "test-password" - result = await set_password( - ops_test, unit_name=leader, username="monitoring", password=new_monitoring_password - ) - assert "password" in result.keys() - await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1000) - - # For backup, generate a specific password and pass it to the action. - new_backup_password = "test-password" - result = await set_password( - ops_test, unit_name=leader, username="backup", password=new_backup_password - ) - assert "password" in result.keys() - await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1000) - - # For rewind, generate a specific password and pass it to the action. 
- new_rewind_password = "test-password" - result = await set_password( - ops_test, unit_name=leader, username="rewind", password=new_rewind_password - ) - assert "password" in result.keys() - await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1000) - - new_superuser_password = await get_password(ops_test, any_unit_name) - assert superuser_password != new_superuser_password - assert new_replication_password == await get_password(ops_test, any_unit_name, "replication") - assert replication_password != new_replication_password - assert new_monitoring_password == await get_password(ops_test, any_unit_name, "monitoring") - assert monitoring_password != new_monitoring_password - assert new_backup_password == await get_password(ops_test, any_unit_name, "backup") - assert backup_password != new_backup_password - assert new_rewind_password == await get_password(ops_test, any_unit_name, "rewind") - assert rewind_password != new_rewind_password - - # Restart Patroni on any non-leader unit and check that - # Patroni and PostgreSQL continue to work. - restart_time = time.time() - for unit in ops_test.model.applications[APP_NAME].units: - if not await unit.is_leader_from_status(): - restart_patroni(ops_test, unit.name) - assert check_patroni(ops_test, unit.name, restart_time) - - -@pytest.mark.group(1) -@markers.juju_secrets -async def test_password_from_secret_same_as_cli(ops_test: OpsTest): - """Checking if password is same as returned by CLI. - - I.e. we're manipulating the secret we think we're manipulating. - """ - # - # No way to retrieve a secet by label for now (https://bugs.launchpad.net/juju/+bug/2037104) - # Therefore we take advantage of the fact, that we only have ONE single secret a this point - # So we take the single member of the list - # NOTE: This would BREAK if for instance units had secrets at the start... 
- # - leader_unit = await get_leader_unit(ops_test, APP_NAME) - leader = leader_unit.name - password = await get_password(ops_test, unit_name=leader, username="replication") - complete_command = "list-secrets" - _, stdout, _ = await ops_test.juju(*complete_command.split()) - secret_id = stdout.split("\n")[1].split(" ")[0] - - # Getting back the pw from juju CLI - complete_command = f"show-secret {secret_id} --reveal --format=json" - _, stdout, _ = await ops_test.juju(*complete_command.split()) - data = json.loads(stdout) - assert data[secret_id]["content"]["Data"]["replication-password"] == password - - -@pytest.mark.group(1) -async def test_empty_password(ops_test: OpsTest) -> None: - """Test that the password can't be set to an empty string.""" - leader_unit = await get_leader_unit(ops_test, APP_NAME) - leader = leader_unit.name - await set_password(ops_test, unit_name=leader, username="replication", password="") - password = await get_password(ops_test, unit_name=leader, username="replication") - # The password is 'None', BUT NOT because of SECRET_DELETED_LABEL - # `get_secret()` returns a None value (as the field in the secret is set to string value "None") - # And this true None value is turned to a string when the event is setting results. 
- assert password == "None" - - -@pytest.mark.group(1) -async def test_db_connection_with_empty_password(ops_test: OpsTest): - """Test that user can't connect with empty password.""" - primary = await get_primary(ops_test, f"{APP_NAME}/0") - address = get_unit_address(ops_test, primary) - with pytest.raises(psycopg2.Error): - with db_connect(address, "") as connection: - connection.close() - - -@pytest.mark.group(1) -async def test_no_password_change_on_invalid_password(ops_test: OpsTest) -> None: - """Test that in general, there is no change when password validation fails.""" - leader_unit = await get_leader_unit(ops_test, APP_NAME) - leader = leader_unit.name - password1 = await get_password(ops_test, unit_name=leader, username="replication") - # The password has to be minimum 3 characters - await set_password(ops_test, unit_name=leader, username="replication", password="ca" * 1000000) - password2 = await get_password(ops_test, unit_name=leader, username="replication") - # The password didn't change - assert password1 == password2 - - -@pytest.mark.group(1) -async def test_no_password_exposed_on_logs(ops_test: OpsTest) -> None: - """Test that passwords don't get exposed on postgresql logs.""" - for unit in ops_test.model.applications[APP_NAME].units: - try: - logs = await run_command_on_unit( - ops_test, - unit.name, - "grep PASSWORD /var/snap/charmed-postgresql/common/var/log/postgresql/postgresql-*.log", - ) - except Exception: - continue - assert len(logs) == 0, f"Sensitive information detected on {unit.name} logs" diff --git a/tests/integration/test_plugins.py b/tests/integration/test_plugins.py deleted file mode 100644 index 5d78dcd3aa..0000000000 --- a/tests/integration/test_plugins.py +++ /dev/null @@ -1,210 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2023 Canonical Ltd. -# See LICENSE file for licensing details. 
-import logging - -import psycopg2 as psycopg2 -import pytest as pytest -from pytest_operator.plugin import OpsTest - -from .helpers import ( - CHARM_SERIES, - DATABASE_APP_NAME, - db_connect, - get_password, - get_primary, - get_unit_address, -) - -logger = logging.getLogger(__name__) - -CITEXT_EXTENSION_STATEMENT = "CREATE TABLE citext_test (value CITEXT);" -DEBVERSION_EXTENSION_STATEMENT = "CREATE TABLE debversion_test (value DEBVERSION);" -HSTORE_EXTENSION_STATEMENT = "CREATE TABLE hstore_test (value hstore);" -PG_TRGM_EXTENSION_STATEMENT = "SELECT word_similarity('word', 'two words');" -PLPYTHON3U_EXTENSION_STATEMENT = 'CREATE FUNCTION plpython_test() RETURNS varchar[] AS $$ return "hello" $$ LANGUAGE plpython3u;' -UNACCENT_EXTENSION_STATEMENT = "SELECT ts_lexize('unaccent','Hôtel');" -BLOOM_EXTENSION_STATEMENT = ( - "CREATE TABLE tbloom_test (i int);CREATE INDEX btreeidx ON tbloom_test USING bloom (i);" -) -BTREEGIN_EXTENSION_STATEMENT = "CREATE TABLE btree_gin_test (a int4);CREATE INDEX btreeginidx ON btree_gin_test USING GIN (a);" -BTREEGIST_EXTENSION_STATEMENT = "CREATE TABLE btree_gist_test (a int4);CREATE INDEX btreegistidx ON btree_gist_test USING GIST (a);" -CUBE_EXTENSION_STATEMENT = "SELECT cube_inter('(0,-1),(1,1)', '(-2),(2)');" -DICTINT_EXTENSION_STATEMENT = "SELECT ts_lexize('intdict', '12345678');" -DICTXSYN_EXTENSION_STATEMENT = "SELECT ts_lexize('xsyn', 'word');" -EARTHDISTANCE_EXTENSION_STATEMENT = "SELECT earth_distance(ll_to_earth(-81.3927381, 30.2918842),ll_to_earth(-87.6473133, 41.8853881));" -FUZZYSTRMATCH_EXTENSION_STATEMENT = "SELECT soundex('hello world!');" -INTARRAY_EXTENSION_STATEMENT = "CREATE TABLE intarray_test (mid INT PRIMARY KEY, sections INT[]);SELECT intarray_test.mid FROM intarray_test WHERE intarray_test.sections @> '{1,2}';" -ISN_EXTENSION_STATEMENT = "SELECT isbn('978-0-393-04002-9');" -LO_EXTENSION_STATEMENT = "CREATE TABLE lo_test (value lo);" -LTREE_EXTENSION_STATEMENT = "CREATE TABLE ltree_test (path ltree);" 
-OLD_SNAPSHOT_EXTENSION_STATEMENT = "SELECT * from pg_old_snapshot_time_mapping();" -PG_FREESPACEMAP_EXTENSION_STATEMENT = ( - "CREATE TABLE pg_freespacemap_test (i int);SELECT * FROM pg_freespace('pg_freespacemap_test');" -) -PGROWLOCKS_EXTENSION_STATEMENT = ( - "CREATE TABLE pgrowlocks_test (i int);SELECT * FROM pgrowlocks('pgrowlocks_test');" -) -PGSTATTUPLE_EXTENSION_STATEMENT = "SELECT * FROM pgstattuple('pg_catalog.pg_proc');" -PG_VISIBILITY_EXTENSION_STATEMENT = "CREATE TABLE pg_visibility_test (i int);SELECT * FROM pg_visibility('pg_visibility_test'::regclass);" -SEG_EXTENSION_STATEMENT = "SELECT '10(+-)1'::seg as seg;" -TABLEFUNC_EXTENSION_STATEMENT = "SELECT * FROM normal_rand(1000, 5, 3);" -TCN_EXTENSION_STATEMENT = "CREATE TABLE tcn_test (i int);CREATE TRIGGER tcn_test_idx AFTER INSERT OR UPDATE OR DELETE ON tcn_test FOR EACH ROW EXECUTE FUNCTION TRIGGERED_CHANGE_NOTIFICATION();" -TSM_SYSTEM_ROWS_EXTENSION_STATEMENT = "CREATE TABLE tsm_system_rows_test (i int);SELECT * FROM tsm_system_rows_test TABLESAMPLE SYSTEM_ROWS(100);" -TSM_SYSTEM_TIME_EXTENSION_STATEMENT = "CREATE TABLE tsm_system_time_test (i int);SELECT * FROM tsm_system_time_test TABLESAMPLE SYSTEM_TIME(1000);" -UUID_OSSP_EXTENSION_STATEMENT = "SELECT uuid_nil();" -REFINT_EXTENSION_STATEMENT = "CREATE TABLE A (ID int4 not null); CREATE UNIQUE INDEX AI ON A (ID);CREATE TABLE B (REFB int4);CREATE INDEX BI ON B (REFB);CREATE TRIGGER BT BEFORE INSERT OR UPDATE ON B FOR EACH ROW EXECUTE PROCEDURE check_primary_key ('REFB', 'A', 'ID');" -AUTOINC_EXTENSION_STATEMENT = "CREATE TABLE ids (id int4, idesc text);CREATE TRIGGER ids_nextid BEFORE INSERT OR UPDATE ON ids FOR EACH ROW EXECUTE PROCEDURE autoinc (id, next_id);" -INSERT_USERNAME_EXTENSION_STATEMENT = "CREATE TABLE username_test (name text, username text not null);CREATE TRIGGER insert_usernames BEFORE INSERT OR UPDATE ON username_test FOR EACH ROW EXECUTE PROCEDURE insert_username (username);" -MODDATETIME_EXTENSION_STATEMENT = "CREATE TABLE mdt 
(moddate timestamp DEFAULT CURRENT_TIMESTAMP NOT NULL);CREATE TRIGGER mdt_moddatetime BEFORE UPDATE ON mdt FOR EACH ROW EXECUTE PROCEDURE moddatetime (moddate);" -BOOL_PLPERL_EXTENSION_STATEMENT = "CREATE FUNCTION hello_bool(bool) RETURNS TEXT TRANSFORM FOR TYPE bool LANGUAGE plperl AS $$ my $with_world = shift; return sprintf('hello%s', $with_world ? ' world' : ''); $$;" -HLL_EXTENSION_STATEMENT = "CREATE TABLE hll_test (users hll);" -HYPOPG_EXTENSION_STATEMENT = "CREATE TABLE hypopg_test (id integer, val text); SELECT hypopg_create_index('CREATE INDEX ON hypopg_test (id)');" -IP4R_EXTENSION_STATEMENT = "CREATE TABLE ip4r_test (ip ip4);" -JSONB_PLPERL_EXTENSION_STATEMENT = "CREATE OR REPLACE FUNCTION jsonb_plperl_test(val jsonb) RETURNS jsonb TRANSFORM FOR TYPE jsonb LANGUAGE plperl as $$ return $_[0]; $$;" -ORAFCE_EXTENSION_STATEMENT = "SELECT add_months(date '2005-05-31',1);" -PG_SIMILARITY_EXTENSION_STATEMENT = "SHOW pg_similarity.levenshtein_threshold;" -PLPERL_EXTENSION_STATEMENT = "CREATE OR REPLACE FUNCTION plperl_test(name text) RETURNS text AS $$ return $_SHARED{$_[0]}; $$ LANGUAGE plperl;" -PREFIX_EXTENSION_STATEMENT = "SELECT '123'::prefix_range @> '123456';" -RDKIT_EXTENSION_STATEMENT = "SELECT is_valid_smiles('CCC');" -TDS_FDW_EXTENSION_STATEMENT = "CREATE SERVER mssql_svr FOREIGN DATA WRAPPER tds_fdw OPTIONS (servername 'tds_fdw_test', port '3306', database 'tds_fdw_test', tds_version '7.1');" -ICU_EXT_EXTENSION_STATEMENT = ( - 'CREATE COLLATION "vat-lat" (provider = icu, locale = "la-VA-u-kn-true")' -) -PLTCL_EXTENSION_STATEMENT = ( - "CREATE FUNCTION pltcl_test(integer) RETURNS integer AS $$ return $1 $$ LANGUAGE pltcl STRICT;" -) -POSTGIS_EXTENSION_STATEMENT = "SELECT PostGIS_Full_Version();" -ADDRESS_STANDARDIZER_EXTENSION_STATEMENT = "SELECT num, street, city, zip, zipplus FROM parse_address('1 Devonshire Place, Boston, MA 02109-1234');" -ADDRESS_STANDARDIZER_DATA_US_EXTENSION_STATEMENT = "SELECT house_num, name, suftype, city, country, state, 
unit FROM standardize_address('us_lex', 'us_gaz', 'us_rules', 'One Devonshire Place, PH 301, Boston, MA 02109');" -POSTGIS_TIGER_GEOCODER_EXTENSION_STATEMENT = "SELECT * FROM standardize_address('tiger.pagc_lex', 'tiger.pagc_gaz', 'tiger.pagc_rules', 'One Devonshire Place, PH 301, Boston, MA 02109-1234');" -POSTGIS_TOPOLOGY_EXTENSION_STATEMENT = "SELECT topology.CreateTopology('nyc_topo', 26918, 0.5);" -POSTGIS_RASTER_EXTENSION_STATEMENT = ( - "CREATE TABLE test_postgis_raster (name varchar, rast raster);" -) -VECTOR_EXTENSION_STATEMENT = ( - "CREATE TABLE vector_test (id bigserial PRIMARY KEY, embedding vector(3));" -) -TIMESCALEDB_EXTENSION_STATEMENT = "CREATE TABLE test_timescaledb (time TIMESTAMPTZ NOT NULL); SELECT create_hypertable('test_timescaledb', 'time');" - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_plugins(ops_test: OpsTest) -> None: - """Build and deploy one unit of PostgreSQL and then test the available plugins.""" - # Build and deploy the PostgreSQL charm. 
- async with ops_test.fast_forward(): - charm = await ops_test.build_charm(".") - await ops_test.model.deploy( - charm, - num_units=2, - series=CHARM_SERIES, - config={"profile": "testing"}, - ) - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=1500) - - sql_tests = { - "plugin_citext_enable": CITEXT_EXTENSION_STATEMENT, - "plugin_debversion_enable": DEBVERSION_EXTENSION_STATEMENT, - "plugin_hstore_enable": HSTORE_EXTENSION_STATEMENT, - "plugin_pg_trgm_enable": PG_TRGM_EXTENSION_STATEMENT, - "plugin_plpython3u_enable": PLPYTHON3U_EXTENSION_STATEMENT, - "plugin_unaccent_enable": UNACCENT_EXTENSION_STATEMENT, - "plugin_bloom_enable": BLOOM_EXTENSION_STATEMENT, - "plugin_btree_gin_enable": BTREEGIN_EXTENSION_STATEMENT, - "plugin_btree_gist_enable": BTREEGIST_EXTENSION_STATEMENT, - "plugin_cube_enable": CUBE_EXTENSION_STATEMENT, - "plugin_dict_int_enable": DICTINT_EXTENSION_STATEMENT, - "plugin_dict_xsyn_enable": DICTXSYN_EXTENSION_STATEMENT, - "plugin_earthdistance_enable": EARTHDISTANCE_EXTENSION_STATEMENT, - "plugin_fuzzystrmatch_enable": FUZZYSTRMATCH_EXTENSION_STATEMENT, - "plugin_intarray_enable": INTARRAY_EXTENSION_STATEMENT, - "plugin_isn_enable": ISN_EXTENSION_STATEMENT, - "plugin_lo_enable": LO_EXTENSION_STATEMENT, - "plugin_ltree_enable": LTREE_EXTENSION_STATEMENT, - "plugin_old_snapshot_enable": OLD_SNAPSHOT_EXTENSION_STATEMENT, - "plugin_pg_freespacemap_enable": PG_FREESPACEMAP_EXTENSION_STATEMENT, - "plugin_pgrowlocks_enable": PGROWLOCKS_EXTENSION_STATEMENT, - "plugin_pgstattuple_enable": PGSTATTUPLE_EXTENSION_STATEMENT, - "plugin_pg_visibility_enable": PG_VISIBILITY_EXTENSION_STATEMENT, - "plugin_seg_enable": SEG_EXTENSION_STATEMENT, - "plugin_tablefunc_enable": TABLEFUNC_EXTENSION_STATEMENT, - "plugin_tcn_enable": TCN_EXTENSION_STATEMENT, - "plugin_tsm_system_rows_enable": TSM_SYSTEM_ROWS_EXTENSION_STATEMENT, - "plugin_tsm_system_time_enable": TSM_SYSTEM_TIME_EXTENSION_STATEMENT, - "plugin_uuid_ossp_enable": 
UUID_OSSP_EXTENSION_STATEMENT, - "plugin_spi_enable": [ - REFINT_EXTENSION_STATEMENT, - AUTOINC_EXTENSION_STATEMENT, - INSERT_USERNAME_EXTENSION_STATEMENT, - MODDATETIME_EXTENSION_STATEMENT, - ], - "plugin_bool_plperl_enable": BOOL_PLPERL_EXTENSION_STATEMENT, - "plugin_hll_enable": HLL_EXTENSION_STATEMENT, - "plugin_postgis_enable": POSTGIS_EXTENSION_STATEMENT, - "plugin_hypopg_enable": HYPOPG_EXTENSION_STATEMENT, - "plugin_ip4r_enable": IP4R_EXTENSION_STATEMENT, - "plugin_plperl_enable": PLPERL_EXTENSION_STATEMENT, - "plugin_jsonb_plperl_enable": JSONB_PLPERL_EXTENSION_STATEMENT, - "plugin_orafce_enable": ORAFCE_EXTENSION_STATEMENT, - "plugin_pg_similarity_enable": ORAFCE_EXTENSION_STATEMENT, - "plugin_prefix_enable": PREFIX_EXTENSION_STATEMENT, - "plugin_rdkit_enable": RDKIT_EXTENSION_STATEMENT, - "plugin_tds_fdw_enable": TDS_FDW_EXTENSION_STATEMENT, - "plugin_icu_ext_enable": ICU_EXT_EXTENSION_STATEMENT, - "plugin_pltcl_enable": PLTCL_EXTENSION_STATEMENT, - "plugin_address_standardizer_enable": ADDRESS_STANDARDIZER_EXTENSION_STATEMENT, - "plugin_address_standardizer_data_us_enable": ADDRESS_STANDARDIZER_DATA_US_EXTENSION_STATEMENT, - "plugin_postgis_tiger_geocoder_enable": POSTGIS_TIGER_GEOCODER_EXTENSION_STATEMENT, - "plugin_postgis_raster_enable": POSTGIS_RASTER_EXTENSION_STATEMENT, - "plugin_postgis_topology_enable": POSTGIS_TOPOLOGY_EXTENSION_STATEMENT, - "plugin_vector_enable": VECTOR_EXTENSION_STATEMENT, - "plugin_timescaledb_enable": TIMESCALEDB_EXTENSION_STATEMENT, - } - - def enable_disable_config(enabled: False): - config = {} - for plugin in sql_tests.keys(): - config[plugin] = f"{enabled}" - return config - - # Check that the available plugins are disabled. 
- primary = await get_primary(ops_test, f"{DATABASE_APP_NAME}/0") - password = await get_password(ops_test, primary) - address = get_unit_address(ops_test, primary) - - config = enable_disable_config(False) - await ops_test.model.applications[DATABASE_APP_NAME].set_config(config) - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active") - - logger.info("checking that the plugins are disabled") - with db_connect(host=address, password=password) as connection: - connection.autocommit = True - for query in sql_tests.values(): - if isinstance(query, list): - for test in query: - with pytest.raises(psycopg2.Error): - connection.cursor().execute(test) - else: - with pytest.raises(psycopg2.Error): - connection.cursor().execute(query) - connection.close() - - # Enable the plugins. - logger.info("enabling the plugins") - - config = enable_disable_config(True) - await ops_test.model.applications[DATABASE_APP_NAME].set_config(config) - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active") - - # Check that the available plugins are enabled. - logger.info("checking that the plugins are enabled") - with db_connect(host=address, password=password) as connection: - connection.autocommit = True - for query in sql_tests.values(): - if isinstance(query, list): - for test in query: - connection.cursor().execute(test) - else: - connection.cursor().execute(query) - connection.close() diff --git a/tests/integration/test_subordinates.py b/tests/integration/test_subordinates.py deleted file mode 100644 index f9d30dedba..0000000000 --- a/tests/integration/test_subordinates.py +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Canonical Ltd. -# See LICENSE file for licensing details. 
- -import logging -from asyncio import gather - -import pytest -from pytest_operator.plugin import OpsTest - -from .helpers import ( - CHARM_SERIES, - scale_application, -) - -DATABASE_APP_NAME = "pg" -LS_CLIENT = "landscape-client" -UBUNTU_PRO_APP_NAME = "ubuntu-advantage" - -logger = logging.getLogger(__name__) - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_deploy(ops_test: OpsTest, charm: str, github_secrets): - await gather( - ops_test.model.deploy( - charm, - application_name=DATABASE_APP_NAME, - num_units=3, - series=CHARM_SERIES, - ), - ops_test.model.deploy( - UBUNTU_PRO_APP_NAME, - config={"token": github_secrets["UBUNTU_PRO_TOKEN"]}, - channel="latest/edge", - num_units=0, - ), - ops_test.model.deploy( - LS_CLIENT, - config={ - "account-name": github_secrets["LANDSCAPE_ACCOUNT_NAME"], - "registration-key": github_secrets["LANDSCAPE_REGISTRATION_KEY"], - "ppa": "ppa:landscape/self-hosted-beta", - }, - channel="latest/edge", - num_units=0, - ), - ) - - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=2000) - await ops_test.model.relate(f"{DATABASE_APP_NAME}:juju-info", f"{LS_CLIENT}:container") - await ops_test.model.relate( - f"{DATABASE_APP_NAME}:juju-info", f"{UBUNTU_PRO_APP_NAME}:juju-info" - ) - await ops_test.model.wait_for_idle( - apps=[LS_CLIENT, UBUNTU_PRO_APP_NAME, DATABASE_APP_NAME], status="active" - ) - - -@pytest.mark.group(1) -async def test_scale_up(ops_test: OpsTest, github_secrets): - await scale_application(ops_test, DATABASE_APP_NAME, 4) - - await ops_test.model.wait_for_idle( - apps=[LS_CLIENT, UBUNTU_PRO_APP_NAME, DATABASE_APP_NAME], status="active", timeout=1500 - ) - - -@pytest.mark.group(1) -async def test_scale_down(ops_test: OpsTest, github_secrets): - await scale_application(ops_test, DATABASE_APP_NAME, 3) - - await ops_test.model.wait_for_idle( - apps=[LS_CLIENT, UBUNTU_PRO_APP_NAME, DATABASE_APP_NAME], status="active", timeout=1500 - ) diff --git 
a/tests/integration/test_tls.py b/tests/integration/test_tls.py deleted file mode 100644 index d336f96259..0000000000 --- a/tests/integration/test_tls.py +++ /dev/null @@ -1,223 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2022 Canonical Ltd. -# See LICENSE file for licensing details. -import logging -import os - -import pytest as pytest -from pytest_operator.plugin import OpsTest -from tenacity import Retrying, stop_after_attempt, stop_after_delay, wait_exponential - -from . import architecture -from .helpers import ( - CHARM_SERIES, - DATABASE_APP_NAME, - METADATA, - change_primary_start_timeout, - check_tls, - check_tls_patroni_api, - check_tls_replication, - db_connect, - get_password, - get_primary, - get_unit_address, - primary_changed, - restart_machine, - run_command_on_unit, -) -from .juju_ import juju_major_version - -logger = logging.getLogger(__name__) - -APP_NAME = METADATA["name"] -if juju_major_version < 3: - tls_certificates_app_name = "tls-certificates-operator" - if architecture.architecture == "arm64": - tls_channel = "legacy/edge" - else: - tls_channel = "legacy/stable" - tls_config = {"generate-self-signed-certificates": "true", "ca-common-name": "Test CA"} -else: - tls_certificates_app_name = "self-signed-certificates" - if architecture.architecture == "arm64": - tls_channel = "latest/edge" - else: - tls_channel = "latest/stable" - tls_config = {"ca-common-name": "Test CA"} - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_deploy_active(ops_test: OpsTest): - """Build the charm and deploy it.""" - charm = await ops_test.build_charm(".") - async with ops_test.fast_forward(): - await ops_test.model.deploy( - charm, - application_name=APP_NAME, - num_units=3, - series=CHARM_SERIES, - config={"profile": "testing"}, - ) - # No wait between deploying charms, since we can't guarantee users will wait. Furthermore, - # bundles don't wait between deploying charms. 
- - -@pytest.mark.group(1) -async def test_tls_enabled(ops_test: OpsTest) -> None: - """Test that TLS is enabled when relating to the TLS Certificates Operator.""" - async with ops_test.fast_forward(): - # Deploy TLS Certificates operator. - await ops_test.model.deploy( - tls_certificates_app_name, config=tls_config, channel=tls_channel - ) - - # Relate it to the PostgreSQL to enable TLS. - await ops_test.model.relate(DATABASE_APP_NAME, tls_certificates_app_name) - await ops_test.model.wait_for_idle(status="active", timeout=1500) - - # Wait for all units enabling TLS. - for unit in ops_test.model.applications[DATABASE_APP_NAME].units: - assert await check_tls(ops_test, unit.name, enabled=True) - assert await check_tls_patroni_api(ops_test, unit.name, enabled=True) - - # Test TLS being used by pg_rewind. To accomplish that, get the primary unit - # and a replica that will be promoted to primary (this should trigger a rewind - # operation when the old primary is started again). - any_unit = ops_test.model.applications[DATABASE_APP_NAME].units[0].name - primary = await get_primary(ops_test, any_unit) - replica = [ - unit.name - for unit in ops_test.model.applications[DATABASE_APP_NAME].units - if unit.name != primary - ][0] - - # Check if TLS enabled for replication - assert await check_tls_replication(ops_test, primary, enabled=True) - - # Enable additional logs on the PostgreSQL instance to check TLS - # being used in a later step and make the fail-over to happens faster. - await ops_test.model.applications[DATABASE_APP_NAME].set_config({ - "logging_log_connections": "True" - }) - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="active", idle_period=30 - ) - change_primary_start_timeout(ops_test, primary, 0) - - for attempt in Retrying( - stop=stop_after_delay(60 * 5), wait=wait_exponential(multiplier=1, min=2, max=30) - ): - with attempt: - # Promote the replica to primary. 
- await run_command_on_unit( - ops_test, - replica, - "sudo -u snap_daemon charmed-postgresql.pg-ctl -D /var/snap/charmed-postgresql/common/var/lib/postgresql/ promote", - ) - - # Check that the replica was promoted. - host = get_unit_address(ops_test, replica) - password = await get_password(ops_test, replica) - with db_connect(host, password) as connection: - connection.autocommit = True - with connection.cursor() as cursor: - cursor.execute("SELECT pg_is_in_recovery();") - in_recovery = cursor.fetchone()[0] - print(f"in_recovery: {in_recovery}") - assert not in_recovery - connection.close() - - # Write some data to the initial primary (this causes a divergence - # in the instances' timelines). - host = get_unit_address(ops_test, primary) - password = await get_password(ops_test, primary) - with db_connect(host, password) as connection: - connection.autocommit = True - with connection.cursor() as cursor: - cursor.execute("CREATE TABLE IF NOT EXISTS pgrewindtest (testcol INT);") - cursor.execute("INSERT INTO pgrewindtest SELECT generate_series(1,1000);") - connection.close() - - # Stop the initial primary by killing both Patroni and PostgreSQL OS processes. - await run_command_on_unit( - ops_test, - primary, - "pkill --signal SIGKILL -f /snap/charmed-postgresql/current/usr/lib/postgresql/14/bin/postgres", - ) - await run_command_on_unit( - ops_test, - primary, - "pkill --signal SIGKILL -f /snap/charmed-postgresql/[0-9]*/usr/bin/patroni", - ) - - # Check that the primary changed. - assert await primary_changed(ops_test, primary), "primary not changed" - change_primary_start_timeout(ops_test, primary, 300) - - # Check the logs to ensure TLS is being used by pg_rewind. - primary = await get_primary(ops_test, primary) - await run_command_on_unit( - ops_test, - primary, - "grep 'connection authorized: user=rewind database=postgres SSL enabled' /var/snap/charmed-postgresql/common/var/log/postgresql/postgresql-*.log", - ) - - # Remove the relation. 
- await ops_test.model.applications[DATABASE_APP_NAME].remove_relation( - f"{DATABASE_APP_NAME}:certificates", f"{tls_certificates_app_name}:certificates" - ) - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=1000) - - # Wait for all units disabling TLS. - for unit in ops_test.model.applications[DATABASE_APP_NAME].units: - assert await check_tls(ops_test, unit.name, enabled=False) - assert await check_tls_patroni_api(ops_test, unit.name, enabled=False) - - -@pytest.mark.group(1) -@pytest.mark.skipif( - not os.environ.get("RESTART_MACHINE_TEST"), - reason="RESTART_MACHINE_TEST environment variable not set", -) -async def test_restart_machine(ops_test: OpsTest) -> None: - async with ops_test.fast_forward(): - # Relate it to the PostgreSQL to enable TLS. - await ops_test.model.relate(DATABASE_APP_NAME, tls_certificates_app_name) - await ops_test.model.wait_for_idle(status="active", timeout=1000) - - # Wait for all units enabling TLS. - for unit in ops_test.model.applications[DATABASE_APP_NAME].units: - assert await check_tls(ops_test, unit.name, enabled=True) - assert await check_tls_patroni_api(ops_test, unit.name, enabled=True) - - unit_name = "postgresql/0" - issue_found = False - for attempt in Retrying(stop=stop_after_attempt(10)): - with attempt: - # Restart the machine of the unit. - logger.info(f"restarting {unit_name}") - await restart_machine(ops_test, unit_name) - - # Check whether the issue happened (the storage wasn't mounted). - logger.info( - f"checking whether storage was mounted - attempt {attempt.retry_state.attempt_number}" - ) - result = await run_command_on_unit(ops_test, unit_name, "lsblk") - if "/var/lib/postgresql/data" not in result: - issue_found = True - - assert ( - issue_found - ), "Couldn't reproduce the issue from https://bugs.launchpad.net/juju/+bug/1999758" - - # Wait for the unit to be ready again. Some errors in the start hook may happen due - # to rebooting in the middle of a hook. 
- await ops_test.model.wait_for_idle(status="active", timeout=1000, raise_on_error=False) - - # Wait for the unit enabling TLS again. - logger.info(f"checking TLS on {unit_name}") - assert await check_tls(ops_test, "postgresql/0", enabled=True) - logger.info(f"checking TLS on Patroni API from {unit_name}") - assert await check_tls_patroni_api(ops_test, "postgresql/0", enabled=True) From 266dc71aeeab6e0a105fe07ba9325ffd29988d5f Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Tue, 18 Jun 2024 13:56:01 -0300 Subject: [PATCH 11/13] Remove unnecessary tests Signed-off-by: Marcelo Henrique Neppel --- .../new_relations/test_new_relations.py | 611 ------------------ tests/integration/relations/test_relations.py | 145 ----- 2 files changed, 756 deletions(-) delete mode 100644 tests/integration/new_relations/test_new_relations.py delete mode 100644 tests/integration/relations/test_relations.py diff --git a/tests/integration/new_relations/test_new_relations.py b/tests/integration/new_relations/test_new_relations.py deleted file mode 100644 index 779d50d573..0000000000 --- a/tests/integration/new_relations/test_new_relations.py +++ /dev/null @@ -1,611 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2022 Canonical Ltd. -# See LICENSE file for licensing details. -import asyncio -import logging -import secrets -import string -from pathlib import Path - -import psycopg2 -import pytest -import yaml -from pytest_operator.plugin import OpsTest - -from .. 
import markers -from ..helpers import CHARM_SERIES, assert_sync_standbys, get_leader_unit, scale_application -from ..juju_ import juju_major_version -from .helpers import ( - build_connection_string, - check_relation_data_existence, - get_application_relation_data, -) - -logger = logging.getLogger(__name__) - -APPLICATION_APP_NAME = "postgresql-test-app" -DATABASE_APP_NAME = "database" -ANOTHER_DATABASE_APP_NAME = "another-database" -DATA_INTEGRATOR_APP_NAME = "data-integrator" -APP_NAMES = [APPLICATION_APP_NAME, DATABASE_APP_NAME, ANOTHER_DATABASE_APP_NAME] -DATABASE_APP_METADATA = yaml.safe_load(Path("./metadata.yaml").read_text()) -FIRST_DATABASE_RELATION_NAME = "first-database" -SECOND_DATABASE_RELATION_NAME = "second-database" -MULTIPLE_DATABASE_CLUSTERS_RELATION_NAME = "multiple-database-clusters" -ALIASED_MULTIPLE_DATABASE_CLUSTERS_RELATION_NAME = "aliased-multiple-database-clusters" -NO_DATABASE_RELATION_NAME = "no-database" -INVALID_EXTRA_USER_ROLE_BLOCKING_MESSAGE = "invalid role(s) for extra user roles" - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_deploy_charms(ops_test: OpsTest, charm): - """Deploy both charms (application and database) to use in the tests.""" - # Deploy both charms (multiple units for each application to test that later they correctly - # set data in the relation application databag using only the leader unit). 
- async with ops_test.fast_forward(): - await asyncio.gather( - ops_test.model.deploy( - APPLICATION_APP_NAME, - application_name=APPLICATION_APP_NAME, - num_units=2, - series=CHARM_SERIES, - channel="edge", - ), - ops_test.model.deploy( - charm, - application_name=DATABASE_APP_NAME, - num_units=1, - series=CHARM_SERIES, - config={"profile": "testing"}, - ), - ops_test.model.deploy( - charm, - application_name=ANOTHER_DATABASE_APP_NAME, - num_units=2, - series=CHARM_SERIES, - config={"profile": "testing"}, - ), - ) - - await ops_test.model.wait_for_idle(apps=APP_NAMES, status="active", timeout=3000) - - -@pytest.mark.group(1) -async def test_no_read_only_endpoint_in_standalone_cluster(ops_test: OpsTest): - """Test that there is no read-only endpoint in a standalone cluster.""" - async with ops_test.fast_forward(): - # Ensure the cluster starts with only one member. - # We can't scale down a running cluster to 1 unit because the way - # Patroni raft implementation works (to scale from 2 units to 1 Patroni - # needs at least one mode unit that run only raft to have quorum). - assert len(ops_test.model.applications[DATABASE_APP_NAME].units) == 1 - - # Relate the charms and wait for them exchanging some connection data. 
- await ops_test.model.add_relation( - f"{APPLICATION_APP_NAME}:{FIRST_DATABASE_RELATION_NAME}", DATABASE_APP_NAME - ) - await ops_test.model.wait_for_idle(apps=APP_NAMES, status="active") - - # Check that on juju 3 we have secrets and no username and password in the rel databag - if juju_major_version > 2: - logger.info("checking for secrets") - secret_uri, password = await asyncio.gather( - get_application_relation_data( - ops_test, - APPLICATION_APP_NAME, - FIRST_DATABASE_RELATION_NAME, - "secret-user", - ), - get_application_relation_data( - ops_test, - APPLICATION_APP_NAME, - FIRST_DATABASE_RELATION_NAME, - "password", - ), - ) - assert secret_uri is not None - assert password is None - - # Try to get the connection string of the database using the read-only endpoint. - # It should not be available. - assert await check_relation_data_existence( - ops_test, - APPLICATION_APP_NAME, - FIRST_DATABASE_RELATION_NAME, - "read-only-endpoints", - exists=False, - ) - - -@pytest.mark.group(1) -async def test_read_only_endpoint_in_scaled_up_cluster(ops_test: OpsTest): - """Test that there is read-only endpoint in a scaled up cluster.""" - async with ops_test.fast_forward(): - # Scale up the database. - await scale_application(ops_test, DATABASE_APP_NAME, 2) - - # Try to get the connection string of the database using the read-only endpoint. - # It should be available again. - assert await check_relation_data_existence( - ops_test, - APPLICATION_APP_NAME, - FIRST_DATABASE_RELATION_NAME, - "read-only-endpoints", - exists=True, - ) - - -@pytest.mark.group(1) -async def test_database_relation_with_charm_libraries(ops_test: OpsTest): - """Test basic functionality of database relation interface.""" - # Get the connection string to connect to the database using the read/write endpoint. - connection_string = await build_connection_string( - ops_test, APPLICATION_APP_NAME, FIRST_DATABASE_RELATION_NAME - ) - - # Connect to the database using the read/write endpoint. 
- with psycopg2.connect(connection_string) as connection, connection.cursor() as cursor: - # Check that it's possible to write and read data from the database that - # was created for the application. - connection.autocommit = True - cursor.execute("DROP TABLE IF EXISTS test;") - cursor.execute("CREATE TABLE test(data TEXT);") - cursor.execute("INSERT INTO test(data) VALUES('some data');") - cursor.execute("SELECT data FROM test;") - data = cursor.fetchone() - assert data[0] == "some data" - - # Check the version that the application received is the same on the database server. - cursor.execute("SELECT version();") - data = cursor.fetchone()[0].split(" ")[1] - - # Get the version of the database and compare with the information that - # was retrieved directly from the database. - version = await get_application_relation_data( - ops_test, APPLICATION_APP_NAME, FIRST_DATABASE_RELATION_NAME, "version" - ) - assert version == data - - # Get the connection string to connect to the database using the read-only endpoint. - connection_string = await build_connection_string( - ops_test, APPLICATION_APP_NAME, FIRST_DATABASE_RELATION_NAME, read_only_endpoint=True - ) - - # Connect to the database using the read-only endpoint. - with psycopg2.connect(connection_string) as connection, connection.cursor() as cursor: - # Read some data. - cursor.execute("SELECT data FROM test;") - data = cursor.fetchone() - assert data[0] == "some data" - - # Try to alter some data in a read-only transaction. - with pytest.raises(psycopg2.errors.ReadOnlySqlTransaction): - cursor.execute("DROP TABLE test;") - - -@pytest.mark.group(1) -async def test_user_with_extra_roles(ops_test: OpsTest): - """Test superuser actions and the request for more permissions.""" - # Get the connection string to connect to the database. - connection_string = await build_connection_string( - ops_test, APPLICATION_APP_NAME, FIRST_DATABASE_RELATION_NAME - ) - - # Connect to the database. 
- connection = psycopg2.connect(connection_string) - connection.autocommit = True - cursor = connection.cursor() - - # Test the user can create a database and another user. - cursor.execute("CREATE DATABASE another_database;") - cursor.execute("CREATE USER another_user WITH ENCRYPTED PASSWORD 'test-password';") - - cursor.close() - connection.close() - - -@pytest.mark.group(1) -async def test_two_applications_doesnt_share_the_same_relation_data(ops_test: OpsTest): - """Test that two different application connect to the database with different credentials.""" - # Set some variables to use in this test. - another_application_app_name = "another-application" - all_app_names = [another_application_app_name] - all_app_names.extend(APP_NAMES) - - # Deploy another application. - await ops_test.model.deploy( - APPLICATION_APP_NAME, - application_name=another_application_app_name, - channel="edge", - ) - await ops_test.model.wait_for_idle(apps=all_app_names, status="active") - - # Relate the new application with the database - # and wait for them exchanging some connection data. - await ops_test.model.add_relation( - f"{another_application_app_name}:{FIRST_DATABASE_RELATION_NAME}", DATABASE_APP_NAME - ) - await ops_test.model.wait_for_idle(apps=all_app_names, status="active") - - # Assert the two application have different relation (connection) data. - application_connection_string = await build_connection_string( - ops_test, APPLICATION_APP_NAME, FIRST_DATABASE_RELATION_NAME - ) - another_application_connection_string = await build_connection_string( - ops_test, another_application_app_name, FIRST_DATABASE_RELATION_NAME - ) - - assert application_connection_string != another_application_connection_string - - # Check that the user cannot access other databases. 
- for application, other_application_database in [ - (APPLICATION_APP_NAME, "another_application_first_database"), - (another_application_app_name, f"{APPLICATION_APP_NAME.replace('-', '_')}_first_database"), - ]: - connection_string = await build_connection_string( - ops_test, application, FIRST_DATABASE_RELATION_NAME, database="postgres" - ) - with pytest.raises(psycopg2.Error): - psycopg2.connect(connection_string) - connection_string = await build_connection_string( - ops_test, - application, - FIRST_DATABASE_RELATION_NAME, - database=other_application_database, - ) - with pytest.raises(psycopg2.Error): - psycopg2.connect(connection_string) - - -@pytest.mark.group(1) -async def test_an_application_can_connect_to_multiple_database_clusters(ops_test: OpsTest): - """Test that an application can connect to different clusters of the same database.""" - # Relate the application with both database clusters - # and wait for them exchanging some connection data. - first_cluster_relation = await ops_test.model.add_relation( - f"{APPLICATION_APP_NAME}:{MULTIPLE_DATABASE_CLUSTERS_RELATION_NAME}", DATABASE_APP_NAME - ) - second_cluster_relation = await ops_test.model.add_relation( - f"{APPLICATION_APP_NAME}:{MULTIPLE_DATABASE_CLUSTERS_RELATION_NAME}", - ANOTHER_DATABASE_APP_NAME, - ) - await ops_test.model.wait_for_idle(apps=APP_NAMES, status="active") - - # Retrieve the connection string to both database clusters using the relation aliases - # and assert they are different. 
- application_connection_string = await build_connection_string( - ops_test, - APPLICATION_APP_NAME, - MULTIPLE_DATABASE_CLUSTERS_RELATION_NAME, - relation_id=first_cluster_relation.id, - ) - another_application_connection_string = await build_connection_string( - ops_test, - APPLICATION_APP_NAME, - MULTIPLE_DATABASE_CLUSTERS_RELATION_NAME, - relation_id=second_cluster_relation.id, - ) - assert application_connection_string != another_application_connection_string - - -@pytest.mark.group(1) -async def test_an_application_can_connect_to_multiple_aliased_database_clusters(ops_test: OpsTest): - """Test that an application can connect to different clusters of the same database.""" - # Relate the application with both database clusters - # and wait for them exchanging some connection data. - await asyncio.gather( - ops_test.model.add_relation( - f"{APPLICATION_APP_NAME}:{ALIASED_MULTIPLE_DATABASE_CLUSTERS_RELATION_NAME}", - DATABASE_APP_NAME, - ), - ops_test.model.add_relation( - f"{APPLICATION_APP_NAME}:{ALIASED_MULTIPLE_DATABASE_CLUSTERS_RELATION_NAME}", - ANOTHER_DATABASE_APP_NAME, - ), - ) - await ops_test.model.wait_for_idle(apps=APP_NAMES, status="active") - - # Retrieve the connection string to both database clusters using the relation aliases - # and assert they are different. 
- application_connection_string = await build_connection_string( - ops_test, - APPLICATION_APP_NAME, - ALIASED_MULTIPLE_DATABASE_CLUSTERS_RELATION_NAME, - relation_alias="cluster1", - ) - another_application_connection_string = await build_connection_string( - ops_test, - APPLICATION_APP_NAME, - ALIASED_MULTIPLE_DATABASE_CLUSTERS_RELATION_NAME, - relation_alias="cluster2", - ) - assert application_connection_string != another_application_connection_string - - -@pytest.mark.group(1) -async def test_an_application_can_request_multiple_databases(ops_test: OpsTest): - """Test that an application can request additional databases using the same interface.""" - # Relate the charms using another relation and wait for them exchanging some connection data. - await ops_test.model.add_relation( - f"{APPLICATION_APP_NAME}:{SECOND_DATABASE_RELATION_NAME}", DATABASE_APP_NAME - ) - await ops_test.model.wait_for_idle(apps=APP_NAMES, status="active") - - # Get the connection strings to connect to both databases. - first_database_connection_string = await build_connection_string( - ops_test, APPLICATION_APP_NAME, FIRST_DATABASE_RELATION_NAME - ) - second_database_connection_string = await build_connection_string( - ops_test, APPLICATION_APP_NAME, SECOND_DATABASE_RELATION_NAME - ) - - # Assert the two application have different relation (connection) data. - assert first_database_connection_string != second_database_connection_string - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_relation_data_is_updated_correctly_when_scaling(ops_test: OpsTest): - """Test that relation data, like connection data, is updated correctly when scaling.""" - # Retrieve the list of current database unit names. - units_to_remove = [unit.name for unit in ops_test.model.applications[DATABASE_APP_NAME].units] - - async with ops_test.fast_forward(fast_interval="60s"): - # Add two more units. 
- await ops_test.model.applications[DATABASE_APP_NAME].add_units(2) - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="active", timeout=1500, wait_for_exact_units=4 - ) - - assert_sync_standbys( - ops_test.model.applications[DATABASE_APP_NAME].units[0].public_address, 2 - ) - - # Remove the original units. - leader_unit = await get_leader_unit(ops_test, DATABASE_APP_NAME) - await ops_test.model.applications[DATABASE_APP_NAME].destroy_units(*[ - unit for unit in units_to_remove if unit != leader_unit.name - ]) - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="active", timeout=600, wait_for_exact_units=3 - ) - await ops_test.model.applications[DATABASE_APP_NAME].destroy_units(leader_unit.name) - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="active", timeout=600, wait_for_exact_units=2 - ) - - # Get the updated connection data and assert it can be used - # to write and read some data properly. - primary_connection_string = await build_connection_string( - ops_test, APPLICATION_APP_NAME, FIRST_DATABASE_RELATION_NAME - ) - replica_connection_string = await build_connection_string( - ops_test, APPLICATION_APP_NAME, FIRST_DATABASE_RELATION_NAME, read_only_endpoint=True - ) - - # Connect to the database using the primary connection string. - with psycopg2.connect(primary_connection_string) as connection: - connection.autocommit = True - with connection.cursor() as cursor: - # Check that it's possible to write and read data from the database that - # was created for the application. - cursor.execute("DROP TABLE IF EXISTS test;") - cursor.execute("CREATE TABLE test(data TEXT);") - cursor.execute("INSERT INTO test(data) VALUES('some data');") - cursor.execute("SELECT data FROM test;") - data = cursor.fetchone() - assert data[0] == "some data" - connection.close() - - # Connect to the database using the replica endpoint. 
- with psycopg2.connect(replica_connection_string) as connection: - with connection.cursor() as cursor: - # Read some data. - cursor.execute("SELECT data FROM test;") - data = cursor.fetchone() - assert data[0] == "some data" - - # Try to alter some data in a read-only transaction. - with pytest.raises(psycopg2.errors.ReadOnlySqlTransaction): - cursor.execute("DROP TABLE test;") - connection.close() - - async with ops_test.fast_forward(): - # Remove the relation and test that its user was deleted - # (by checking that the connection string doesn't work anymore). - await ops_test.model.applications[DATABASE_APP_NAME].remove_relation( - f"{DATABASE_APP_NAME}:database", - f"{APPLICATION_APP_NAME}:{FIRST_DATABASE_RELATION_NAME}", - ) - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=1000) - with pytest.raises(psycopg2.OperationalError): - psycopg2.connect(primary_connection_string) - - -@pytest.mark.group(1) -async def test_relation_with_no_database_name(ops_test: OpsTest): - """Test that a relation with no database name doesn't block the charm.""" - async with ops_test.fast_forward(): - # Relate the charms using a relation that doesn't provide a database name. - await ops_test.model.add_relation( - f"{APPLICATION_APP_NAME}:{NO_DATABASE_RELATION_NAME}", DATABASE_APP_NAME - ) - await ops_test.model.wait_for_idle(apps=APP_NAMES, status="active", raise_on_blocked=True) - - # Break the relation. 
- await ops_test.model.applications[DATABASE_APP_NAME].remove_relation( - f"{DATABASE_APP_NAME}", f"{APPLICATION_APP_NAME}:{NO_DATABASE_RELATION_NAME}" - ) - await ops_test.model.wait_for_idle(apps=APP_NAMES, status="active", raise_on_blocked=True) - - -@pytest.mark.group(1) -async def test_admin_role(ops_test: OpsTest): - """Test that the admin role gives access to all the databases.""" - all_app_names = [DATA_INTEGRATOR_APP_NAME] - all_app_names.extend(APP_NAMES) - async with ops_test.fast_forward(): - await ops_test.model.deploy(DATA_INTEGRATOR_APP_NAME) - await ops_test.model.wait_for_idle(apps=[DATA_INTEGRATOR_APP_NAME], status="blocked") - await ops_test.model.applications[DATA_INTEGRATOR_APP_NAME].set_config({ - "database-name": DATA_INTEGRATOR_APP_NAME.replace("-", "_"), - "extra-user-roles": "admin", - }) - await ops_test.model.wait_for_idle(apps=[DATA_INTEGRATOR_APP_NAME], status="blocked") - await ops_test.model.add_relation(DATA_INTEGRATOR_APP_NAME, DATABASE_APP_NAME) - await ops_test.model.wait_for_idle(apps=all_app_names, status="active") - - # Check that the user can access all the databases. - for database in [ - "postgres", - f"{APPLICATION_APP_NAME.replace('-', '_')}_first_database", - "another_application_first_database", - ]: - logger.info(f"connecting to the following database: {database}") - connection_string = await build_connection_string( - ops_test, DATA_INTEGRATOR_APP_NAME, "postgresql", database=database - ) - connection = None - should_fail = False - try: - with psycopg2.connect(connection_string) as connection, connection.cursor() as cursor: - # Check the version that the application received is the same on the - # database server. - cursor.execute("SELECT version();") - data = cursor.fetchone()[0].split(" ")[1] - - # Get the version of the database and compare with the information that - # was retrieved directly from the database. 
- version = await get_application_relation_data( - ops_test, DATA_INTEGRATOR_APP_NAME, "postgresql", "version" - ) - assert version == data - - # Write some data (it should fail in the "postgres" database). - random_name = ( - f"test_{''.join(secrets.choice(string.ascii_lowercase) for _ in range(10))}" - ) - should_fail = database == "postgres" - cursor.execute(f"CREATE TABLE {random_name}(data TEXT);") - if should_fail: - assert ( - False - ), f"failed to run a statement in the following database: {database}" - except psycopg2.errors.InsufficientPrivilege as e: - if not should_fail: - logger.exception(e) - assert ( - False - ), f"failed to connect to or run a statement in the following database: {database}" - finally: - if connection is not None: - connection.close() - - # Test the creation and deletion of databases. - connection_string = await build_connection_string( - ops_test, DATA_INTEGRATOR_APP_NAME, "postgresql", database="postgres" - ) - connection = psycopg2.connect(connection_string) - connection.autocommit = True - cursor = connection.cursor() - random_name = f"test_{''.join(secrets.choice(string.ascii_lowercase) for _ in range(10))}" - cursor.execute(f"CREATE DATABASE {random_name};") - cursor.execute(f"DROP DATABASE {random_name};") - try: - cursor.execute("DROP DATABASE postgres;") - assert False, "the admin extra user role was able to drop the `postgres` system database" - except psycopg2.errors.InsufficientPrivilege: - # Ignore the error, as the admin extra user role mustn't be able to drop - # the "postgres" system database. - pass - finally: - connection.close() - - -@pytest.mark.group(1) -async def test_invalid_extra_user_roles(ops_test: OpsTest): - async with ops_test.fast_forward(): - # Remove the relation between the database and the first data integrator. 
- await ops_test.model.applications[DATABASE_APP_NAME].remove_relation( - DATABASE_APP_NAME, DATA_INTEGRATOR_APP_NAME - ) - await ops_test.model.wait_for_idle(apps=APP_NAMES, status="active", raise_on_blocked=True) - - another_data_integrator_app_name = f"another-{DATA_INTEGRATOR_APP_NAME}" - data_integrator_apps_names = [DATA_INTEGRATOR_APP_NAME, another_data_integrator_app_name] - await ops_test.model.deploy( - DATA_INTEGRATOR_APP_NAME, application_name=another_data_integrator_app_name - ) - await ops_test.model.wait_for_idle( - apps=[another_data_integrator_app_name], status="blocked" - ) - for app in data_integrator_apps_names: - await ops_test.model.applications[app].set_config({ - "database-name": app.replace("-", "_"), - "extra-user-roles": "test", - }) - await ops_test.model.wait_for_idle(apps=data_integrator_apps_names, status="blocked") - for app in data_integrator_apps_names: - await ops_test.model.add_relation(f"{app}:postgresql", f"{DATABASE_APP_NAME}:database") - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME]) - await ops_test.model.block_until( - lambda: any( - unit.workload_status_message == INVALID_EXTRA_USER_ROLE_BLOCKING_MESSAGE - for unit in ops_test.model.applications[DATABASE_APP_NAME].units - ), - timeout=1000, - ) - - # Verify that the charm remains blocked if there are still other relations with invalid - # extra user roles. - await ops_test.model.applications[DATABASE_APP_NAME].destroy_relation( - f"{DATABASE_APP_NAME}:database", f"{DATA_INTEGRATOR_APP_NAME}:postgresql" - ) - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME]) - ops_test.model.block_until( - lambda: any( - unit.workload_status_message == INVALID_EXTRA_USER_ROLE_BLOCKING_MESSAGE - for unit in ops_test.model.applications[DATABASE_APP_NAME].units - ), - timeout=1000, - ) - - # Verify that active status is restored after all relations are removed. 
- await ops_test.model.applications[DATABASE_APP_NAME].destroy_relation( - f"{DATABASE_APP_NAME}:database", f"{another_data_integrator_app_name}:postgresql" - ) - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], - status="active", - raise_on_blocked=False, - timeout=1000, - ) - - -@pytest.mark.group(1) -@markers.amd64_only # nextcloud charm not available for arm64 -async def test_nextcloud_db_blocked(ops_test: OpsTest, charm: str) -> None: - async with ops_test.fast_forward(): - # Deploy Nextcloud. - await ops_test.model.deploy( - "nextcloud", - channel="edge", - application_name="nextcloud", - num_units=1, - ) - await ops_test.model.wait_for_idle( - apps=["nextcloud"], - status="blocked", - raise_on_blocked=False, - timeout=1000, - ) - - await ops_test.model.relate("nextcloud:database", f"{DATABASE_APP_NAME}:database") - - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME, "nextcloud"], - status="active", - raise_on_blocked=False, - timeout=1000, - ) diff --git a/tests/integration/relations/test_relations.py b/tests/integration/relations/test_relations.py deleted file mode 100644 index 58a0462ceb..0000000000 --- a/tests/integration/relations/test_relations.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. 
-import asyncio -import logging - -import psycopg2 -import pytest -from pytest_operator.plugin import OpsTest -from tenacity import Retrying, stop_after_delay, wait_fixed - -from ..helpers import CHARM_SERIES, METADATA -from ..new_relations.test_new_relations import APPLICATION_APP_NAME, build_connection_string -from ..relations.helpers import get_legacy_db_connection_str - -logger = logging.getLogger(__name__) - -APP_NAME = METADATA["name"] -# MAILMAN3_CORE_APP_NAME = "mailman3-core" -DB_RELATION = "db" -DATABASE_RELATION = "database" -FIRST_DATABASE_RELATION = "first-database" -DATABASE_APP_NAME = "database-app" -DB_APP_NAME = "db-app" -APP_NAMES = [APP_NAME, DATABASE_APP_NAME, DB_APP_NAME] - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_deploy_charms(ops_test: OpsTest, charm): - """Deploy both charms (application and database) to use in the tests.""" - # Deploy both charms (multiple units for each application to test that later they correctly - # set data in the relation application databag using only the leader unit). 
- async with ops_test.fast_forward(): - await asyncio.gather( - ops_test.model.deploy( - APPLICATION_APP_NAME, - application_name=DATABASE_APP_NAME, - num_units=1, - series=CHARM_SERIES, - channel="edge", - ), - ops_test.model.deploy( - charm, - application_name=APP_NAME, - num_units=1, - series=CHARM_SERIES, - config={ - "profile": "testing", - "plugin_unaccent_enable": "True", - "plugin_pg_trgm_enable": "True", - }, - ), - ops_test.model.deploy( - APPLICATION_APP_NAME, - application_name=DB_APP_NAME, - num_units=1, - series=CHARM_SERIES, - channel="edge", - ), - ) - - await ops_test.model.wait_for_idle(apps=APP_NAMES, status="active", timeout=3000) - - -@pytest.mark.group(1) -async def test_legacy_endpoint_with_multiple_related_endpoints(ops_test: OpsTest): - await ops_test.model.relate(f"{DB_APP_NAME}:{DB_RELATION}", f"{APP_NAME}:{DB_RELATION}") - await ops_test.model.relate(APP_NAME, f"{DATABASE_APP_NAME}:{FIRST_DATABASE_RELATION}") - - app = ops_test.model.applications[APP_NAME] - await ops_test.model.block_until( - lambda: "blocked" in {unit.workload_status for unit in app.units}, - timeout=1500, - ) - - logger.info(" remove relation with modern endpoints") - await ops_test.model.applications[APP_NAME].remove_relation( - f"{APP_NAME}:{DATABASE_RELATION}", f"{DATABASE_APP_NAME}:{FIRST_DATABASE_RELATION}" - ) - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle( - status="active", - timeout=1500, - raise_on_error=False, - ) - - legacy_interface_connect = await get_legacy_db_connection_str( - ops_test, DB_APP_NAME, DB_RELATION, remote_unit_name=f"{APP_NAME}/0" - ) - logger.info(f" check connect to = {legacy_interface_connect}") - for attempt in Retrying(stop=stop_after_delay(60 * 3), wait=wait_fixed(10)): - with attempt: - with psycopg2.connect(legacy_interface_connect) as connection: - assert connection.status == psycopg2.extensions.STATUS_READY - - logger.info(f" remove relation {DB_APP_NAME}:{DB_RELATION}") - async with 
ops_test.fast_forward(): - await ops_test.model.applications[APP_NAME].remove_relation( - f"{APP_NAME}:{DB_RELATION}", f"{DB_APP_NAME}:{DB_RELATION}" - ) - await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1000) - for attempt in Retrying(stop=stop_after_delay(60 * 5), wait=wait_fixed(10)): - with attempt: - with pytest.raises(psycopg2.OperationalError): - psycopg2.connect(legacy_interface_connect) - - -@pytest.mark.group(1) -async def test_modern_endpoint_with_multiple_related_endpoints(ops_test: OpsTest): - await ops_test.model.relate(f"{DB_APP_NAME}:{DB_RELATION}", f"{APP_NAME}:{DB_RELATION}") - await ops_test.model.relate(APP_NAME, f"{DATABASE_APP_NAME}:{FIRST_DATABASE_RELATION}") - - app = ops_test.model.applications[APP_NAME] - await ops_test.model.block_until( - lambda: "blocked" in {unit.workload_status for unit in app.units}, - timeout=1500, - ) - - logger.info(" remove relation with legacy endpoints") - await ops_test.model.applications[APP_NAME].remove_relation( - f"{DB_APP_NAME}:{DB_RELATION}", f"{APP_NAME}:{DB_RELATION}" - ) - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle(status="active", timeout=3000, raise_on_error=False) - - modern_interface_connect = await build_connection_string( - ops_test, DATABASE_APP_NAME, FIRST_DATABASE_RELATION - ) - logger.info(f"check connect to = {modern_interface_connect}") - for attempt in Retrying(stop=stop_after_delay(60 * 3), wait=wait_fixed(10)): - with attempt: - with psycopg2.connect(modern_interface_connect) as connection: - assert connection.status == psycopg2.extensions.STATUS_READY - - logger.info(f"remove relation {DATABASE_APP_NAME}:{FIRST_DATABASE_RELATION}") - async with ops_test.fast_forward(): - await ops_test.model.applications[APP_NAME].remove_relation( - f"{APP_NAME}:{DATABASE_RELATION}", f"{DATABASE_APP_NAME}:{FIRST_DATABASE_RELATION}" - ) - await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1000) - for attempt in 
Retrying(stop=stop_after_delay(60 * 5), wait=wait_fixed(10)): - with attempt: - with pytest.raises(psycopg2.OperationalError): - psycopg2.connect(modern_interface_connect) From 6eb6229c15bdd008f0ba2511c4bcf23cee40af09 Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Tue, 18 Jun 2024 14:44:15 -0300 Subject: [PATCH 12/13] Revert "Remove unnecessary tests" This reverts commit 266dc71aeeab6e0a105fe07ba9325ffd29988d5f. Signed-off-by: Marcelo Henrique Neppel --- .../new_relations/test_new_relations.py | 611 ++++++++++++++++++ tests/integration/relations/test_relations.py | 145 +++++ 2 files changed, 756 insertions(+) create mode 100644 tests/integration/new_relations/test_new_relations.py create mode 100644 tests/integration/relations/test_relations.py diff --git a/tests/integration/new_relations/test_new_relations.py b/tests/integration/new_relations/test_new_relations.py new file mode 100644 index 0000000000..779d50d573 --- /dev/null +++ b/tests/integration/new_relations/test_new_relations.py @@ -0,0 +1,611 @@ +#!/usr/bin/env python3 +# Copyright 2022 Canonical Ltd. +# See LICENSE file for licensing details. +import asyncio +import logging +import secrets +import string +from pathlib import Path + +import psycopg2 +import pytest +import yaml +from pytest_operator.plugin import OpsTest + +from .. 
import markers +from ..helpers import CHARM_SERIES, assert_sync_standbys, get_leader_unit, scale_application +from ..juju_ import juju_major_version +from .helpers import ( + build_connection_string, + check_relation_data_existence, + get_application_relation_data, +) + +logger = logging.getLogger(__name__) + +APPLICATION_APP_NAME = "postgresql-test-app" +DATABASE_APP_NAME = "database" +ANOTHER_DATABASE_APP_NAME = "another-database" +DATA_INTEGRATOR_APP_NAME = "data-integrator" +APP_NAMES = [APPLICATION_APP_NAME, DATABASE_APP_NAME, ANOTHER_DATABASE_APP_NAME] +DATABASE_APP_METADATA = yaml.safe_load(Path("./metadata.yaml").read_text()) +FIRST_DATABASE_RELATION_NAME = "first-database" +SECOND_DATABASE_RELATION_NAME = "second-database" +MULTIPLE_DATABASE_CLUSTERS_RELATION_NAME = "multiple-database-clusters" +ALIASED_MULTIPLE_DATABASE_CLUSTERS_RELATION_NAME = "aliased-multiple-database-clusters" +NO_DATABASE_RELATION_NAME = "no-database" +INVALID_EXTRA_USER_ROLE_BLOCKING_MESSAGE = "invalid role(s) for extra user roles" + + +@pytest.mark.group(1) +@pytest.mark.abort_on_fail +async def test_deploy_charms(ops_test: OpsTest, charm): + """Deploy both charms (application and database) to use in the tests.""" + # Deploy both charms (multiple units for each application to test that later they correctly + # set data in the relation application databag using only the leader unit). 
+ async with ops_test.fast_forward(): + await asyncio.gather( + ops_test.model.deploy( + APPLICATION_APP_NAME, + application_name=APPLICATION_APP_NAME, + num_units=2, + series=CHARM_SERIES, + channel="edge", + ), + ops_test.model.deploy( + charm, + application_name=DATABASE_APP_NAME, + num_units=1, + series=CHARM_SERIES, + config={"profile": "testing"}, + ), + ops_test.model.deploy( + charm, + application_name=ANOTHER_DATABASE_APP_NAME, + num_units=2, + series=CHARM_SERIES, + config={"profile": "testing"}, + ), + ) + + await ops_test.model.wait_for_idle(apps=APP_NAMES, status="active", timeout=3000) + + +@pytest.mark.group(1) +async def test_no_read_only_endpoint_in_standalone_cluster(ops_test: OpsTest): + """Test that there is no read-only endpoint in a standalone cluster.""" + async with ops_test.fast_forward(): + # Ensure the cluster starts with only one member. + # We can't scale down a running cluster to 1 unit because the way + # Patroni raft implementation works (to scale from 2 units to 1 Patroni + # needs at least one mode unit that run only raft to have quorum). + assert len(ops_test.model.applications[DATABASE_APP_NAME].units) == 1 + + # Relate the charms and wait for them exchanging some connection data. 
+ await ops_test.model.add_relation( + f"{APPLICATION_APP_NAME}:{FIRST_DATABASE_RELATION_NAME}", DATABASE_APP_NAME + ) + await ops_test.model.wait_for_idle(apps=APP_NAMES, status="active") + + # Check that on juju 3 we have secrets and no username and password in the rel databag + if juju_major_version > 2: + logger.info("checking for secrets") + secret_uri, password = await asyncio.gather( + get_application_relation_data( + ops_test, + APPLICATION_APP_NAME, + FIRST_DATABASE_RELATION_NAME, + "secret-user", + ), + get_application_relation_data( + ops_test, + APPLICATION_APP_NAME, + FIRST_DATABASE_RELATION_NAME, + "password", + ), + ) + assert secret_uri is not None + assert password is None + + # Try to get the connection string of the database using the read-only endpoint. + # It should not be available. + assert await check_relation_data_existence( + ops_test, + APPLICATION_APP_NAME, + FIRST_DATABASE_RELATION_NAME, + "read-only-endpoints", + exists=False, + ) + + +@pytest.mark.group(1) +async def test_read_only_endpoint_in_scaled_up_cluster(ops_test: OpsTest): + """Test that there is read-only endpoint in a scaled up cluster.""" + async with ops_test.fast_forward(): + # Scale up the database. + await scale_application(ops_test, DATABASE_APP_NAME, 2) + + # Try to get the connection string of the database using the read-only endpoint. + # It should be available again. + assert await check_relation_data_existence( + ops_test, + APPLICATION_APP_NAME, + FIRST_DATABASE_RELATION_NAME, + "read-only-endpoints", + exists=True, + ) + + +@pytest.mark.group(1) +async def test_database_relation_with_charm_libraries(ops_test: OpsTest): + """Test basic functionality of database relation interface.""" + # Get the connection string to connect to the database using the read/write endpoint. + connection_string = await build_connection_string( + ops_test, APPLICATION_APP_NAME, FIRST_DATABASE_RELATION_NAME + ) + + # Connect to the database using the read/write endpoint. 
+ with psycopg2.connect(connection_string) as connection, connection.cursor() as cursor: + # Check that it's possible to write and read data from the database that + # was created for the application. + connection.autocommit = True + cursor.execute("DROP TABLE IF EXISTS test;") + cursor.execute("CREATE TABLE test(data TEXT);") + cursor.execute("INSERT INTO test(data) VALUES('some data');") + cursor.execute("SELECT data FROM test;") + data = cursor.fetchone() + assert data[0] == "some data" + + # Check the version that the application received is the same on the database server. + cursor.execute("SELECT version();") + data = cursor.fetchone()[0].split(" ")[1] + + # Get the version of the database and compare with the information that + # was retrieved directly from the database. + version = await get_application_relation_data( + ops_test, APPLICATION_APP_NAME, FIRST_DATABASE_RELATION_NAME, "version" + ) + assert version == data + + # Get the connection string to connect to the database using the read-only endpoint. + connection_string = await build_connection_string( + ops_test, APPLICATION_APP_NAME, FIRST_DATABASE_RELATION_NAME, read_only_endpoint=True + ) + + # Connect to the database using the read-only endpoint. + with psycopg2.connect(connection_string) as connection, connection.cursor() as cursor: + # Read some data. + cursor.execute("SELECT data FROM test;") + data = cursor.fetchone() + assert data[0] == "some data" + + # Try to alter some data in a read-only transaction. + with pytest.raises(psycopg2.errors.ReadOnlySqlTransaction): + cursor.execute("DROP TABLE test;") + + +@pytest.mark.group(1) +async def test_user_with_extra_roles(ops_test: OpsTest): + """Test superuser actions and the request for more permissions.""" + # Get the connection string to connect to the database. + connection_string = await build_connection_string( + ops_test, APPLICATION_APP_NAME, FIRST_DATABASE_RELATION_NAME + ) + + # Connect to the database. 
+ connection = psycopg2.connect(connection_string) + connection.autocommit = True + cursor = connection.cursor() + + # Test the user can create a database and another user. + cursor.execute("CREATE DATABASE another_database;") + cursor.execute("CREATE USER another_user WITH ENCRYPTED PASSWORD 'test-password';") + + cursor.close() + connection.close() + + +@pytest.mark.group(1) +async def test_two_applications_doesnt_share_the_same_relation_data(ops_test: OpsTest): + """Test that two different application connect to the database with different credentials.""" + # Set some variables to use in this test. + another_application_app_name = "another-application" + all_app_names = [another_application_app_name] + all_app_names.extend(APP_NAMES) + + # Deploy another application. + await ops_test.model.deploy( + APPLICATION_APP_NAME, + application_name=another_application_app_name, + channel="edge", + ) + await ops_test.model.wait_for_idle(apps=all_app_names, status="active") + + # Relate the new application with the database + # and wait for them exchanging some connection data. + await ops_test.model.add_relation( + f"{another_application_app_name}:{FIRST_DATABASE_RELATION_NAME}", DATABASE_APP_NAME + ) + await ops_test.model.wait_for_idle(apps=all_app_names, status="active") + + # Assert the two application have different relation (connection) data. + application_connection_string = await build_connection_string( + ops_test, APPLICATION_APP_NAME, FIRST_DATABASE_RELATION_NAME + ) + another_application_connection_string = await build_connection_string( + ops_test, another_application_app_name, FIRST_DATABASE_RELATION_NAME + ) + + assert application_connection_string != another_application_connection_string + + # Check that the user cannot access other databases. 
+ for application, other_application_database in [ + (APPLICATION_APP_NAME, "another_application_first_database"), + (another_application_app_name, f"{APPLICATION_APP_NAME.replace('-', '_')}_first_database"), + ]: + connection_string = await build_connection_string( + ops_test, application, FIRST_DATABASE_RELATION_NAME, database="postgres" + ) + with pytest.raises(psycopg2.Error): + psycopg2.connect(connection_string) + connection_string = await build_connection_string( + ops_test, + application, + FIRST_DATABASE_RELATION_NAME, + database=other_application_database, + ) + with pytest.raises(psycopg2.Error): + psycopg2.connect(connection_string) + + +@pytest.mark.group(1) +async def test_an_application_can_connect_to_multiple_database_clusters(ops_test: OpsTest): + """Test that an application can connect to different clusters of the same database.""" + # Relate the application with both database clusters + # and wait for them exchanging some connection data. + first_cluster_relation = await ops_test.model.add_relation( + f"{APPLICATION_APP_NAME}:{MULTIPLE_DATABASE_CLUSTERS_RELATION_NAME}", DATABASE_APP_NAME + ) + second_cluster_relation = await ops_test.model.add_relation( + f"{APPLICATION_APP_NAME}:{MULTIPLE_DATABASE_CLUSTERS_RELATION_NAME}", + ANOTHER_DATABASE_APP_NAME, + ) + await ops_test.model.wait_for_idle(apps=APP_NAMES, status="active") + + # Retrieve the connection string to both database clusters using the relation aliases + # and assert they are different. 
+ application_connection_string = await build_connection_string( + ops_test, + APPLICATION_APP_NAME, + MULTIPLE_DATABASE_CLUSTERS_RELATION_NAME, + relation_id=first_cluster_relation.id, + ) + another_application_connection_string = await build_connection_string( + ops_test, + APPLICATION_APP_NAME, + MULTIPLE_DATABASE_CLUSTERS_RELATION_NAME, + relation_id=second_cluster_relation.id, + ) + assert application_connection_string != another_application_connection_string + + +@pytest.mark.group(1) +async def test_an_application_can_connect_to_multiple_aliased_database_clusters(ops_test: OpsTest): + """Test that an application can connect to different clusters of the same database.""" + # Relate the application with both database clusters + # and wait for them exchanging some connection data. + await asyncio.gather( + ops_test.model.add_relation( + f"{APPLICATION_APP_NAME}:{ALIASED_MULTIPLE_DATABASE_CLUSTERS_RELATION_NAME}", + DATABASE_APP_NAME, + ), + ops_test.model.add_relation( + f"{APPLICATION_APP_NAME}:{ALIASED_MULTIPLE_DATABASE_CLUSTERS_RELATION_NAME}", + ANOTHER_DATABASE_APP_NAME, + ), + ) + await ops_test.model.wait_for_idle(apps=APP_NAMES, status="active") + + # Retrieve the connection string to both database clusters using the relation aliases + # and assert they are different. 
+ application_connection_string = await build_connection_string( + ops_test, + APPLICATION_APP_NAME, + ALIASED_MULTIPLE_DATABASE_CLUSTERS_RELATION_NAME, + relation_alias="cluster1", + ) + another_application_connection_string = await build_connection_string( + ops_test, + APPLICATION_APP_NAME, + ALIASED_MULTIPLE_DATABASE_CLUSTERS_RELATION_NAME, + relation_alias="cluster2", + ) + assert application_connection_string != another_application_connection_string + + +@pytest.mark.group(1) +async def test_an_application_can_request_multiple_databases(ops_test: OpsTest): + """Test that an application can request additional databases using the same interface.""" + # Relate the charms using another relation and wait for them exchanging some connection data. + await ops_test.model.add_relation( + f"{APPLICATION_APP_NAME}:{SECOND_DATABASE_RELATION_NAME}", DATABASE_APP_NAME + ) + await ops_test.model.wait_for_idle(apps=APP_NAMES, status="active") + + # Get the connection strings to connect to both databases. + first_database_connection_string = await build_connection_string( + ops_test, APPLICATION_APP_NAME, FIRST_DATABASE_RELATION_NAME + ) + second_database_connection_string = await build_connection_string( + ops_test, APPLICATION_APP_NAME, SECOND_DATABASE_RELATION_NAME + ) + + # Assert the two application have different relation (connection) data. + assert first_database_connection_string != second_database_connection_string + + +@pytest.mark.group(1) +@pytest.mark.abort_on_fail +async def test_relation_data_is_updated_correctly_when_scaling(ops_test: OpsTest): + """Test that relation data, like connection data, is updated correctly when scaling.""" + # Retrieve the list of current database unit names. + units_to_remove = [unit.name for unit in ops_test.model.applications[DATABASE_APP_NAME].units] + + async with ops_test.fast_forward(fast_interval="60s"): + # Add two more units. 
+ await ops_test.model.applications[DATABASE_APP_NAME].add_units(2) + await ops_test.model.wait_for_idle( + apps=[DATABASE_APP_NAME], status="active", timeout=1500, wait_for_exact_units=4 + ) + + assert_sync_standbys( + ops_test.model.applications[DATABASE_APP_NAME].units[0].public_address, 2 + ) + + # Remove the original units. + leader_unit = await get_leader_unit(ops_test, DATABASE_APP_NAME) + await ops_test.model.applications[DATABASE_APP_NAME].destroy_units(*[ + unit for unit in units_to_remove if unit != leader_unit.name + ]) + await ops_test.model.wait_for_idle( + apps=[DATABASE_APP_NAME], status="active", timeout=600, wait_for_exact_units=3 + ) + await ops_test.model.applications[DATABASE_APP_NAME].destroy_units(leader_unit.name) + await ops_test.model.wait_for_idle( + apps=[DATABASE_APP_NAME], status="active", timeout=600, wait_for_exact_units=2 + ) + + # Get the updated connection data and assert it can be used + # to write and read some data properly. + primary_connection_string = await build_connection_string( + ops_test, APPLICATION_APP_NAME, FIRST_DATABASE_RELATION_NAME + ) + replica_connection_string = await build_connection_string( + ops_test, APPLICATION_APP_NAME, FIRST_DATABASE_RELATION_NAME, read_only_endpoint=True + ) + + # Connect to the database using the primary connection string. + with psycopg2.connect(primary_connection_string) as connection: + connection.autocommit = True + with connection.cursor() as cursor: + # Check that it's possible to write and read data from the database that + # was created for the application. + cursor.execute("DROP TABLE IF EXISTS test;") + cursor.execute("CREATE TABLE test(data TEXT);") + cursor.execute("INSERT INTO test(data) VALUES('some data');") + cursor.execute("SELECT data FROM test;") + data = cursor.fetchone() + assert data[0] == "some data" + connection.close() + + # Connect to the database using the replica endpoint. 
+ with psycopg2.connect(replica_connection_string) as connection: + with connection.cursor() as cursor: + # Read some data. + cursor.execute("SELECT data FROM test;") + data = cursor.fetchone() + assert data[0] == "some data" + + # Try to alter some data in a read-only transaction. + with pytest.raises(psycopg2.errors.ReadOnlySqlTransaction): + cursor.execute("DROP TABLE test;") + connection.close() + + async with ops_test.fast_forward(): + # Remove the relation and test that its user was deleted + # (by checking that the connection string doesn't work anymore). + await ops_test.model.applications[DATABASE_APP_NAME].remove_relation( + f"{DATABASE_APP_NAME}:database", + f"{APPLICATION_APP_NAME}:{FIRST_DATABASE_RELATION_NAME}", + ) + await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=1000) + with pytest.raises(psycopg2.OperationalError): + psycopg2.connect(primary_connection_string) + + +@pytest.mark.group(1) +async def test_relation_with_no_database_name(ops_test: OpsTest): + """Test that a relation with no database name doesn't block the charm.""" + async with ops_test.fast_forward(): + # Relate the charms using a relation that doesn't provide a database name. + await ops_test.model.add_relation( + f"{APPLICATION_APP_NAME}:{NO_DATABASE_RELATION_NAME}", DATABASE_APP_NAME + ) + await ops_test.model.wait_for_idle(apps=APP_NAMES, status="active", raise_on_blocked=True) + + # Break the relation. 
+ await ops_test.model.applications[DATABASE_APP_NAME].remove_relation( + f"{DATABASE_APP_NAME}", f"{APPLICATION_APP_NAME}:{NO_DATABASE_RELATION_NAME}" + ) + await ops_test.model.wait_for_idle(apps=APP_NAMES, status="active", raise_on_blocked=True) + + +@pytest.mark.group(1) +async def test_admin_role(ops_test: OpsTest): + """Test that the admin role gives access to all the databases.""" + all_app_names = [DATA_INTEGRATOR_APP_NAME] + all_app_names.extend(APP_NAMES) + async with ops_test.fast_forward(): + await ops_test.model.deploy(DATA_INTEGRATOR_APP_NAME) + await ops_test.model.wait_for_idle(apps=[DATA_INTEGRATOR_APP_NAME], status="blocked") + await ops_test.model.applications[DATA_INTEGRATOR_APP_NAME].set_config({ + "database-name": DATA_INTEGRATOR_APP_NAME.replace("-", "_"), + "extra-user-roles": "admin", + }) + await ops_test.model.wait_for_idle(apps=[DATA_INTEGRATOR_APP_NAME], status="blocked") + await ops_test.model.add_relation(DATA_INTEGRATOR_APP_NAME, DATABASE_APP_NAME) + await ops_test.model.wait_for_idle(apps=all_app_names, status="active") + + # Check that the user can access all the databases. + for database in [ + "postgres", + f"{APPLICATION_APP_NAME.replace('-', '_')}_first_database", + "another_application_first_database", + ]: + logger.info(f"connecting to the following database: {database}") + connection_string = await build_connection_string( + ops_test, DATA_INTEGRATOR_APP_NAME, "postgresql", database=database + ) + connection = None + should_fail = False + try: + with psycopg2.connect(connection_string) as connection, connection.cursor() as cursor: + # Check the version that the application received is the same on the + # database server. + cursor.execute("SELECT version();") + data = cursor.fetchone()[0].split(" ")[1] + + # Get the version of the database and compare with the information that + # was retrieved directly from the database. 
+ version = await get_application_relation_data( + ops_test, DATA_INTEGRATOR_APP_NAME, "postgresql", "version" + ) + assert version == data + + # Write some data (it should fail in the "postgres" database). + random_name = ( + f"test_{''.join(secrets.choice(string.ascii_lowercase) for _ in range(10))}" + ) + should_fail = database == "postgres" + cursor.execute(f"CREATE TABLE {random_name}(data TEXT);") + if should_fail: + assert ( + False + ), f"failed to run a statement in the following database: {database}" + except psycopg2.errors.InsufficientPrivilege as e: + if not should_fail: + logger.exception(e) + assert ( + False + ), f"failed to connect to or run a statement in the following database: {database}" + finally: + if connection is not None: + connection.close() + + # Test the creation and deletion of databases. + connection_string = await build_connection_string( + ops_test, DATA_INTEGRATOR_APP_NAME, "postgresql", database="postgres" + ) + connection = psycopg2.connect(connection_string) + connection.autocommit = True + cursor = connection.cursor() + random_name = f"test_{''.join(secrets.choice(string.ascii_lowercase) for _ in range(10))}" + cursor.execute(f"CREATE DATABASE {random_name};") + cursor.execute(f"DROP DATABASE {random_name};") + try: + cursor.execute("DROP DATABASE postgres;") + assert False, "the admin extra user role was able to drop the `postgres` system database" + except psycopg2.errors.InsufficientPrivilege: + # Ignore the error, as the admin extra user role mustn't be able to drop + # the "postgres" system database. + pass + finally: + connection.close() + + +@pytest.mark.group(1) +async def test_invalid_extra_user_roles(ops_test: OpsTest): + async with ops_test.fast_forward(): + # Remove the relation between the database and the first data integrator. 
+        await ops_test.model.applications[DATABASE_APP_NAME].remove_relation(
+            DATABASE_APP_NAME, DATA_INTEGRATOR_APP_NAME
+        )
+        await ops_test.model.wait_for_idle(apps=APP_NAMES, status="active", raise_on_blocked=True)
+
+        another_data_integrator_app_name = f"another-{DATA_INTEGRATOR_APP_NAME}"
+        data_integrator_apps_names = [DATA_INTEGRATOR_APP_NAME, another_data_integrator_app_name]
+        await ops_test.model.deploy(
+            DATA_INTEGRATOR_APP_NAME, application_name=another_data_integrator_app_name
+        )
+        await ops_test.model.wait_for_idle(
+            apps=[another_data_integrator_app_name], status="blocked"
+        )
+        for app in data_integrator_apps_names:
+            await ops_test.model.applications[app].set_config({
+                "database-name": app.replace("-", "_"),
+                "extra-user-roles": "test",
+            })
+        await ops_test.model.wait_for_idle(apps=data_integrator_apps_names, status="blocked")
+        for app in data_integrator_apps_names:
+            await ops_test.model.add_relation(f"{app}:postgresql", f"{DATABASE_APP_NAME}:database")
+        await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME])
+        await ops_test.model.block_until(
+            lambda: any(
+                unit.workload_status_message == INVALID_EXTRA_USER_ROLE_BLOCKING_MESSAGE
+                for unit in ops_test.model.applications[DATABASE_APP_NAME].units
+            ),
+            timeout=1000,
+        )
+
+        # Verify that the charm remains blocked if there are still other relations with invalid
+        # extra user roles.
+        await ops_test.model.applications[DATABASE_APP_NAME].destroy_relation(
+            f"{DATABASE_APP_NAME}:database", f"{DATA_INTEGRATOR_APP_NAME}:postgresql"
+        )
+        await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME])
+        # block_until is a coroutine — without this await the check silently never ran.
+        await ops_test.model.block_until(
+            lambda: any(
+                unit.workload_status_message == INVALID_EXTRA_USER_ROLE_BLOCKING_MESSAGE
+                for unit in ops_test.model.applications[DATABASE_APP_NAME].units
+            ),
+            timeout=1000,
+        )
+
+        # Verify that active status is restored after all relations are removed.
+ await ops_test.model.applications[DATABASE_APP_NAME].destroy_relation( + f"{DATABASE_APP_NAME}:database", f"{another_data_integrator_app_name}:postgresql" + ) + await ops_test.model.wait_for_idle( + apps=[DATABASE_APP_NAME], + status="active", + raise_on_blocked=False, + timeout=1000, + ) + + +@pytest.mark.group(1) +@markers.amd64_only # nextcloud charm not available for arm64 +async def test_nextcloud_db_blocked(ops_test: OpsTest, charm: str) -> None: + async with ops_test.fast_forward(): + # Deploy Nextcloud. + await ops_test.model.deploy( + "nextcloud", + channel="edge", + application_name="nextcloud", + num_units=1, + ) + await ops_test.model.wait_for_idle( + apps=["nextcloud"], + status="blocked", + raise_on_blocked=False, + timeout=1000, + ) + + await ops_test.model.relate("nextcloud:database", f"{DATABASE_APP_NAME}:database") + + await ops_test.model.wait_for_idle( + apps=[DATABASE_APP_NAME, "nextcloud"], + status="active", + raise_on_blocked=False, + timeout=1000, + ) diff --git a/tests/integration/relations/test_relations.py b/tests/integration/relations/test_relations.py new file mode 100644 index 0000000000..58a0462ceb --- /dev/null +++ b/tests/integration/relations/test_relations.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. 
+import asyncio +import logging + +import psycopg2 +import pytest +from pytest_operator.plugin import OpsTest +from tenacity import Retrying, stop_after_delay, wait_fixed + +from ..helpers import CHARM_SERIES, METADATA +from ..new_relations.test_new_relations import APPLICATION_APP_NAME, build_connection_string +from ..relations.helpers import get_legacy_db_connection_str + +logger = logging.getLogger(__name__) + +APP_NAME = METADATA["name"] +# MAILMAN3_CORE_APP_NAME = "mailman3-core" +DB_RELATION = "db" +DATABASE_RELATION = "database" +FIRST_DATABASE_RELATION = "first-database" +DATABASE_APP_NAME = "database-app" +DB_APP_NAME = "db-app" +APP_NAMES = [APP_NAME, DATABASE_APP_NAME, DB_APP_NAME] + + +@pytest.mark.group(1) +@pytest.mark.abort_on_fail +async def test_deploy_charms(ops_test: OpsTest, charm): + """Deploy both charms (application and database) to use in the tests.""" + # Deploy both charms (multiple units for each application to test that later they correctly + # set data in the relation application databag using only the leader unit). 
+ async with ops_test.fast_forward(): + await asyncio.gather( + ops_test.model.deploy( + APPLICATION_APP_NAME, + application_name=DATABASE_APP_NAME, + num_units=1, + series=CHARM_SERIES, + channel="edge", + ), + ops_test.model.deploy( + charm, + application_name=APP_NAME, + num_units=1, + series=CHARM_SERIES, + config={ + "profile": "testing", + "plugin_unaccent_enable": "True", + "plugin_pg_trgm_enable": "True", + }, + ), + ops_test.model.deploy( + APPLICATION_APP_NAME, + application_name=DB_APP_NAME, + num_units=1, + series=CHARM_SERIES, + channel="edge", + ), + ) + + await ops_test.model.wait_for_idle(apps=APP_NAMES, status="active", timeout=3000) + + +@pytest.mark.group(1) +async def test_legacy_endpoint_with_multiple_related_endpoints(ops_test: OpsTest): + await ops_test.model.relate(f"{DB_APP_NAME}:{DB_RELATION}", f"{APP_NAME}:{DB_RELATION}") + await ops_test.model.relate(APP_NAME, f"{DATABASE_APP_NAME}:{FIRST_DATABASE_RELATION}") + + app = ops_test.model.applications[APP_NAME] + await ops_test.model.block_until( + lambda: "blocked" in {unit.workload_status for unit in app.units}, + timeout=1500, + ) + + logger.info(" remove relation with modern endpoints") + await ops_test.model.applications[APP_NAME].remove_relation( + f"{APP_NAME}:{DATABASE_RELATION}", f"{DATABASE_APP_NAME}:{FIRST_DATABASE_RELATION}" + ) + async with ops_test.fast_forward(): + await ops_test.model.wait_for_idle( + status="active", + timeout=1500, + raise_on_error=False, + ) + + legacy_interface_connect = await get_legacy_db_connection_str( + ops_test, DB_APP_NAME, DB_RELATION, remote_unit_name=f"{APP_NAME}/0" + ) + logger.info(f" check connect to = {legacy_interface_connect}") + for attempt in Retrying(stop=stop_after_delay(60 * 3), wait=wait_fixed(10)): + with attempt: + with psycopg2.connect(legacy_interface_connect) as connection: + assert connection.status == psycopg2.extensions.STATUS_READY + + logger.info(f" remove relation {DB_APP_NAME}:{DB_RELATION}") + async with 
ops_test.fast_forward(): + await ops_test.model.applications[APP_NAME].remove_relation( + f"{APP_NAME}:{DB_RELATION}", f"{DB_APP_NAME}:{DB_RELATION}" + ) + await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1000) + for attempt in Retrying(stop=stop_after_delay(60 * 5), wait=wait_fixed(10)): + with attempt: + with pytest.raises(psycopg2.OperationalError): + psycopg2.connect(legacy_interface_connect) + + +@pytest.mark.group(1) +async def test_modern_endpoint_with_multiple_related_endpoints(ops_test: OpsTest): + await ops_test.model.relate(f"{DB_APP_NAME}:{DB_RELATION}", f"{APP_NAME}:{DB_RELATION}") + await ops_test.model.relate(APP_NAME, f"{DATABASE_APP_NAME}:{FIRST_DATABASE_RELATION}") + + app = ops_test.model.applications[APP_NAME] + await ops_test.model.block_until( + lambda: "blocked" in {unit.workload_status for unit in app.units}, + timeout=1500, + ) + + logger.info(" remove relation with legacy endpoints") + await ops_test.model.applications[APP_NAME].remove_relation( + f"{DB_APP_NAME}:{DB_RELATION}", f"{APP_NAME}:{DB_RELATION}" + ) + async with ops_test.fast_forward(): + await ops_test.model.wait_for_idle(status="active", timeout=3000, raise_on_error=False) + + modern_interface_connect = await build_connection_string( + ops_test, DATABASE_APP_NAME, FIRST_DATABASE_RELATION + ) + logger.info(f"check connect to = {modern_interface_connect}") + for attempt in Retrying(stop=stop_after_delay(60 * 3), wait=wait_fixed(10)): + with attempt: + with psycopg2.connect(modern_interface_connect) as connection: + assert connection.status == psycopg2.extensions.STATUS_READY + + logger.info(f"remove relation {DATABASE_APP_NAME}:{FIRST_DATABASE_RELATION}") + async with ops_test.fast_forward(): + await ops_test.model.applications[APP_NAME].remove_relation( + f"{APP_NAME}:{DATABASE_RELATION}", f"{DATABASE_APP_NAME}:{FIRST_DATABASE_RELATION}" + ) + await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1000) + for attempt in 
Retrying(stop=stop_after_delay(60 * 5), wait=wait_fixed(10)):
+        with attempt:
+            with pytest.raises(psycopg2.OperationalError):
+                psycopg2.connect(modern_interface_connect)