diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 2a02fb2cb3..fd090ca778 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -70,7 +70,7 @@ jobs: - lint - unit-test - build - uses: canonical/data-platform-workflows/.github/workflows/integration_test_charm.yaml@v14.1.0 + uses: canonical/data-platform-workflows/.github/workflows/integration_test_charm.yaml@dpe-4685-multiple-models-status-and-logs with: artifact-prefix: ${{ needs.build.outputs.artifact-prefix }} architecture: ${{ matrix.architecture }} diff --git a/tests/integration/ha_tests/test_async_replication.py b/tests/integration/ha_tests/test_async_replication.py index 96bdb3afa4..2df6b6215c 100644 --- a/tests/integration/ha_tests/test_async_replication.py +++ b/tests/integration/ha_tests/test_async_replication.py @@ -150,6 +150,7 @@ async def test_async_replication( continuous_writes, ) -> None: """Test async replication between two PostgreSQL clusters.""" + raise Exception("Fail fast.") logger.info("starting continuous writes to the database") await start_continuous_writes(ops_test, DATABASE_APP_NAME) diff --git a/tests/integration/ha_tests/test_replication.py b/tests/integration/ha_tests/test_replication.py deleted file mode 100644 index 600e2997d4..0000000000 --- a/tests/integration/ha_tests/test_replication.py +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2023 Canonical Ltd. -# See LICENSE file for licensing details. - -import pytest -from pytest_operator.plugin import OpsTest -from tenacity import Retrying, stop_after_delay, wait_fixed - -from ..helpers import APPLICATION_NAME, CHARM_SERIES, db_connect, scale_application -from .helpers import ( - app_name, - are_writes_increasing, - check_writes, - fetch_cluster_members, - get_password, - get_primary, - start_continuous_writes, -) - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_build_and_deploy(ops_test: OpsTest) -> None: - """Build and deploy three unit of PostgreSQL.""" - wait_for_apps = False - # It is possible for users to provide their own cluster for HA testing. Hence, check if there - # is a pre-existing cluster. - if not await app_name(ops_test): - wait_for_apps = True - charm = await ops_test.build_charm(".") - async with ops_test.fast_forward(): - await ops_test.model.deploy( - charm, - num_units=3, - series=CHARM_SERIES, - config={"profile": "testing"}, - ) - # Deploy the continuous writes application charm if it wasn't already deployed. - if not await app_name(ops_test, APPLICATION_NAME): - wait_for_apps = True - async with ops_test.fast_forward(): - await ops_test.model.deploy( - APPLICATION_NAME, - application_name=APPLICATION_NAME, - series=CHARM_SERIES, - channel="edge", - ) - - if wait_for_apps: - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle(status="active", timeout=1500) - - -@pytest.mark.group(1) -async def test_reelection(ops_test: OpsTest, continuous_writes, primary_start_timeout) -> None: - """Kill primary unit, check reelection.""" - app = await app_name(ops_test) - if len(ops_test.model.applications[app].units) < 2: - await scale_application(ops_test, app, 2) - - # Start an application that continuously writes data to the database. - await start_continuous_writes(ops_test, app) - - # Remove the primary unit. - primary_name = await get_primary(ops_test, app) - await ops_test.model.destroy_units( - primary_name, - ) - - # Wait and get the primary again (which can be any unit, including the previous primary). - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle(apps=[app], status="active") - - await are_writes_increasing(ops_test, primary_name) - - # Verify that a new primary gets elected (ie old primary is secondary). - for attempt in Retrying(stop=stop_after_delay(60), wait=wait_fixed(3)): - with attempt: - new_primary_name = await get_primary(ops_test, app) - assert new_primary_name != primary_name, "primary reelection hasn't happened" - - # Verify that all units are part of the same cluster. - member_ips = await fetch_cluster_members(ops_test) - app = primary_name.split("/")[0] - ip_addresses = [unit.public_address for unit in ops_test.model.applications[app].units] - assert set(member_ips) == set(ip_addresses), "not all units are part of the same cluster." - - # Verify that no writes to the database were missed after stopping the writes. - await check_writes(ops_test) - - -@pytest.mark.group(1) -async def test_consistency(ops_test: OpsTest, continuous_writes) -> None: - """Write to primary, read data from secondaries (check consistency).""" - # Locate primary unit. - app = await app_name(ops_test) - primary_name = await get_primary(ops_test, app) - - # Start an application that continuously writes data to the database. - await start_continuous_writes(ops_test, app) - - await are_writes_increasing(ops_test, primary_name) - - # Verify that no writes to the database were missed after stopping the writes - # (check that all the units have all the writes). - await check_writes(ops_test) - - -@pytest.mark.group(1) -async def test_no_data_replicated_between_clusters(ops_test: OpsTest, continuous_writes) -> None: - """Check that writes in one cluster are not replicated to another cluster.""" - # Locate primary unit. - app = await app_name(ops_test) - primary_name = await get_primary(ops_test, app) - - # Deploy another cluster. - new_cluster_app = f"second-{app}" - if not await app_name(ops_test, new_cluster_app): - charm = await ops_test.build_charm(".") - async with ops_test.fast_forward(): - await ops_test.model.deploy( - charm, - application_name=new_cluster_app, - num_units=2, - series=CHARM_SERIES, - config={"profile": "testing"}, - ) - await ops_test.model.wait_for_idle( - apps=[new_cluster_app], status="active", timeout=1500 - ) - - # Start an application that continuously writes data to the database. - await start_continuous_writes(ops_test, app) - - await are_writes_increasing(ops_test, primary_name) - - # Verify that no writes to the first cluster were missed after stopping the writes. - await check_writes(ops_test) - - # Verify that the data from the first cluster wasn't replicated to the second cluster. - password = await get_password(ops_test, app=new_cluster_app) - for unit in ops_test.model.applications[new_cluster_app].units: - try: - with db_connect( - host=unit.public_address, password=password - ) as connection, connection.cursor() as cursor: - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'continuous_writes');" - ) - assert not cursor.fetchone()[ - 0 - ], "table 'continuous_writes' was replicated to the second cluster" - finally: - connection.close() diff --git a/tests/integration/ha_tests/test_restore_cluster.py b/tests/integration/ha_tests/test_restore_cluster.py deleted file mode 100644 index d6af07e251..0000000000 --- a/tests/integration/ha_tests/test_restore_cluster.py +++ /dev/null @@ -1,130 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2023 Canonical Ltd. -# See LICENSE file for licensing details. -import logging - -import pytest -from pytest_operator.plugin import OpsTest - -from ..helpers import ( - CHARM_SERIES, - db_connect, - get_password, - get_patroni_cluster, - get_primary, - get_unit_address, - set_password, -) -from .helpers import ( - add_unit_with_storage, - reused_full_cluster_recovery_storage, - storage_id, -) - -FIRST_APPLICATION = "first-cluster" -SECOND_APPLICATION = "second-cluster" - -logger = logging.getLogger(__name__) - -charm = None - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_build_and_deploy(ops_test: OpsTest) -> None: - """Build and deploy two PostgreSQL clusters.""" - # This is a potentially destructive test, so it shouldn't be run against existing clusters - charm = await ops_test.build_charm(".") - async with ops_test.fast_forward(): - # Deploy the first cluster with reusable storage - await ops_test.model.deploy( - charm, - application_name=FIRST_APPLICATION, - num_units=3, - series=CHARM_SERIES, - storage={"pgdata": {"pool": "lxd-btrfs", "size": 2048}}, - config={"profile": "testing"}, - ) - - # Deploy the second cluster - await ops_test.model.deploy( - charm, - application_name=SECOND_APPLICATION, - num_units=1, - series=CHARM_SERIES, - config={"profile": "testing"}, - ) - - await ops_test.model.wait_for_idle(status="active", timeout=1500) - - # TODO have a better way to bootstrap clusters with existing storage - primary = await get_primary( - ops_test, ops_test.model.applications[FIRST_APPLICATION].units[0].name - ) - for user in ["monitoring", "operator", "replication", "rewind"]: - password = await get_password(ops_test, primary, user) - second_primary = ops_test.model.applications[SECOND_APPLICATION].units[0].name - await set_password(ops_test, second_primary, user, password) - await ops_test.model.destroy_unit(second_primary) - - -@pytest.mark.group(1) -async def test_cluster_restore(ops_test): - """Recreates the cluster from storage volumes.""" - # Write some data. - primary = await get_primary( - ops_test, ops_test.model.applications[FIRST_APPLICATION].units[0].name - ) - password = await get_password(ops_test, primary) - address = get_unit_address(ops_test, primary) - logger.info("creating a table in the database") - with db_connect(host=address, password=password) as connection: - connection.autocommit = True - connection.cursor().execute( - "CREATE TABLE IF NOT EXISTS restore_table_1 (test_collumn INT );" - ) - connection.close() - - logger.info("Downscaling the existing cluster") - storages = [] - for unit in ops_test.model.applications[FIRST_APPLICATION].units: - storages.append(storage_id(ops_test, unit.name)) - await ops_test.model.destroy_unit(unit.name) - - await ops_test.model.remove_application(FIRST_APPLICATION, block_until_done=True) - - # Recreate cluster - logger.info("Upscaling the second cluster with the old data") - for storage in storages: - unit = await add_unit_with_storage(ops_test, SECOND_APPLICATION, storage) - assert await reused_full_cluster_recovery_storage( - ops_test, unit.name - ), "attached storage not properly re-used by Postgresql." - - primary = await get_primary( - ops_test, ops_test.model.applications[SECOND_APPLICATION].units[0].name - ) - address = get_unit_address(ops_test, primary) - logger.info("checking that data was persisted") - with db_connect(host=address, password=password) as connection, connection.cursor() as cursor: - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'restore_table_1');" - ) - assert cursor.fetchone()[ - 0 - ], "data wasn't correctly restored: table 'restore_table_1' doesn't exist" - connection.close() - - # check that there is only one primary - cluster = get_patroni_cluster( - ops_test.model.applications[SECOND_APPLICATION].units[0].public_address - ) - primaries = [member for member in cluster["members"] if member["role"] == "leader"] - assert len(primaries) == 1, "There isn't just a single primary" - - # check that all units are member of the new cluster - members = [member["name"] for member in cluster["members"]] - for unit in ops_test.model.applications[SECOND_APPLICATION].units: - assert unit.name.replace("/", "-") in members, "Unit missing from cluster" - assert len(members) == len(storages), "Number of restored units and reused storages diverge" diff --git a/tests/integration/ha_tests/test_self_healing.py b/tests/integration/ha_tests/test_self_healing.py deleted file mode 100644 index 63d5b5abaa..0000000000 --- a/tests/integration/ha_tests/test_self_healing.py +++ /dev/null @@ -1,545 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2022 Canonical Ltd. -# See LICENSE file for licensing details. -import asyncio -import logging - -import pytest -from pytest_operator.plugin import OpsTest -from tenacity import Retrying, stop_after_delay, wait_fixed - -from ..helpers import ( - CHARM_SERIES, - db_connect, - get_machine_from_unit, - get_password, - get_unit_address, - run_command_on_unit, -) -from .conftest import APPLICATION_NAME -from .helpers import ( - METADATA, - ORIGINAL_RESTART_CONDITION, - add_unit_with_storage, - app_name, - are_all_db_processes_down, - are_writes_increasing, - change_patroni_setting, - change_wal_settings, - check_writes, - cut_network_from_unit, - cut_network_from_unit_without_ip_change, - fetch_cluster_members, - get_controller_machine, - get_patroni_setting, - get_primary, - get_unit_ip, - is_cluster_updated, - is_connection_possible, - is_machine_reachable_from, - is_postgresql_ready, - is_replica, - is_secondary_up_to_date, - list_wal_files, - restore_network_for_unit, - restore_network_for_unit_without_ip_change, - reused_replica_storage, - send_signal_to_process, - start_continuous_writes, - storage_id, - storage_type, - update_restart_condition, - wait_network_restore, -) - -logger = logging.getLogger(__name__) - -APP_NAME = METADATA["name"] -PATRONI_PROCESS = "/snap/charmed-postgresql/[0-9]*/usr/bin/patroni" -POSTGRESQL_PROCESS = "postgres" -DB_PROCESSES = [POSTGRESQL_PROCESS, PATRONI_PROCESS] - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_build_and_deploy(ops_test: OpsTest) -> None: - """Build and deploy three unit of PostgreSQL.""" - wait_for_apps = False - # It is possible for users to provide their own cluster for HA testing. Hence, check if there - # is a pre-existing cluster. - if not await app_name(ops_test): - wait_for_apps = True - charm = await ops_test.build_charm(".") - async with ops_test.fast_forward(): - await ops_test.model.deploy( - charm, - num_units=3, - series=CHARM_SERIES, - storage={"pgdata": {"pool": "lxd-btrfs", "size": 2048}}, - config={"profile": "testing"}, - ) - # Deploy the continuous writes application charm if it wasn't already deployed. - if not await app_name(ops_test, APPLICATION_NAME): - wait_for_apps = True - async with ops_test.fast_forward(): - await ops_test.model.deploy( - APPLICATION_NAME, - application_name=APPLICATION_NAME, - series=CHARM_SERIES, - channel="edge", - ) - - if wait_for_apps: - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle(status="active", timeout=1500) - - -@pytest.mark.group(1) -async def test_storage_re_use(ops_test, continuous_writes): - """Verifies that database units with attached storage correctly repurpose storage. - - It is not enough to verify that Juju attaches the storage. Hence test checks that the - postgresql properly uses the storage that was provided. (ie. doesn't just re-sync everything - from primary, but instead computes a diff between current storage and primary storage.) - """ - app = await app_name(ops_test) - if storage_type(ops_test, app) == "rootfs": - pytest.skip( - "reuse of storage can only be used on deployments with persistent storage not on rootfs deployments" - ) - - # removing the only replica can be disastrous - if len(ops_test.model.applications[app].units) < 2: - await ops_test.model.applications[app].add_unit(count=1) - await ops_test.model.wait_for_idle(apps=[app], status="active", timeout=1500) - - # Start an application that continuously writes data to the database. - await start_continuous_writes(ops_test, app) - - # remove a unit and attach it's storage to a new unit - for unit in ops_test.model.applications[app].units: - if await is_replica(ops_test, unit.name): - break - unit_storage_id = storage_id(ops_test, unit.name) - expected_units = len(ops_test.model.applications[app].units) - 1 - await ops_test.model.destroy_unit(unit.name) - await ops_test.model.wait_for_idle( - apps=[app], status="active", timeout=1000, wait_for_exact_units=expected_units - ) - new_unit = await add_unit_with_storage(ops_test, app, unit_storage_id) - - assert await reused_replica_storage( - ops_test, new_unit.name - ), "attached storage not properly re-used by Postgresql." - - # Verify that no writes to the database were missed after stopping the writes. - total_expected_writes = await check_writes(ops_test) - - # Verify that new instance is up-to-date. - assert await is_secondary_up_to_date( - ops_test, new_unit.name, total_expected_writes - ), "new instance not up to date." - - -@pytest.mark.group(1) -@pytest.mark.parametrize("process", DB_PROCESSES) -async def test_kill_db_process( - ops_test: OpsTest, process: str, continuous_writes, primary_start_timeout -) -> None: - # Locate primary unit. - app = await app_name(ops_test) - primary_name = await get_primary(ops_test, app) - - # Start an application that continuously writes data to the database. - await start_continuous_writes(ops_test, app) - - # Kill the database process. - await send_signal_to_process(ops_test, primary_name, process, "SIGKILL") - - async with ops_test.fast_forward(): - await are_writes_increasing(ops_test, primary_name) - - # Verify that the database service got restarted and is ready in the old primary. - assert await is_postgresql_ready(ops_test, primary_name) - - # Verify that a new primary gets elected (ie old primary is secondary). - new_primary_name = await get_primary(ops_test, app) - assert new_primary_name != primary_name - - await is_cluster_updated(ops_test, primary_name) - - -@pytest.mark.group(1) -@pytest.mark.parametrize("process", DB_PROCESSES) -async def test_freeze_db_process( - ops_test: OpsTest, process: str, continuous_writes, primary_start_timeout -) -> None: - # Locate primary unit. - app = await app_name(ops_test) - primary_name = await get_primary(ops_test, app) - - # Start an application that continuously writes data to the database. - await start_continuous_writes(ops_test, app) - - # Freeze the database process. - await send_signal_to_process(ops_test, primary_name, process, "SIGSTOP") - - async with ops_test.fast_forward(): - # Verify new writes are continuing by counting the number of writes before and after a - # 3 minutes wait (this is a little more than the loop wait configuration, that is - # considered to trigger a fail-over after primary_start_timeout is changed, and also - # when freezing the DB process it take some more time to trigger the fail-over). - try: - await are_writes_increasing(ops_test, primary_name) - - # Verify that a new primary gets elected (ie old primary is secondary). - for attempt in Retrying(stop=stop_after_delay(60 * 3), wait=wait_fixed(3)): - with attempt: - new_primary_name = await get_primary(ops_test, app, down_unit=primary_name) - assert new_primary_name != primary_name - finally: - # Un-freeze the old primary. - await send_signal_to_process(ops_test, primary_name, process, "SIGCONT") - - # Verify that the database service got restarted and is ready in the old primary. - assert await is_postgresql_ready(ops_test, primary_name) - - await is_cluster_updated(ops_test, primary_name) - - -@pytest.mark.group(1) -@pytest.mark.parametrize("process", DB_PROCESSES) -async def test_restart_db_process( - ops_test: OpsTest, process: str, continuous_writes, primary_start_timeout -) -> None: - # Locate primary unit. - app = await app_name(ops_test) - primary_name = await get_primary(ops_test, app) - - # Start an application that continuously writes data to the database. - await start_continuous_writes(ops_test, app) - - # Restart the database process. - await send_signal_to_process(ops_test, primary_name, process, "SIGTERM") - - async with ops_test.fast_forward(): - await are_writes_increasing(ops_test, primary_name) - - # Verify that the database service got restarted and is ready in the old primary. - assert await is_postgresql_ready(ops_test, primary_name) - - # Verify that a new primary gets elected (ie old primary is secondary). - new_primary_name = await get_primary(ops_test, app) - assert new_primary_name != primary_name - - await is_cluster_updated(ops_test, primary_name) - - -@pytest.mark.group(1) -@pytest.mark.parametrize("process", DB_PROCESSES) -@pytest.mark.parametrize("signal", ["SIGTERM", "SIGKILL"]) -async def test_full_cluster_restart( - ops_test: OpsTest, - process: str, - signal: str, - continuous_writes, - reset_restart_condition, - loop_wait, -) -> None: - """This tests checks that a cluster recovers from a full cluster restart. - - The test can be called a full cluster crash when the signal sent to the OS process - is SIGKILL. - """ - # Locate primary unit. - app = await app_name(ops_test) - - # Change the loop wait setting to make Patroni wait more time before restarting PostgreSQL. - initial_loop_wait = await get_patroni_setting(ops_test, "loop_wait") - await change_patroni_setting(ops_test, "loop_wait", 300, use_random_unit=True) - - # Start an application that continuously writes data to the database. - await start_continuous_writes(ops_test, app) - - # Restart all units "simultaneously". - await asyncio.gather(*[ - send_signal_to_process(ops_test, unit.name, process, signal) - for unit in ops_test.model.applications[app].units - ]) - - # This test serves to verify behavior when all replicas are down at the same time that when - # they come back online they operate as expected. This check verifies that we meet the criteria - # of all replicas being down at the same time. - try: - assert await are_all_db_processes_down( - ops_test, process - ), "Not all units down at the same time." - finally: - if process == PATRONI_PROCESS: - awaits = [] - for unit in ops_test.model.applications[app].units: - awaits.append(update_restart_condition(ops_test, unit, ORIGINAL_RESTART_CONDITION)) - await asyncio.gather(*awaits) - await change_patroni_setting( - ops_test, "loop_wait", initial_loop_wait, use_random_unit=True - ) - - # Verify all units are up and running. - for unit in ops_test.model.applications[app].units: - assert await is_postgresql_ready( - ops_test, unit.name - ), f"unit {unit.name} not restarted after cluster restart." - - async with ops_test.fast_forward(): - await are_writes_increasing(ops_test) - - # Verify that all units are part of the same cluster. - member_ips = await fetch_cluster_members(ops_test) - ip_addresses = [unit.public_address for unit in ops_test.model.applications[app].units] - assert set(member_ips) == set(ip_addresses), "not all units are part of the same cluster." - - # Verify that no writes to the database were missed after stopping the writes. - async with ops_test.fast_forward(): - await check_writes(ops_test) - - -@pytest.mark.group(1) -@pytest.mark.unstable -async def test_forceful_restart_without_data_and_transaction_logs( - ops_test: OpsTest, - continuous_writes, - primary_start_timeout, - wal_settings, -) -> None: - """A forceful restart with deleted data and without transaction logs (forced clone).""" - app = await app_name(ops_test) - primary_name = await get_primary(ops_test, app) - - # Copy data dir content removal script. - await ops_test.juju( - "scp", "tests/integration/ha_tests/clean-data-dir.sh", f"{primary_name}:/tmp" - ) - - # Start an application that continuously writes data to the database. - await start_continuous_writes(ops_test, app) - - # Stop the systemd service on the primary unit. - await run_command_on_unit(ops_test, primary_name, "snap stop charmed-postgresql.patroni") - - # Data removal runs within a script, so it allows `*` expansion. - return_code, _, _ = await ops_test.juju( - "ssh", - primary_name, - "sudo", - "/tmp/clean-data-dir.sh", - ) - assert return_code == 0, "Failed to remove data directory" - - async with ops_test.fast_forward(): - # Verify that a new primary gets elected (ie old primary is secondary). - for attempt in Retrying(stop=stop_after_delay(60), wait=wait_fixed(3)): - with attempt: - new_primary_name = await get_primary(ops_test, app) - assert new_primary_name is not None - assert new_primary_name != primary_name - - await are_writes_increasing(ops_test, primary_name) - - # Change some settings to enable WAL rotation. - for unit in ops_test.model.applications[app].units: - if unit.name == primary_name: - continue - await change_wal_settings(ops_test, unit.name, 32, 32, 1) - - # Rotate the WAL segments. - files = await list_wal_files(ops_test, app) - host = get_unit_address(ops_test, new_primary_name) - password = await get_password(ops_test, new_primary_name) - with db_connect(host, password) as connection: - connection.autocommit = True - with connection.cursor() as cursor: - # Run some commands to make PostgreSQL do WAL rotation. - cursor.execute("SELECT pg_switch_wal();") - cursor.execute("CHECKPOINT;") - cursor.execute("SELECT pg_switch_wal();") - connection.close() - new_files = await list_wal_files(ops_test, app) - # Check that the WAL was correctly rotated. - for unit_name in files: - assert not files[unit_name].intersection( - new_files - ), "WAL segments weren't correctly rotated" - - # Start the systemd service in the old primary. - await run_command_on_unit(ops_test, primary_name, "snap start charmed-postgresql.patroni") - - # Verify that the database service got restarted and is ready in the old primary. - assert await is_postgresql_ready(ops_test, primary_name) - - await is_cluster_updated(ops_test, primary_name) - - -@pytest.mark.group(1) -async def test_network_cut(ops_test: OpsTest, continuous_writes, primary_start_timeout): - """Completely cut and restore network.""" - # Locate primary unit. - app = await app_name(ops_test) - primary_name = await get_primary(ops_test, app) - - # Start an application that continuously writes data to the database. - await start_continuous_writes(ops_test, app) - - # Get unit hostname and IP. - primary_hostname = await get_machine_from_unit(ops_test, primary_name) - primary_ip = await get_unit_ip(ops_test, primary_name) - - # Verify that connection is possible. - logger.info("checking whether the connectivity to the database is working") - assert await is_connection_possible( - ops_test, primary_name - ), f"Connection {primary_name} is not possible" - - logger.info(f"Cutting network for {primary_name}") - cut_network_from_unit(primary_hostname) - - # Verify machine is not reachable from peer units. - all_units_names = [unit.name for unit in ops_test.model.applications[app].units] - for unit_name in set(all_units_names) - {primary_name}: - logger.info(f"checking for no connectivity between {primary_name} and {unit_name}") - hostname = await get_machine_from_unit(ops_test, unit_name) - assert not is_machine_reachable_from( - hostname, primary_hostname - ), "unit is reachable from peer" - - # Verify machine is not reachable from controller. - logger.info(f"checking for no connectivity between {primary_name} and the controller") - controller = await get_controller_machine(ops_test) - assert not is_machine_reachable_from( - controller, primary_hostname - ), "unit is reachable from controller" - - # Verify that connection is not possible. - logger.info("checking whether the connectivity to the database is not working") - assert not await is_connection_possible( - ops_test, primary_name - ), "Connection is possible after network cut" - - async with ops_test.fast_forward(): - logger.info("checking whether writes are increasing") - await are_writes_increasing(ops_test, primary_name) - - logger.info("checking whether a new primary was elected") - # Verify that a new primary gets elected (ie old primary is secondary). - for attempt in Retrying(stop=stop_after_delay(60), wait=wait_fixed(3)): - with attempt: - new_primary_name = await get_primary(ops_test, app, down_unit=primary_name) - assert new_primary_name != primary_name - - logger.info(f"Restoring network for {primary_name}") - restore_network_for_unit(primary_hostname) - - # Wait until the cluster becomes idle (some operations like updating the member - # IP are made). - logger.info("waiting for cluster to become idle after updating member IP") - async with ops_test.fast_forward(fast_interval="60s"): - await ops_test.model.wait_for_idle( - apps=[app], - status="active", - raise_on_blocked=True, - timeout=1000, - idle_period=30, - ) - - # Wait the LXD unit has its IP updated. - logger.info("waiting for IP address to be updated on Juju unit") - await wait_network_restore(ops_test, primary_name, primary_ip) - - # Verify that the database service got restarted and is ready in the old primary. - logger.info(f"waiting for the database service to be ready on {primary_name}") - assert await is_postgresql_ready(ops_test, primary_name, use_ip_from_inside=True) - - # Verify that connection is possible. - logger.info("checking whether the connectivity to the database is working") - assert await is_connection_possible( - ops_test, primary_name, use_ip_from_inside=True - ), "Connection is not possible after network restore" - - await is_cluster_updated(ops_test, primary_name, use_ip_from_inside=True) - - -@pytest.mark.group(1) -async def test_network_cut_without_ip_change( - ops_test: OpsTest, continuous_writes, primary_start_timeout -): - """Completely cut and restore network (situation when the unit IP doesn't change).""" - # Locate primary unit. - app = await app_name(ops_test) - primary_name = await get_primary(ops_test, app) - - # Start an application that continuously writes data to the database. - await start_continuous_writes(ops_test, app) - - # Get unit hostname and IP. - primary_hostname = await get_machine_from_unit(ops_test, primary_name) - - # Verify that connection is possible. - logger.info("checking whether the connectivity to the database is working") - assert await is_connection_possible( - ops_test, primary_name - ), f"Connection {primary_name} is not possible" - - logger.info(f"Cutting network for {primary_name}") - cut_network_from_unit_without_ip_change(primary_hostname) - - # Verify machine is not reachable from peer units. - all_units_names = [unit.name for unit in ops_test.model.applications[app].units] - for unit_name in set(all_units_names) - {primary_name}: - logger.info(f"checking for no connectivity between {primary_name} and {unit_name}") - hostname = await get_machine_from_unit(ops_test, unit_name) - assert not is_machine_reachable_from( - hostname, primary_hostname - ), "unit is reachable from peer" - - # Verify machine is not reachable from controller. - logger.info(f"checking for no connectivity between {primary_name} and the controller") - controller = await get_controller_machine(ops_test) - assert not is_machine_reachable_from( - controller, primary_hostname - ), "unit is reachable from controller" - - # Verify that connection is not possible. - logger.info("checking whether the connectivity to the database is not working") - assert not await is_connection_possible( - ops_test, primary_name - ), "Connection is possible after network cut" - - async with ops_test.fast_forward(): - logger.info("checking whether writes are increasing") - await are_writes_increasing(ops_test, primary_name, use_ip_from_inside=True) - - logger.info("checking whether a new primary was elected") - # Verify that a new primary gets elected (ie old primary is secondary). - for attempt in Retrying(stop=stop_after_delay(60), wait=wait_fixed(3)): - with attempt: - new_primary_name = await get_primary(ops_test, app, down_unit=primary_name) - assert new_primary_name != primary_name - - logger.info(f"Restoring network for {primary_name}") - restore_network_for_unit_without_ip_change(primary_hostname) - - # Wait until the cluster becomes idle. - logger.info("waiting for cluster to become idle") - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle(apps=[app], status="active") - - # Verify that the database service got restarted and is ready in the old primary. - logger.info(f"waiting for the database service to be ready on {primary_name}") - assert await is_postgresql_ready(ops_test, primary_name) - - # Verify that connection is possible. - logger.info("checking whether the connectivity to the database is working") - assert await is_connection_possible( - ops_test, primary_name - ), "Connection is not possible after network restore" - - await is_cluster_updated(ops_test, primary_name, use_ip_from_inside=True) diff --git a/tests/integration/ha_tests/test_smoke.py b/tests/integration/ha_tests/test_smoke.py deleted file mode 100644 index 8da83ce3eb..0000000000 --- a/tests/integration/ha_tests/test_smoke.py +++ /dev/null @@ -1,221 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Canonical Ltd. -# See LICENSE file for licensing details. - -import logging -from asyncio import TimeoutError - -import pytest -from juju import tag -from pytest_operator.plugin import OpsTest -from tenacity import Retrying, stop_after_delay, wait_fixed - -from ..helpers import ( - APPLICATION_NAME, - CHARM_SERIES, -) -from ..juju_ import juju_major_version -from .helpers import ( - add_unit_with_storage, - check_db, - check_password_auth, - create_db, - get_any_deatached_storage, - is_postgresql_ready, - is_storage_exists, - remove_unit_force, - storage_id, -) - -TEST_DATABASE_NAME = "test_database" -DUP_APPLICATION_NAME = "postgres-test-dup" - -logger = logging.getLogger(__name__) - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_app_force_removal(ops_test: OpsTest, charm: str): - """Remove unit with force while storage is alive.""" - async with ops_test.fast_forward(): - # Deploy the charm. - logger.info("deploying charm") - await ops_test.model.deploy( - charm, - application_name=APPLICATION_NAME, - num_units=1, - series=CHARM_SERIES, - storage={"pgdata": {"pool": "lxd-btrfs", "size": 8046}}, - config={"profile": "testing"}, - ) - - logger.info("waiting for idle") - await ops_test.model.wait_for_idle(apps=[APPLICATION_NAME], status="active", timeout=1500) - assert ops_test.model.applications[APPLICATION_NAME].units[0].workload_status == "active" - - primary_name = ops_test.model.applications[APPLICATION_NAME].units[0].name - - logger.info("waiting for postgresql") - for attempt in Retrying(stop=stop_after_delay(15 * 3), wait=wait_fixed(3), reraise=True): - with attempt: - assert await is_postgresql_ready(ops_test, primary_name) - - logger.info("getting storage id") - storage_id_str = storage_id(ops_test, primary_name) - - # Check if storage exists after application deployed - logger.info("werifing is storage exists") - for attempt in Retrying(stop=stop_after_delay(15 * 3), wait=wait_fixed(3), reraise=True): - with attempt: - assert await is_storage_exists(ops_test, storage_id_str) - - # Create test database to check there is no resources conflicts - logger.info("creating db") - await create_db(ops_test, APPLICATION_NAME, TEST_DATABASE_NAME) - - # Check that test database is not exists for new unit - logger.info("checking db") - assert await check_db(ops_test, APPLICATION_NAME, TEST_DATABASE_NAME) - - # Destroy charm - logger.info("force removing charm") - if juju_major_version == 2: - await remove_unit_force(ops_test, primary_name) - else: - await ops_test.model.destroy_unit( - primary_name, force=True, destroy_storage=False, max_wait=1500 - ) - - # Storage should remain - logger.info("werifing is storage exists") - for attempt in Retrying(stop=stop_after_delay(15 * 3), wait=wait_fixed(3), reraise=True): - with attempt: - assert await is_storage_exists(ops_test, storage_id_str) - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_charm_garbage_ignorance(ops_test: OpsTest, charm: str): - """Test charm deploy in dirty environment with garbage storage.""" - async with ops_test.fast_forward(): - logger.info("checking garbage storage") - garbage_storage = None - for attempt in Retrying(stop=stop_after_delay(30 * 3), wait=wait_fixed(3), reraise=True): - with attempt: - garbage_storage = await get_any_deatached_storage(ops_test) - - logger.info("add unit with attached storage") - await add_unit_with_storage(ops_test, APPLICATION_NAME, garbage_storage) - - primary_name = ops_test.model.applications[APPLICATION_NAME].units[0].name - - logger.info("waiting for postgresql") - for attempt in Retrying(stop=stop_after_delay(15 * 3), wait=wait_fixed(3), reraise=True): - with attempt: - assert await is_postgresql_ready(ops_test, primary_name) - - logger.info("getting storage id") - storage_id_str = storage_id(ops_test, primary_name) - - assert storage_id_str == garbage_storage - - # Check if storage exists after application deployed - logger.info("werifing is storage exists") - for attempt in Retrying(stop=stop_after_delay(15 * 3), wait=wait_fixed(3), reraise=True): - with attempt: - assert await is_storage_exists(ops_test, storage_id_str) - - # Check that test database exists for new unit - logger.info("checking db") - assert await check_db(ops_test, APPLICATION_NAME, TEST_DATABASE_NAME) - - logger.info("removing charm") - await ops_test.model.destroy_unit(primary_name) - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.skipif(juju_major_version < 3, reason="Requires juju 3 or higher") -async def test_app_resources_conflicts_v3(ops_test: OpsTest, charm: str): - """Test application deploy in dirty environment with garbage storage from another application.""" - async with ops_test.fast_forward(): - logger.info("checking garbage storage") - garbage_storage = None - for attempt in Retrying(stop=stop_after_delay(30 * 3), wait=wait_fixed(3), reraise=True): - with attempt: - garbage_storage = await get_any_deatached_storage(ops_test) - - logger.info("deploying duplicate application with attached storage") - await ops_test.model.deploy( - charm, - application_name=DUP_APPLICATION_NAME, - num_units=1, - series=CHARM_SERIES, - attach_storage=[tag.storage(garbage_storage)], - config={"profile": "testing"}, - ) - - # Reducing the update status frequency to speed up the triggering of deferred events. - await ops_test.model.set_config({"update-status-hook-interval": "10s"}) - - logger.info("waiting for duplicate application to be blocked") - try: - await ops_test.model.wait_for_idle( - apps=[DUP_APPLICATION_NAME], timeout=1000, status="blocked" - ) - except TimeoutError: - logger.info("Application is not in blocked state. Checking logs...") - - # Since application have postgresql db in storage from external application it should not be able to connect due to new password - logger.info("checking operator password auth") - assert not await check_password_auth( - ops_test, ops_test.model.applications[DUP_APPLICATION_NAME].units[0].name - ) - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.skipif(juju_major_version != 2, reason="Requires juju 2") -async def test_app_resources_conflicts_v2(ops_test: OpsTest, charm: str): - """Test application deploy in dirty environment with garbage storage from another application.""" - async with ops_test.fast_forward(): - logger.info("checking garbage storage") - garbage_storage = None - for attempt in Retrying(stop=stop_after_delay(30 * 3), wait=wait_fixed(3), reraise=True): - with attempt: - garbage_storage = await get_any_deatached_storage(ops_test) - - # Deploy duplicaate charm - logger.info("deploying duplicate application") - await ops_test.model.deploy( - charm, - application_name=DUP_APPLICATION_NAME, - num_units=1, - series=CHARM_SERIES, - config={"profile": "testing"}, - ) - - logger.info("force removing charm") - await remove_unit_force( - ops_test, ops_test.model.applications[DUP_APPLICATION_NAME].units[0].name - ) - - # Add unit with garbage storage - logger.info("adding charm with attached storage") - add_unit_cmd = f"add-unit {DUP_APPLICATION_NAME} --model={ops_test.model.info.name} --attach-storage={garbage_storage}".split() - return_code, _, _ = await ops_test.juju(*add_unit_cmd) - assert return_code == 0, "Failed to add unit with storage" - - logger.info("waiting for duplicate application to be blocked") - try: - await ops_test.model.wait_for_idle( - apps=[DUP_APPLICATION_NAME], timeout=1000, status="blocked" - ) - except TimeoutError: - logger.info("Application is not in blocked state. Checking logs...") - - # Since application have postgresql db in storage from external application it should not be able to connect due to new password - logger.info("checking operator password auth") - assert not await check_password_auth( - ops_test, ops_test.model.applications[DUP_APPLICATION_NAME].units[0].name - ) diff --git a/tests/integration/ha_tests/test_upgrade.py b/tests/integration/ha_tests/test_upgrade.py deleted file mode 100644 index 608986eeca..0000000000 --- a/tests/integration/ha_tests/test_upgrade.py +++ /dev/null @@ -1,211 +0,0 @@ -# Copyright 2023 Canonical Ltd. -# See LICENSE file for licensing details. - -import json -import logging -import shutil -import zipfile -from pathlib import Path -from typing import Union - -import pytest -from pytest_operator.plugin import OpsTest - -from ..helpers import ( - APPLICATION_NAME, - DATABASE_APP_NAME, - count_switchovers, - get_leader_unit, - get_primary, -) -from ..new_relations.helpers import get_application_relation_data -from .helpers import ( - are_writes_increasing, - check_writes, - start_continuous_writes, -) - -logger = logging.getLogger(__name__) - -TIMEOUT = 600 - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_deploy_latest(ops_test: OpsTest) -> None: - """Simple test to ensure that the PostgreSQL and application charms get deployed.""" - await ops_test.model.deploy( - DATABASE_APP_NAME, - num_units=3, - channel="14/edge", - config={"profile": "testing"}, - ) - await ops_test.model.deploy( - APPLICATION_NAME, - num_units=1, - channel="latest/edge", - ) - logger.info("Wait for applications to become active") - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME, APPLICATION_NAME], status="active", timeout=1500 - ) - assert len(ops_test.model.applications[DATABASE_APP_NAME].units) == 3 - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_pre_upgrade_check(ops_test: OpsTest) -> None: - """Test that the pre-upgrade-check action runs successfully.""" - logger.info("Get leader unit") - leader_unit = await get_leader_unit(ops_test, DATABASE_APP_NAME) - assert leader_unit is not None, "No leader unit found" - - logger.info("Run pre-upgrade-check action") - action = await leader_unit.run_action("pre-upgrade-check") - await action.wait() - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_upgrade_from_edge(ops_test: OpsTest, continuous_writes) -> None: - # Start an application that continuously writes data to the database. - logger.info("starting continuous writes to the database") - await start_continuous_writes(ops_test, DATABASE_APP_NAME) - - # Check whether writes are increasing. - logger.info("checking whether writes are increasing") - await are_writes_increasing(ops_test) - - primary_name = await get_primary(ops_test, f"{DATABASE_APP_NAME}/0") - initial_number_of_switchovers = count_switchovers(ops_test, primary_name) - - application = ops_test.model.applications[DATABASE_APP_NAME] - - logger.info("Build charm locally") - charm = await ops_test.build_charm(".") - - logger.info("Refresh the charm") - await application.refresh(path=charm) - - logger.info("Wait for upgrade to start") - await ops_test.model.block_until( - lambda: "waiting" in {unit.workload_status for unit in application.units}, - timeout=TIMEOUT, - ) - - logger.info("Wait for upgrade to complete") - async with ops_test.fast_forward("60s"): - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="active", idle_period=30, timeout=TIMEOUT - ) - - # Check whether writes are increasing. - logger.info("checking whether writes are increasing") - await are_writes_increasing(ops_test) - - # Verify that no writes to the database were missed after stopping the writes - # (check that all the units have all the writes). - logger.info("checking whether no writes were lost") - await check_writes(ops_test) - - logger.info("checking the number of switchovers") - final_number_of_switchovers = count_switchovers(ops_test, primary_name) - assert ( - final_number_of_switchovers - initial_number_of_switchovers - ) <= 2, "Number of switchovers is greater than 2" - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_fail_and_rollback(ops_test, continuous_writes) -> None: - # Start an application that continuously writes data to the database. - logger.info("starting continuous writes to the database") - await start_continuous_writes(ops_test, DATABASE_APP_NAME) - - # Check whether writes are increasing. - logger.info("checking whether writes are increasing") - await are_writes_increasing(ops_test) - - logger.info("Get leader unit") - leader_unit = await get_leader_unit(ops_test, DATABASE_APP_NAME) - assert leader_unit is not None, "No leader unit found" - - logger.info("Run pre-upgrade-check action") - action = await leader_unit.run_action("pre-upgrade-check") - await action.wait() - - local_charm = await ops_test.build_charm(".") - if isinstance(local_charm, str): - filename = local_charm.split("/")[-1] - else: - filename = local_charm.name - fault_charm = Path("/tmp/", filename) - shutil.copy(local_charm, fault_charm) - - logger.info("Inject dependency fault") - await inject_dependency_fault(ops_test, DATABASE_APP_NAME, fault_charm) - - application = ops_test.model.applications[DATABASE_APP_NAME] - - logger.info("Refresh the charm") - await application.refresh(path=fault_charm) - - logger.info("Wait for upgrade to fail") - async with ops_test.fast_forward("60s"): - await ops_test.model.block_until( - lambda: "blocked" in {unit.workload_status for unit in application.units}, - timeout=TIMEOUT, - ) - - logger.info("Ensure continuous_writes while in failure state on remaining units") - await are_writes_increasing(ops_test) - - logger.info("Re-run pre-upgrade-check action") - action = await leader_unit.run_action("pre-upgrade-check") - await action.wait() - - logger.info("Re-refresh the charm") - await application.refresh(path=local_charm) - - logger.info("Wait for upgrade to start") - await ops_test.model.block_until( - lambda: "waiting" in {unit.workload_status for unit in application.units}, - timeout=TIMEOUT, - ) - - logger.info("Wait for application to recover") - async with ops_test.fast_forward("60s"): - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="active", timeout=TIMEOUT - ) - - logger.info("Ensure continuous_writes after rollback procedure") - await are_writes_increasing(ops_test) - - # Verify that no writes to the database were missed after stopping the writes - # (check that all the units have all the writes). - logger.info("Checking whether no writes were lost") - await check_writes(ops_test) - - # Remove fault charm file. - fault_charm.unlink() - - -async def inject_dependency_fault( - ops_test: OpsTest, application_name: str, charm_file: Union[str, Path] -) -> None: - """Inject a dependency fault into the PostgreSQL charm.""" - # Query running dependency to overwrite with incompatible version. - dependencies = await get_application_relation_data( - ops_test, application_name, "upgrade", "dependencies" - ) - loaded_dependency_dict = json.loads(dependencies) - if "snap" not in loaded_dependency_dict: - loaded_dependency_dict["snap"] = {"dependencies": {}, "name": "charmed-postgresql"} - loaded_dependency_dict["snap"]["upgrade_supported"] = "^15" - loaded_dependency_dict["snap"]["version"] = "15.0" - - # Overwrite dependency.json with incompatible version. - with zipfile.ZipFile(charm_file, mode="a") as charm_zip: - charm_zip.writestr("src/dependency.json", json.dumps(loaded_dependency_dict)) diff --git a/tests/integration/ha_tests/test_upgrade_from_stable.py b/tests/integration/ha_tests/test_upgrade_from_stable.py deleted file mode 100644 index c1584c6a6c..0000000000 --- a/tests/integration/ha_tests/test_upgrade_from_stable.py +++ /dev/null @@ -1,153 +0,0 @@ -# Copyright 2023 Canonical Ltd. -# See LICENSE file for licensing details. -import json -import logging - -import pytest -from pytest_operator.plugin import OpsTest - -from .. import markers -from ..helpers import ( - APPLICATION_NAME, - DATABASE_APP_NAME, - count_switchovers, - get_leader_unit, - get_primary, - remove_chown_workaround, -) -from .helpers import ( - are_writes_increasing, - check_writes, - start_continuous_writes, -) - -logger = logging.getLogger(__name__) - -TIMEOUT = 600 - - -@pytest.mark.group(1) -@markers.amd64_only # TODO: remove after arm64 stable release -@pytest.mark.abort_on_fail -async def test_deploy_stable(ops_test: OpsTest) -> None: - """Simple test to ensure that the PostgreSQL and application charms get deployed.""" - return_code, charm_info, stderr = await ops_test.juju("info", "postgresql", "--format=json") - if return_code != 0: - raise Exception(f"failed to get charm info with error: {stderr}") - # Revisions lower than 315 have a currently broken workaround for chown. - parsed_charm_info = json.loads(charm_info) - revision = ( - parsed_charm_info["channels"]["14"]["stable"][0]["revision"] - if "channels" in parsed_charm_info - else parsed_charm_info["channel-map"]["14/stable"]["revision"] - ) - logger.info(f"14/stable revision: {revision}") - if int(revision) < 315: - original_charm_name = "./postgresql.charm" - return_code, _, stderr = await ops_test.juju( - "download", - "postgresql", - "--channel=14/stable", - f"--filepath={original_charm_name}", - ) - if return_code != 0: - raise Exception( - f"failed to download charm from 14/stable channel with error: {stderr}" - ) - patched_charm_name = "./modified_postgresql.charm" - remove_chown_workaround(original_charm_name, patched_charm_name) - return_code, _, stderr = await ops_test.juju("deploy", patched_charm_name, "-n", "3") - if return_code != 0: - raise Exception(f"failed to deploy charm from 14/stable channel with error: {stderr}") - else: - await ops_test.model.deploy( - DATABASE_APP_NAME, - num_units=3, - channel="14/stable", - ) - await ops_test.model.deploy( - APPLICATION_NAME, - num_units=1, - channel="latest/edge", - ) - logger.info("Wait for applications to become active") - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME, APPLICATION_NAME], status="active", timeout=(20 * 60) - ) - assert len(ops_test.model.applications[DATABASE_APP_NAME].units) == 3 - - -@pytest.mark.group(1) -@markers.amd64_only # TODO: remove after arm64 stable release -@pytest.mark.abort_on_fail -async def test_pre_upgrade_check(ops_test: OpsTest) -> None: - """Test that the pre-upgrade-check action runs successfully.""" - application = ops_test.model.applications[DATABASE_APP_NAME] - if "pre-upgrade-check" not in await application.get_actions(): - logger.info("skipping the test because the charm from 14/stable doesn't support upgrade") - return - - logger.info("Get leader unit") - leader_unit = await get_leader_unit(ops_test, DATABASE_APP_NAME) - assert leader_unit is not None, "No leader unit found" - - logger.info("Run pre-upgrade-check action") - action = await leader_unit.run_action("pre-upgrade-check") - await action.wait() - - -@pytest.mark.group(1) -@markers.amd64_only # TODO: remove after arm64 stable release -@pytest.mark.abort_on_fail -async def test_upgrade_from_stable(ops_test: OpsTest): - """Test updating from stable channel.""" - # Start an application that continuously writes data to the database. - logger.info("starting continuous writes to the database") - await start_continuous_writes(ops_test, DATABASE_APP_NAME) - - # Check whether writes are increasing. - logger.info("checking whether writes are increasing") - await are_writes_increasing(ops_test) - - primary_name = await get_primary(ops_test, f"{DATABASE_APP_NAME}/0") - initial_number_of_switchovers = count_switchovers(ops_test, primary_name) - - application = ops_test.model.applications[DATABASE_APP_NAME] - actions = await application.get_actions() - - logger.info("Build charm locally") - charm = await ops_test.build_charm(".") - - logger.info("Refresh the charm") - await application.refresh(path=charm) - - logger.info("Wait for upgrade to start") - await ops_test.model.block_until( - lambda: ("waiting" if "pre-upgrade-check" in actions else "maintenance") - in {unit.workload_status for unit in application.units}, - timeout=TIMEOUT, - ) - - logger.info("Wait for upgrade to complete") - async with ops_test.fast_forward("60s"): - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="active", idle_period=30, timeout=TIMEOUT - ) - - # Check whether writes are increasing. - logger.info("checking whether writes are increasing") - await are_writes_increasing(ops_test) - - # Verify that no writes to the database were missed after stopping the writes - # (check that all the units have all the writes). - logger.info("checking whether no writes were lost") - await check_writes(ops_test) - - # Check the number of switchovers. - if "pre-upgrade-check" in actions: - logger.info("checking the number of switchovers") - final_number_of_switchovers = count_switchovers(ops_test, primary_name) - assert ( - final_number_of_switchovers - initial_number_of_switchovers - ) <= 2, "Number of switchovers is greater than 2" diff --git a/tests/integration/test_backups.py b/tests/integration/test_backups.py deleted file mode 100644 index 887db7fd5b..0000000000 --- a/tests/integration/test_backups.py +++ /dev/null @@ -1,516 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2023 Canonical Ltd. -# See LICENSE file for licensing details. -import logging -import uuid -from typing import Dict, Tuple - -import boto3 -import pytest as pytest -from pytest_operator.plugin import OpsTest -from tenacity import Retrying, stop_after_attempt, wait_exponential - -from . import architecture -from .helpers import ( - CHARM_SERIES, - DATABASE_APP_NAME, - construct_endpoint, - db_connect, - get_password, - get_primary, - get_unit_address, - scale_application, - switchover, - wait_for_idle_on_blocked, -) -from .juju_ import juju_major_version - -ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE = "the S3 repository has backups from another cluster" -FAILED_TO_ACCESS_CREATE_BUCKET_ERROR_MESSAGE = ( - "failed to access/create the bucket, check your S3 settings" -) -FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE = "failed to initialize stanza, check your S3 settings" -S3_INTEGRATOR_APP_NAME = "s3-integrator" -if juju_major_version < 3: - tls_certificates_app_name = "tls-certificates-operator" - if architecture.architecture == "arm64": - tls_channel = "legacy/edge" - else: - tls_channel = "legacy/stable" - tls_config = {"generate-self-signed-certificates": "true", "ca-common-name": "Test CA"} -else: - tls_certificates_app_name = "self-signed-certificates" - if architecture.architecture == "arm64": - tls_channel = "latest/edge" - else: - tls_channel = "latest/stable" - tls_config = {"ca-common-name": "Test CA"} - -logger = logging.getLogger(__name__) - -AWS = "AWS" -GCP = "GCP" - - -@pytest.fixture(scope="module") -async def cloud_configs(ops_test: OpsTest, github_secrets) -> None: - # Define some configurations and credentials. - configs = { - AWS: { - "endpoint": "https://s3.amazonaws.com", - "bucket": "data-charms-testing", - "path": f"/postgresql-vm/{uuid.uuid1()}", - "region": "us-east-1", - }, - GCP: { - "endpoint": "https://storage.googleapis.com", - "bucket": "data-charms-testing", - "path": f"/postgresql-vm/{uuid.uuid1()}", - "region": "", - }, - } - credentials = { - AWS: { - "access-key": github_secrets["AWS_ACCESS_KEY"], - "secret-key": github_secrets["AWS_SECRET_KEY"], - }, - GCP: { - "access-key": github_secrets["GCP_ACCESS_KEY"], - "secret-key": github_secrets["GCP_SECRET_KEY"], - }, - } - yield configs, credentials - # Delete the previously created objects. - logger.info("deleting the previously created backups") - for cloud, config in configs.items(): - session = boto3.session.Session( - aws_access_key_id=credentials[cloud]["access-key"], - aws_secret_access_key=credentials[cloud]["secret-key"], - region_name=config["region"], - ) - s3 = session.resource( - "s3", endpoint_url=construct_endpoint(config["endpoint"], config["region"]) - ) - bucket = s3.Bucket(config["bucket"]) - # GCS doesn't support batch delete operation, so delete the objects one by one. - for bucket_object in bucket.objects.filter(Prefix=config["path"].lstrip("/")): - bucket_object.delete() - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_backup(ops_test: OpsTest, cloud_configs: Tuple[Dict, Dict], charm) -> None: - """Build and deploy two units of PostgreSQL and then test the backup and restore actions.""" - # Deploy S3 Integrator and TLS Certificates Operator. - await ops_test.model.deploy(S3_INTEGRATOR_APP_NAME) - await ops_test.model.deploy(tls_certificates_app_name, config=tls_config, channel=tls_channel) - - for cloud, config in cloud_configs[0].items(): - # Deploy and relate PostgreSQL to S3 integrator (one database app for each cloud for now - # as archive_mode is disabled after restoring the backup) and to TLS Certificates Operator - # (to be able to create backups from replicas). - database_app_name = f"{DATABASE_APP_NAME}-{cloud.lower()}" - await ops_test.model.deploy( - charm, - application_name=database_app_name, - num_units=2, - series=CHARM_SERIES, - config={"profile": "testing"}, - ) - await ops_test.model.relate(database_app_name, S3_INTEGRATOR_APP_NAME) - await ops_test.model.relate(database_app_name, tls_certificates_app_name) - - # Configure and set access and secret keys. - logger.info(f"configuring S3 integrator for {cloud}") - await ops_test.model.applications[S3_INTEGRATOR_APP_NAME].set_config(config) - action = await ops_test.model.units.get(f"{S3_INTEGRATOR_APP_NAME}/0").run_action( - "sync-s3-credentials", - **cloud_configs[1][cloud], - ) - await action.wait() - async with ops_test.fast_forward(fast_interval="60s"): - await ops_test.model.wait_for_idle( - apps=[database_app_name, S3_INTEGRATOR_APP_NAME], status="active", timeout=1500 - ) - - primary = await get_primary(ops_test, f"{database_app_name}/0") - for unit in ops_test.model.applications[database_app_name].units: - if unit.name != primary: - replica = unit.name - break - - # Write some data. - password = await get_password(ops_test, primary) - address = get_unit_address(ops_test, primary) - logger.info("creating a table in the database") - with db_connect(host=address, password=password) as connection: - connection.autocommit = True - connection.cursor().execute( - "CREATE TABLE IF NOT EXISTS backup_table_1 (test_collumn INT );" - ) - connection.close() - - # Run the "create backup" action. - logger.info("creating a backup") - action = await ops_test.model.units.get(replica).run_action("create-backup") - await action.wait() - backup_status = action.results.get("backup-status") - assert backup_status, "backup hasn't succeeded" - await ops_test.model.wait_for_idle( - apps=[database_app_name, S3_INTEGRATOR_APP_NAME], status="active", timeout=1000 - ) - - # Run the "list backups" action. - logger.info("listing the available backups") - action = await ops_test.model.units.get(replica).run_action("list-backups") - await action.wait() - backups = action.results.get("backups") - # 2 lines for header output, 1 backup line ==> 3 total lines - assert len(backups.split("\n")) == 3, "full backup is not outputted" - await ops_test.model.wait_for_idle(status="active", timeout=1000) - - # Write some data. - logger.info("creating a second table in the database") - with db_connect(host=address, password=password) as connection: - connection.autocommit = True - connection.cursor().execute("CREATE TABLE backup_table_2 (test_collumn INT );") - connection.close() - - # Run the "create backup" action. - logger.info("creating a backup") - action = await ops_test.model.units.get(replica).run_action( - "create-backup", **{"type": "differential"} - ) - await action.wait() - backup_status = action.results.get("backup-status") - assert backup_status, "backup hasn't succeeded" - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle(status="active", timeout=1000) - - # Run the "list backups" action. - logger.info("listing the available backups") - action = await ops_test.model.units.get(replica).run_action("list-backups") - await action.wait() - backups = action.results.get("backups") - # 2 lines for header output, 2 backup lines ==> 4 total lines - assert len(backups.split("\n")) == 4, "differential backup is not outputted" - await ops_test.model.wait_for_idle(status="active", timeout=1000) - - # Write some data. - logger.info("creating a second table in the database") - with db_connect(host=address, password=password) as connection: - connection.autocommit = True - connection.cursor().execute("CREATE TABLE backup_table_3 (test_collumn INT );") - connection.close() - # Scale down to be able to restore. - async with ops_test.fast_forward(): - await ops_test.model.destroy_unit(replica) - await ops_test.model.block_until( - lambda: len(ops_test.model.applications[database_app_name].units) == 1 - ) - - for unit in ops_test.model.applications[database_app_name].units: - remaining_unit = unit - break - - # Run the "restore backup" action for differential backup. - for attempt in Retrying( - stop=stop_after_attempt(10), wait=wait_exponential(multiplier=1, min=2, max=30) - ): - with attempt: - logger.info("restoring the backup") - last_diff_backup = backups.split("\n")[-1] - backup_id = last_diff_backup.split()[0] - action = await remaining_unit.run_action("restore", **{"backup-id": backup_id}) - await action.wait() - restore_status = action.results.get("restore-status") - assert restore_status, "restore hasn't succeeded" - - # Wait for the restore to complete. - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle(status="active", timeout=1000) - - # Check that the backup was correctly restored by having only the first created table. - logger.info("checking that the backup was correctly restored") - primary = await get_primary(ops_test, remaining_unit.name) - address = get_unit_address(ops_test, primary) - with db_connect( - host=address, password=password - ) as connection, connection.cursor() as cursor: - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_1');" - ) - assert cursor.fetchone()[ - 0 - ], "backup wasn't correctly restored: table 'backup_table_1' doesn't exist" - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_2');" - ) - assert cursor.fetchone()[ - 0 - ], "backup wasn't correctly restored: table 'backup_table_2' doesn't exist" - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_3');" - ) - assert not cursor.fetchone()[ - 0 - ], "backup wasn't correctly restored: table 'backup_table_3' exists" - connection.close() - - # Run the "restore backup" action for full backup. - for attempt in Retrying( - stop=stop_after_attempt(10), wait=wait_exponential(multiplier=1, min=2, max=30) - ): - with attempt: - logger.info("restoring the backup") - last_full_backup = backups.split("\n")[-2] - backup_id = last_full_backup.split()[0] - action = await remaining_unit.run_action("restore", **{"backup-id": backup_id}) - await action.wait() - restore_status = action.results.get("restore-status") - assert restore_status, "restore hasn't succeeded" - - # Wait for the restore to complete. - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle(status="active", timeout=1000) - - # Check that the backup was correctly restored by having only the first created table. - primary = await get_primary(ops_test, remaining_unit.name) - address = get_unit_address(ops_test, primary) - logger.info("checking that the backup was correctly restored") - with db_connect( - host=address, password=password - ) as connection, connection.cursor() as cursor: - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_1');" - ) - assert cursor.fetchone()[ - 0 - ], "backup wasn't correctly restored: table 'backup_table_1' doesn't exist" - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_2');" - ) - assert not cursor.fetchone()[ - 0 - ], "backup wasn't correctly restored: table 'backup_table_2' exists" - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_3');" - ) - assert not cursor.fetchone()[ - 0 - ], "backup wasn't correctly restored: table 'backup_table_3' exists" - connection.close() - - # Run the following steps only in one cloud (it's enough for those checks). - if cloud == list(cloud_configs[0].keys())[0]: - # Remove the relation to the TLS certificates operator. - await ops_test.model.applications[database_app_name].remove_relation( - f"{database_app_name}:certificates", f"{tls_certificates_app_name}:certificates" - ) - await ops_test.model.wait_for_idle( - apps=[database_app_name], status="active", timeout=1000 - ) - - # Scale up to be able to test primary and leader being different. - async with ops_test.fast_forward(): - await scale_application(ops_test, database_app_name, 2) - - # Ensure replication is working correctly. - new_unit_name = f"{database_app_name}/2" - address = get_unit_address(ops_test, new_unit_name) - with db_connect( - host=address, password=password - ) as connection, connection.cursor() as cursor: - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_1');" - ) - assert cursor.fetchone()[ - 0 - ], f"replication isn't working correctly: table 'backup_table_1' doesn't exist in {new_unit_name}" - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_2');" - ) - assert not cursor.fetchone()[ - 0 - ], f"replication isn't working correctly: table 'backup_table_2' exists in {new_unit_name}" - connection.close() - - switchover(ops_test, primary, new_unit_name) - - # Get the new primary unit. - primary = await get_primary(ops_test, new_unit_name) - # Check that the primary changed. - for attempt in Retrying( - stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=30) - ): - with attempt: - assert primary == new_unit_name - - # Ensure stanza is working correctly. - logger.info("listing the available backups") - action = await ops_test.model.units.get(new_unit_name).run_action("list-backups") - await action.wait() - backups = action.results.get("backups") - assert backups, "backups not outputted" - await ops_test.model.wait_for_idle(status="active", timeout=1000) - - # Remove the database app. - await ops_test.model.remove_application(database_app_name, block_until_done=True) - - # Remove the TLS operator. - await ops_test.model.remove_application(tls_certificates_app_name, block_until_done=True) - - -@pytest.mark.group(1) -async def test_restore_on_new_cluster(ops_test: OpsTest, github_secrets, charm) -> None: - """Test that is possible to restore a backup to another PostgreSQL cluster.""" - previous_database_app_name = f"{DATABASE_APP_NAME}-gcp" - database_app_name = f"new-{DATABASE_APP_NAME}" - await ops_test.model.deploy(charm, application_name=previous_database_app_name) - await ops_test.model.deploy( - charm, - application_name=database_app_name, - series=CHARM_SERIES, - ) - await ops_test.model.relate(previous_database_app_name, S3_INTEGRATOR_APP_NAME) - await ops_test.model.relate(database_app_name, S3_INTEGRATOR_APP_NAME) - async with ops_test.fast_forward(): - logger.info( - "waiting for the database charm to become blocked due to existing backups from another cluster in the repository" - ) - await wait_for_idle_on_blocked( - ops_test, - previous_database_app_name, - 2, - S3_INTEGRATOR_APP_NAME, - ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE, - ) - logger.info( - "waiting for the database charm to become blocked due to existing backups from another cluster in the repository" - ) - await wait_for_idle_on_blocked( - ops_test, - database_app_name, - 0, - S3_INTEGRATOR_APP_NAME, - ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE, - ) - - # Remove the database app with the same name as the previous one (that was used only to test - # that the cluster becomes blocked). - await ops_test.model.remove_application(previous_database_app_name, block_until_done=True) - - # Run the "list backups" action. - unit_name = f"{database_app_name}/0" - logger.info("listing the available backups") - action = await ops_test.model.units.get(unit_name).run_action("list-backups") - await action.wait() - backups = action.results.get("backups") - assert backups, "backups not outputted" - await wait_for_idle_on_blocked( - ops_test, - database_app_name, - 0, - S3_INTEGRATOR_APP_NAME, - ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE, - ) - - # Run the "restore backup" action. - for attempt in Retrying( - stop=stop_after_attempt(10), wait=wait_exponential(multiplier=1, min=2, max=30) - ): - with attempt: - logger.info("restoring the backup") - most_recent_backup = backups.split("\n")[-1] - backup_id = most_recent_backup.split()[0] - action = await ops_test.model.units.get(unit_name).run_action( - "restore", **{"backup-id": backup_id} - ) - await action.wait() - restore_status = action.results.get("restore-status") - assert restore_status, "restore hasn't succeeded" - - # Wait for the restore to complete. - async with ops_test.fast_forward(): - unit = ops_test.model.units.get(f"{database_app_name}/0") - await ops_test.model.block_until( - lambda: unit.workload_status_message == ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE - ) - - # Check that the backup was correctly restored by having only the first created table. - logger.info("checking that the backup was correctly restored") - password = await get_password(ops_test, unit_name) - address = get_unit_address(ops_test, unit_name) - with db_connect(host=address, password=password) as connection, connection.cursor() as cursor: - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_1');" - ) - assert cursor.fetchone()[ - 0 - ], "backup wasn't correctly restored: table 'backup_table_1' doesn't exist" - connection.close() - - -@pytest.mark.group(1) -async def test_invalid_config_and_recovery_after_fixing_it( - ops_test: OpsTest, cloud_configs: Tuple[Dict, Dict] -) -> None: - """Test that the charm can handle invalid and valid backup configurations.""" - database_app_name = f"new-{DATABASE_APP_NAME}" - - # Provide invalid backup configurations. - logger.info("configuring S3 integrator for an invalid cloud") - await ops_test.model.applications[S3_INTEGRATOR_APP_NAME].set_config({ - "endpoint": "endpoint", - "bucket": "bucket", - "path": "path", - "region": "region", - }) - action = await ops_test.model.units.get(f"{S3_INTEGRATOR_APP_NAME}/0").run_action( - "sync-s3-credentials", - **{ - "access-key": "access-key", - "secret-key": "secret-key", - }, - ) - await action.wait() - logger.info("waiting for the database charm to become blocked") - unit = ops_test.model.units.get(f"{database_app_name}/0") - await ops_test.model.block_until( - lambda: unit.workload_status_message == FAILED_TO_ACCESS_CREATE_BUCKET_ERROR_MESSAGE - ) - - # Provide valid backup configurations, but from another cluster repository. - logger.info( - "configuring S3 integrator for a valid cloud, but with the path of another cluster repository" - ) - await ops_test.model.applications[S3_INTEGRATOR_APP_NAME].set_config(cloud_configs[0][AWS]) - action = await ops_test.model.units.get(f"{S3_INTEGRATOR_APP_NAME}/0").run_action( - "sync-s3-credentials", - **cloud_configs[1][AWS], - ) - await action.wait() - logger.info("waiting for the database charm to become blocked") - unit = ops_test.model.units.get(f"{database_app_name}/0") - await ops_test.model.block_until( - lambda: unit.workload_status_message == ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE - ) - - # Provide valid backup configurations, with another path in the S3 bucket. - logger.info("configuring S3 integrator for a valid cloud") - config = cloud_configs[0][AWS].copy() - config["path"] = f"/postgresql/{uuid.uuid1()}" - await ops_test.model.applications[S3_INTEGRATOR_APP_NAME].set_config(config) - logger.info("waiting for the database charm to become active") - await ops_test.model.wait_for_idle( - apps=[database_app_name, S3_INTEGRATOR_APP_NAME], status="active" - ) diff --git a/tests/integration/test_charm.py b/tests/integration/test_charm.py deleted file mode 100644 index 12ac5c5a46..0000000000 --- a/tests/integration/test_charm.py +++ /dev/null @@ -1,333 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Canonical Ltd. -# See LICENSE file for licensing details. - - -import logging - -import psycopg2 -import pytest -import requests -from psycopg2 import sql -from pytest_operator.plugin import OpsTest -from tenacity import Retrying, stop_after_attempt, wait_exponential, wait_fixed - -from .helpers import ( - CHARM_SERIES, - DATABASE_APP_NAME, - STORAGE_PATH, - check_cluster_members, - convert_records_to_dict, - db_connect, - find_unit, - get_password, - get_primary, - get_unit_address, - scale_application, - switchover, -) - -logger = logging.getLogger(__name__) - -UNIT_IDS = [0, 1, 2] - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_deploy(ops_test: OpsTest, charm: str): - """Deploy the charm-under-test. - - Assert on the unit status before any relations/configurations take place. - """ - # Deploy the charm with Patroni resource. - await ops_test.model.deploy( - charm, - application_name=DATABASE_APP_NAME, - num_units=3, - series=CHARM_SERIES, - config={"profile": "testing"}, - ) - - # Reducing the update status frequency to speed up the triggering of deferred events. - await ops_test.model.set_config({"update-status-hook-interval": "10s"}) - - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=1500) - assert ops_test.model.applications[DATABASE_APP_NAME].units[0].workload_status == "active" - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.parametrize("unit_id", UNIT_IDS) -async def test_database_is_up(ops_test: OpsTest, unit_id: int): - # Query Patroni REST API and check the status that indicates - # both Patroni and PostgreSQL are up and running. - host = get_unit_address(ops_test, f"{DATABASE_APP_NAME}/{unit_id}") - result = requests.get(f"http://{host}:8008/health") - assert result.status_code == 200 - - -@pytest.mark.group(1) -@pytest.mark.parametrize("unit_id", UNIT_IDS) -async def test_exporter_is_up(ops_test: OpsTest, unit_id: int): - # Query Patroni REST API and check the status that indicates - # both Patroni and PostgreSQL are up and running. - host = get_unit_address(ops_test, f"{DATABASE_APP_NAME}/{unit_id}") - result = requests.get(f"http://{host}:9187/metrics") - assert result.status_code == 200 - assert "pg_exporter_last_scrape_error 0" in result.content.decode( - "utf8" - ), "Scrape error in postgresql_prometheus_exporter" - - -@pytest.mark.group(1) -@pytest.mark.parametrize("unit_id", UNIT_IDS) -async def test_settings_are_correct(ops_test: OpsTest, unit_id: int): - # Connect to the PostgreSQL instance. - # Retrieving the operator user password using the action. - any_unit_name = ops_test.model.applications[DATABASE_APP_NAME].units[0].name - password = await get_password(ops_test, any_unit_name) - - # Connect to PostgreSQL. - host = get_unit_address(ops_test, f"{DATABASE_APP_NAME}/{unit_id}") - logger.info("connecting to the database host: %s", host) - with db_connect(host, password) as connection: - assert connection.status == psycopg2.extensions.STATUS_READY - - # Retrieve settings from PostgreSQL pg_settings table. - # Here the SQL query gets a key-value pair composed by the name of the setting - # and its value, filtering the retrieved data to return only the settings - # that were set by Patroni. - settings_names = [ - "archive_command", - "archive_mode", - "autovacuum", - "data_directory", - "cluster_name", - "data_checksums", - "fsync", - "full_page_writes", - "lc_messages", - "listen_addresses", - "log_autovacuum_min_duration", - "log_checkpoints", - "log_destination", - "log_temp_files", - "log_timezone", - "max_connections", - "wal_level", - ] - with connection.cursor() as cursor: - cursor.execute( - sql.SQL("SELECT name,setting FROM pg_settings WHERE name IN ({});").format( - sql.SQL(", ").join(sql.Placeholder() * len(settings_names)) - ), - settings_names, - ) - records = cursor.fetchall() - settings = convert_records_to_dict(records) - connection.close() - - # Validate each configuration set by Patroni on PostgreSQL. - assert settings["archive_command"] == "/bin/true" - assert settings["archive_mode"] == "on" - assert settings["autovacuum"] == "on" - assert settings["cluster_name"] == DATABASE_APP_NAME - assert settings["data_directory"] == f"{STORAGE_PATH}/var/lib/postgresql" - assert settings["data_checksums"] == "on" - assert settings["fsync"] == "on" - assert settings["full_page_writes"] == "on" - assert settings["lc_messages"] == "en_US.UTF8" - assert settings["listen_addresses"] == host - assert settings["log_autovacuum_min_duration"] == "60000" - assert settings["log_checkpoints"] == "on" - assert settings["log_destination"] == "stderr" - assert settings["log_temp_files"] == "1" - assert settings["log_timezone"] == "UTC" - assert settings["max_connections"] == "100" - assert settings["wal_level"] == "logical" - - # Retrieve settings from Patroni REST API. - result = requests.get(f"http://{host}:8008/config") - settings = result.json() - - # Validate each configuration related to Patroni - assert settings["postgresql"]["use_pg_rewind"] is True - assert settings["postgresql"]["remove_data_directory_on_rewind_failure"] is True - assert settings["postgresql"]["remove_data_directory_on_diverged_timelines"] is True - assert settings["loop_wait"] == 10 - assert settings["retry_timeout"] == 10 - assert settings["maximum_lag_on_failover"] == 1048576 - - logger.warning("Asserting port ranges") - unit = ops_test.model.applications[DATABASE_APP_NAME].units[unit_id] - assert unit.data["port-ranges"][0]["from-port"] == 5432 - assert unit.data["port-ranges"][0]["to-port"] == 5432 - assert unit.data["port-ranges"][0]["protocol"] == "tcp" - - -@pytest.mark.group(1) -async def test_postgresql_parameters_change(ops_test: OpsTest) -> None: - """Test that's possible to change PostgreSQL parameters.""" - await ops_test.model.applications[DATABASE_APP_NAME].set_config({ - "memory_max_prepared_transactions": "100", - "memory_shared_buffers": "128", - "response_lc_monetary": "en_GB.utf8", - "experimental_max_connections": "200", - }) - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", idle_period=30) - any_unit_name = ops_test.model.applications[DATABASE_APP_NAME].units[0].name - password = await get_password(ops_test, any_unit_name) - - # Connect to PostgreSQL. - for unit_id in UNIT_IDS: - host = get_unit_address(ops_test, f"{DATABASE_APP_NAME}/{unit_id}") - logger.info("connecting to the database host: %s", host) - try: - with psycopg2.connect( - f"dbname='postgres' user='operator' host='{host}' password='{password}' connect_timeout=1" - ) as connection, connection.cursor() as cursor: - settings_names = [ - "max_prepared_transactions", - "shared_buffers", - "lc_monetary", - "max_connections", - ] - cursor.execute( - sql.SQL("SELECT name,setting FROM pg_settings WHERE name IN ({});").format( - sql.SQL(", ").join(sql.Placeholder() * len(settings_names)) - ), - settings_names, - ) - records = cursor.fetchall() - settings = convert_records_to_dict(records) - - # Validate each configuration set by Patroni on PostgreSQL. - assert settings["max_prepared_transactions"] == "100" - assert settings["shared_buffers"] == "128" - assert settings["lc_monetary"] == "en_GB.utf8" - assert settings["max_connections"] == "200" - finally: - connection.close() - - -@pytest.mark.group(1) -async def test_scale_down_and_up(ops_test: OpsTest): - """Test data is replicated to new units after a scale up.""" - # Ensure the initial number of units in the application. - initial_scale = len(UNIT_IDS) - await scale_application(ops_test, DATABASE_APP_NAME, initial_scale) - - # Scale down the application. - await scale_application(ops_test, DATABASE_APP_NAME, initial_scale - 1) - - # Ensure the member was correctly removed from the cluster - # (by comparing the cluster members and the current units). - await check_cluster_members(ops_test, DATABASE_APP_NAME) - - # Scale up the application (2 more units than the current scale). - await scale_application(ops_test, DATABASE_APP_NAME, initial_scale + 1) - - # Assert the correct members are part of the cluster. - await check_cluster_members(ops_test, DATABASE_APP_NAME) - - # Test the deletion of the unit that is both the leader and the primary. - any_unit_name = ops_test.model.applications[DATABASE_APP_NAME].units[0].name - primary = await get_primary(ops_test, any_unit_name) - leader_unit = await find_unit(ops_test, leader=True, application=DATABASE_APP_NAME) - - # Trigger a switchover if the primary and the leader are not the same unit. - if primary != leader_unit.name: - switchover(ops_test, primary, leader_unit.name) - - # Get the new primary unit. - primary = await get_primary(ops_test, any_unit_name) - # Check that the primary changed. - for attempt in Retrying( - stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=30) - ): - with attempt: - assert primary == leader_unit.name - - await ops_test.model.applications[DATABASE_APP_NAME].destroy_units(leader_unit.name) - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="active", timeout=1000, wait_for_exact_units=initial_scale - ) - - # Assert the correct members are part of the cluster. - await check_cluster_members(ops_test, DATABASE_APP_NAME) - - # Scale up the application (2 more units than the current scale). - await scale_application(ops_test, DATABASE_APP_NAME, initial_scale + 2) - - # Test the deletion of both the unit that is the leader and the unit that is the primary. - any_unit_name = ops_test.model.applications[DATABASE_APP_NAME].units[0].name - primary = await get_primary(ops_test, any_unit_name) - leader_unit = await find_unit(ops_test, DATABASE_APP_NAME, True) - - # Trigger a switchover if the primary and the leader are the same unit. - if primary == leader_unit.name: - switchover(ops_test, primary) - - # Get the new primary unit. - primary = await get_primary(ops_test, any_unit_name) - # Check that the primary changed. - for attempt in Retrying( - stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=30) - ): - with attempt: - assert primary != leader_unit.name - - await ops_test.model.applications[DATABASE_APP_NAME].destroy_units(primary, leader_unit.name) - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], - status="active", - timeout=2000, - wait_for_exact_units=initial_scale, - ) - - # Assert the correct members are part of the cluster. - await check_cluster_members(ops_test, DATABASE_APP_NAME) - - # End with the cluster having the initial number of units. - await scale_application(ops_test, DATABASE_APP_NAME, initial_scale) - - -@pytest.mark.group(1) -async def test_persist_data_through_primary_deletion(ops_test: OpsTest): - """Test data persists through a primary deletion.""" - # Set a composite application name in order to test in more than one series at the same time. - any_unit_name = ops_test.model.applications[DATABASE_APP_NAME].units[0].name - for attempt in Retrying(stop=stop_after_attempt(3), wait=wait_fixed(5), reraise=True): - with attempt: - primary = await get_primary(ops_test, any_unit_name) - password = await get_password(ops_test, primary) - - # Write data to primary IP. - host = get_unit_address(ops_test, primary) - logger.info(f"connecting to primary {primary} on {host}") - with db_connect(host, password) as connection: - connection.autocommit = True - with connection.cursor() as cursor: - cursor.execute("CREATE TABLE primarydeletiontest (testcol INT);") - connection.close() - - # Remove one unit. - await ops_test.model.destroy_units( - primary, - ) - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=1500) - - # Add the unit again. - await ops_test.model.applications[DATABASE_APP_NAME].add_unit(count=1) - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=2000) - - # Testing write occurred to every postgres instance by reading from them - for unit in ops_test.model.applications[DATABASE_APP_NAME].units: - host = unit.public_address - logger.info("connecting to the database host: %s", host) - with db_connect(host, password) as connection: - with connection.cursor() as cursor: - # Ensure we can read from "primarydeletiontest" table - cursor.execute("SELECT * FROM primarydeletiontest;") - connection.close() diff --git a/tests/integration/test_db.py b/tests/integration/test_db.py deleted file mode 100644 index 5ea134700f..0000000000 --- a/tests/integration/test_db.py +++ /dev/null @@ -1,358 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2022 Canonical Ltd. -# See LICENSE file for licensing details. -import asyncio -import logging - -import psycopg2 as psycopg2 -import pytest as pytest -from juju.errors import JujuUnitError -from mailmanclient import Client -from pytest_operator.plugin import OpsTest -from tenacity import Retrying, stop_after_delay, wait_fixed - -from . import markers -from .helpers import ( - APPLICATION_NAME, - CHARM_SERIES, - DATABASE_APP_NAME, - assert_sync_standbys, - build_connection_string, - check_database_users_existence, - check_databases_creation, - deploy_and_relate_application_with_postgresql, - deploy_and_relate_bundle_with_postgresql, - find_unit, - get_leader_unit, - run_command_on_unit, -) - -logger = logging.getLogger(__name__) - -LIVEPATCH_APP_NAME = "livepatch" -MAILMAN3_CORE_APP_NAME = "mailman3-core" -APPLICATION_UNITS = 1 -DATABASE_UNITS = 2 -RELATION_NAME = "db" - -ROLES_BLOCKING_MESSAGE = ( - "roles requested through relation, use postgresql_client interface instead" -) - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_mailman3_core_db(ops_test: OpsTest, charm: str) -> None: - """Deploy Mailman3 Core to test the 'db' relation.""" - async with ops_test.fast_forward(): - await ops_test.model.deploy( - charm, - application_name=DATABASE_APP_NAME, - num_units=DATABASE_UNITS, - series=CHARM_SERIES, - config={"profile": "testing"}, - ) - - # Wait until the PostgreSQL charm is successfully deployed. - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], - status="active", - timeout=1500, - wait_for_exact_units=DATABASE_UNITS, - ) - - # Extra config option for Mailman3 Core. - config = {"hostname": "example.org"} - # Deploy and test the deployment of Mailman3 Core. - relation_id = await deploy_and_relate_application_with_postgresql( - ops_test, - "mailman3-core", - MAILMAN3_CORE_APP_NAME, - APPLICATION_UNITS, - config, - ) - await check_databases_creation(ops_test, ["mailman3"]) - - mailman3_core_users = [f"relation-{relation_id}"] - - await check_database_users_existence(ops_test, mailman3_core_users, []) - - # Assert Mailman3 Core is configured to use PostgreSQL instead of SQLite. - mailman_unit = ops_test.model.applications[MAILMAN3_CORE_APP_NAME].units[0] - result = await run_command_on_unit(ops_test, mailman_unit.name, "mailman info") - assert "db url: postgres://" in result - - # Do some CRUD operations using Mailman3 Core client. - domain_name = "canonical.com" - list_name = "postgresql-list" - credentials = ( - result.split("credentials: ")[1].strip().split(":") - ) # This outputs a list containing username and password. - client = Client( - f"http://{mailman_unit.public_address}:8001/3.1", credentials[0], credentials[1] - ) - - # Create a domain and list the domains to check that the new one is there. - domain = client.create_domain(domain_name) - assert domain_name in [domain.mail_host for domain in client.domains] - - # Update the domain by creating a mailing list into it. - mailing_list = domain.create_list(list_name) - assert mailing_list.fqdn_listname in [ - mailing_list.fqdn_listname for mailing_list in domain.lists - ] - - # Delete the domain and check that the change was persisted. - domain.delete() - assert domain_name not in [domain.mail_host for domain in client.domains] - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_relation_data_is_updated_correctly_when_scaling(ops_test: OpsTest): - """Test that relation data, like connection data, is updated correctly when scaling.""" - # Retrieve the list of current database unit names. - units_to_remove = [unit.name for unit in ops_test.model.applications[DATABASE_APP_NAME].units] - - async with ops_test.fast_forward(): - # Add two more units. - await ops_test.model.applications[DATABASE_APP_NAME].add_units(2) - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="active", timeout=1500, wait_for_exact_units=4 - ) - - assert_sync_standbys( - ops_test.model.applications[DATABASE_APP_NAME].units[0].public_address, 2 - ) - - # Remove the original units. - leader_unit = await get_leader_unit(ops_test, DATABASE_APP_NAME) - await ops_test.model.applications[DATABASE_APP_NAME].destroy_units(*[ - unit for unit in units_to_remove if unit != leader_unit.name - ]) - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="active", timeout=600, wait_for_exact_units=3 - ) - await ops_test.model.applications[DATABASE_APP_NAME].destroy_units(leader_unit.name) - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="active", timeout=600, wait_for_exact_units=2 - ) - - # Get the updated connection data and assert it can be used - # to write and read some data properly. - database_unit_name = ops_test.model.applications[DATABASE_APP_NAME].units[0].name - primary_connection_string = await build_connection_string( - ops_test, MAILMAN3_CORE_APP_NAME, RELATION_NAME, remote_unit_name=database_unit_name - ) - replica_connection_string = await build_connection_string( - ops_test, - MAILMAN3_CORE_APP_NAME, - RELATION_NAME, - read_only_endpoint=True, - remote_unit_name=database_unit_name, - ) - - # Connect to the database using the primary connection string. - with psycopg2.connect(primary_connection_string) as connection: - connection.autocommit = True - with connection.cursor() as cursor: - # Check that it's possible to write and read data from the database that - # was created for the application. - cursor.execute("DROP TABLE IF EXISTS test;") - cursor.execute("CREATE TABLE test(data TEXT);") - cursor.execute("INSERT INTO test(data) VALUES('some data');") - cursor.execute("SELECT data FROM test;") - data = cursor.fetchone() - assert data[0] == "some data" - connection.close() - - # Connect to the database using the replica endpoint. - with psycopg2.connect(replica_connection_string) as connection: - with connection.cursor() as cursor: - # Read some data. - cursor.execute("SELECT data FROM test;") - data = cursor.fetchone() - assert data[0] == "some data" - - # Try to alter some data in a read-only transaction. - with pytest.raises(psycopg2.errors.ReadOnlySqlTransaction): - cursor.execute("DROP TABLE test;") - connection.close() - - # Remove the relation and test that its user was deleted - # (by checking that the connection string doesn't work anymore). - async with ops_test.fast_forward(): - await ops_test.model.applications[DATABASE_APP_NAME].remove_relation( - f"{DATABASE_APP_NAME}:{RELATION_NAME}", f"{MAILMAN3_CORE_APP_NAME}:{RELATION_NAME}" - ) - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=1000) - for attempt in Retrying(stop=stop_after_delay(60 * 3), wait=wait_fixed(10)): - with attempt: - with pytest.raises(psycopg2.OperationalError): - psycopg2.connect(primary_connection_string) - - -@pytest.mark.group(1) -@markers.amd64_only # sentry snap not available for arm64 -async def test_sentry_db_blocked(ops_test: OpsTest, charm: str) -> None: - async with ops_test.fast_forward(): - # Deploy Sentry and its dependencies. - await asyncio.gather( - ops_test.model.deploy( - "omnivector-sentry", application_name="sentry1", series="bionic" - ), - ops_test.model.deploy("haproxy", series="focal"), - ops_test.model.deploy("omnivector-redis", application_name="redis", series="bionic"), - ) - await ops_test.model.wait_for_idle( - apps=["sentry1"], - status="blocked", - raise_on_blocked=False, - timeout=1000, - ) - await asyncio.gather( - ops_test.model.relate("sentry1", "redis"), - ops_test.model.relate("sentry1", f"{DATABASE_APP_NAME}:db"), - ops_test.model.relate("sentry1", "haproxy"), - ) - - # Only the leader will block - leader_unit = await find_unit(ops_test, DATABASE_APP_NAME, True) - - try: - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], - status="blocked", - raise_on_blocked=True, - timeout=1000, - ) - assert False, "Leader didn't block" - except JujuUnitError: - pass - - assert ( - leader_unit.workload_status_message - == "extensions requested through relation, enable them through config options" - ) - - # Verify that the charm unblocks when the extensions are enabled after being blocked - # due to disabled extensions. - logger.info("Verifying that the charm unblocks when the extensions are enabled") - config = {"plugin_citext_enable": "True"} - await ops_test.model.applications[DATABASE_APP_NAME].set_config(config) - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME, "sentry1"], - status="active", - raise_on_blocked=False, - idle_period=15, - ) - - # Verify that the charm doesn't block when the extensions are enabled - # (another sentry deployment is used because it doesn't request a database - # again after the relation with the PostgreSQL charm is destroyed and reestablished). - logger.info("Verifying that the charm doesn't block when the extensions are enabled") - await asyncio.gather( - ops_test.model.remove_application("sentry1", block_until_done=True), - ops_test.model.deploy( - "omnivector-sentry", application_name="sentry2", series="bionic" - ), - ) - await asyncio.gather( - ops_test.model.relate("sentry2", "redis"), - ops_test.model.relate("sentry2", f"{DATABASE_APP_NAME}:db"), - ops_test.model.relate("sentry2", "haproxy"), - ) - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME, "sentry2"], status="active", raise_on_blocked=False - ) - - await asyncio.gather( - ops_test.model.remove_application("redis", block_until_done=True), - ops_test.model.remove_application("sentry2", block_until_done=True), - ops_test.model.remove_application("haproxy", block_until_done=True), - ) - - -@pytest.mark.group(1) -async def test_roles_blocking(ops_test: OpsTest, charm: str) -> None: - await ops_test.model.deploy( - APPLICATION_NAME, - application_name=APPLICATION_NAME, - config={"legacy_roles": True}, - series=CHARM_SERIES, - channel="edge", - ) - await ops_test.model.deploy( - APPLICATION_NAME, - application_name=f"{APPLICATION_NAME}2", - config={"legacy_roles": True}, - series=CHARM_SERIES, - channel="edge", - ) - - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME, APPLICATION_NAME, f"{APPLICATION_NAME}2"], - status="active", - timeout=1000, - ) - - await asyncio.gather( - ops_test.model.relate(f"{DATABASE_APP_NAME}:db", f"{APPLICATION_NAME}:db"), - ops_test.model.relate(f"{DATABASE_APP_NAME}:db", f"{APPLICATION_NAME}2:db"), - ) - - leader_unit = await get_leader_unit(ops_test, DATABASE_APP_NAME) - await ops_test.model.block_until( - lambda: leader_unit.workload_status_message == ROLES_BLOCKING_MESSAGE, timeout=1000 - ) - - assert leader_unit.workload_status_message == ROLES_BLOCKING_MESSAGE - - logger.info("Verify that the charm remains blocked if there are other blocking relations") - await ops_test.model.applications[DATABASE_APP_NAME].destroy_relation( - f"{DATABASE_APP_NAME}:db", f"{APPLICATION_NAME}:db" - ) - - await ops_test.model.block_until( - lambda: leader_unit.workload_status_message == ROLES_BLOCKING_MESSAGE, timeout=1000 - ) - - assert leader_unit.workload_status_message == ROLES_BLOCKING_MESSAGE - - logger.info("Verify that active status is restored when all blocking relations are gone") - await ops_test.model.applications[DATABASE_APP_NAME].destroy_relation( - f"{DATABASE_APP_NAME}:db", f"{APPLICATION_NAME}2:db" - ) - - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], - status="active", - timeout=1000, - ) - - -@markers.juju2 -@pytest.mark.group(1) -@markers.amd64_only # canonical-livepatch-server charm (in bundle) not available for arm64 -async def test_canonical_livepatch_onprem_bundle_db(ops_test: OpsTest) -> None: - # Deploy and test the Livepatch onprem bundle (using this PostgreSQL charm - # and an overlay to make the Ubuntu Advantage charm work with PostgreSQL). - # We intentionally wait for the `✘ sync_token not set` status message as we - # aren't providing an Ubuntu Pro token (as this is just a test to ensure - # the database works in the context of the relation with the Livepatch charm). - overlay = { - "applications": {"ubuntu-advantage": {"charm": "ubuntu-advantage", "series": CHARM_SERIES}} - } - await deploy_and_relate_bundle_with_postgresql( - ops_test, - "canonical-livepatch-onprem", - LIVEPATCH_APP_NAME, - relation_name="db", - status="blocked", - status_message="✘ sync_token not set", - overlay=overlay, - ) - - action = await ops_test.model.units.get(f"{LIVEPATCH_APP_NAME}/0").run_action("schema-upgrade") - await action.wait() - assert action.results.get("Code") == "0", "schema-upgrade action hasn't succeeded" diff --git a/tests/integration/test_db_admin.py b/tests/integration/test_db_admin.py deleted file mode 100644 index b8ad190d34..0000000000 --- a/tests/integration/test_db_admin.py +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2022 Canonical Ltd. -# See LICENSE file for licensing details. -import json -import logging - -import psycopg2 -import pytest -from landscape_api.base import HTTPError, run_query -from pytest_operator.plugin import OpsTest -from tenacity import Retrying, stop_after_delay, wait_fixed - -from .helpers import ( - CHARM_SERIES, - DATABASE_APP_NAME, - build_connection_string, - check_database_users_existence, - check_databases_creation, - deploy_and_relate_bundle_with_postgresql, - ensure_correct_relation_data, - get_landscape_api_credentials, - get_machine_from_unit, - get_primary, - primary_changed, - start_machine, - stop_machine, - switchover, -) - -logger = logging.getLogger(__name__) - -HAPROXY_APP_NAME = "haproxy" -LANDSCAPE_APP_NAME = "landscape-server" -RABBITMQ_APP_NAME = "rabbitmq-server" -DATABASE_UNITS = 3 -RELATION_NAME = "db-admin" - - -@pytest.mark.group(1) -async def test_landscape_scalable_bundle_db(ops_test: OpsTest, charm: str) -> None: - """Deploy Landscape Scalable Bundle to test the 'db-admin' relation.""" - await ops_test.model.deploy( - charm, - application_name=DATABASE_APP_NAME, - num_units=DATABASE_UNITS, - series=CHARM_SERIES, - config={"profile": "testing", "plugin_plpython3u_enable": "True"}, - ) - - # Deploy and test the Landscape Scalable bundle (using this PostgreSQL charm). - relation_id = await deploy_and_relate_bundle_with_postgresql( - ops_test, - "ch:landscape-scalable", - LANDSCAPE_APP_NAME, - main_application_num_units=2, - relation_name=RELATION_NAME, - timeout=3000, - ) - await check_databases_creation( - ops_test, - [ - "landscape-standalone-account-1", - "landscape-standalone-knowledge", - "landscape-standalone-main", - "landscape-standalone-package", - "landscape-standalone-resource-1", - "landscape-standalone-session", - ], - ) - - landscape_users = [f"relation-{relation_id}"] - - await check_database_users_existence(ops_test, landscape_users, []) - - # Create the admin user on Landscape through configs. - await ops_test.model.applications["landscape-server"].set_config({ - "admin_email": "admin@canonical.com", - "admin_name": "Admin", - "admin_password": "test1234", - }) - await ops_test.model.wait_for_idle( - apps=["landscape-server", DATABASE_APP_NAME], - status="active", - timeout=1200, - ) - - # Connect to the Landscape API through HAProxy and do some CRUD calls (without the update). - key, secret = await get_landscape_api_credentials(ops_test) - haproxy_unit = ops_test.model.applications[HAPROXY_APP_NAME].units[0] - api_uri = f"https://{haproxy_unit.public_address}/api/" - - # Create a role and list the available roles later to check that the new one is there. - role_name = "User1" - run_query(key, secret, "CreateRole", {"name": role_name}, api_uri, False) - api_response = run_query(key, secret, "GetRoles", {}, api_uri, False) - assert role_name in [user["name"] for user in json.loads(api_response)] - - # Remove the role and assert it isn't part of the roles list anymore. - run_query(key, secret, "RemoveRole", {"name": role_name}, api_uri, False) - api_response = run_query(key, secret, "GetRoles", {}, api_uri, False) - assert role_name not in [user["name"] for user in json.loads(api_response)] - - await ensure_correct_relation_data(ops_test, DATABASE_UNITS, LANDSCAPE_APP_NAME, RELATION_NAME) - - # Enable automatically-retry-hooks due to https://bugs.launchpad.net/juju/+bug/1999758 - # (the implemented workaround restarts the unit in the middle of the start hook, - # so the hook fails, and it's not retried on CI). - await ops_test.model.set_config({"automatically-retry-hooks": "true"}) - - # Stop the primary unit machine. - logger.info("restarting primary") - former_primary = await get_primary(ops_test, f"{DATABASE_APP_NAME}/0") - former_primary_machine = await get_machine_from_unit(ops_test, former_primary) - await stop_machine(ops_test, former_primary_machine) - - # Await for a new primary to be elected. - assert await primary_changed(ops_test, former_primary) - - # Start the former primary unit machine again. - await start_machine(ops_test, former_primary_machine) - - # Wait for the unit to be ready again. Some errors in the start hook may happen due to - # rebooting the unit machine in the middle of a hook (what is needed when the issue from - # https://bugs.launchpad.net/juju/+bug/1999758 happens). - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="active", timeout=1500, raise_on_error=False - ) - - await ensure_correct_relation_data(ops_test, DATABASE_UNITS, LANDSCAPE_APP_NAME, RELATION_NAME) - - # Trigger a switchover. - logger.info("triggering a switchover") - primary = await get_primary(ops_test, f"{DATABASE_APP_NAME}/0") - switchover(ops_test, primary) - - # Await for a new primary to be elected. - assert await primary_changed(ops_test, primary) - - await ensure_correct_relation_data(ops_test, DATABASE_UNITS, LANDSCAPE_APP_NAME, RELATION_NAME) - - # Trigger a config change to start the Landscape API service again. - # The Landscape API was stopped after a new primary (postgresql) was elected. - await ops_test.model.applications["landscape-server"].set_config({ - "admin_name": "Admin 1", - }) - await ops_test.model.wait_for_idle( - apps=["landscape-server", DATABASE_APP_NAME], timeout=1500, status="active" - ) - - # Create a role and list the available roles later to check that the new one is there. - role_name = "User2" - try: - run_query(key, secret, "CreateRole", {"name": role_name}, api_uri, False) - except HTTPError as e: - assert False, f"error when trying to create role on Landscape: {e}" - - database_unit_name = ops_test.model.applications[DATABASE_APP_NAME].units[0].name - connection_string = await build_connection_string( - ops_test, LANDSCAPE_APP_NAME, RELATION_NAME, remote_unit_name=database_unit_name - ) - - # Remove the applications from the bundle. - await ops_test.model.remove_application(LANDSCAPE_APP_NAME, block_until_done=True) - await ops_test.model.remove_application(HAPROXY_APP_NAME, block_until_done=True) - await ops_test.model.remove_application(RABBITMQ_APP_NAME, block_until_done=True) - - # Remove the relation and test that its user was deleted - # (by checking that the connection string doesn't work anymore). - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=1000) - for attempt in Retrying(stop=stop_after_delay(60 * 3), wait=wait_fixed(10)): - with attempt: - with pytest.raises(psycopg2.OperationalError): - psycopg2.connect(connection_string) - - # Remove the PostgreSQL application. - await ops_test.model.remove_application(DATABASE_APP_NAME, block_until_done=True) diff --git a/tests/integration/test_password_rotation.py b/tests/integration/test_password_rotation.py deleted file mode 100644 index ffb4cca458..0000000000 --- a/tests/integration/test_password_rotation.py +++ /dev/null @@ -1,196 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2022 Canonical Ltd. -# See LICENSE file for licensing details. -import json -import time - -import psycopg2 -import pytest -from pytest_operator.plugin import OpsTest - -from . import markers -from .helpers import ( - CHARM_SERIES, - METADATA, - check_patroni, - db_connect, - get_leader_unit, - get_password, - get_primary, - get_unit_address, - restart_patroni, - run_command_on_unit, - set_password, -) - -APP_NAME = METADATA["name"] - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_deploy_active(ops_test: OpsTest): - """Build the charm and deploy it.""" - charm = await ops_test.build_charm(".") - async with ops_test.fast_forward(): - await ops_test.model.deploy( - charm, - application_name=APP_NAME, - num_units=3, - series=CHARM_SERIES, - config={"profile": "testing"}, - ) - await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1500) - - -@pytest.mark.group(1) -async def test_password_rotation(ops_test: OpsTest): - """Test password rotation action.""" - # Get the initial passwords set for the system users. - any_unit_name = ops_test.model.applications[APP_NAME].units[0].name - superuser_password = await get_password(ops_test, any_unit_name) - replication_password = await get_password(ops_test, any_unit_name, "replication") - monitoring_password = await get_password(ops_test, any_unit_name, "monitoring") - backup_password = await get_password(ops_test, any_unit_name, "backup") - rewind_password = await get_password(ops_test, any_unit_name, "rewind") - - # Get the leader unit name (because passwords can only be set through it). - leader = None - for unit in ops_test.model.applications[APP_NAME].units: - if await unit.is_leader_from_status(): - leader = unit.name - break - - # Change both passwords. - result = await set_password(ops_test, unit_name=leader) - assert "password" in result.keys() - await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1000) - - # For replication, generate a specific password and pass it to the action. - new_replication_password = "test-password" - result = await set_password( - ops_test, unit_name=leader, username="replication", password=new_replication_password - ) - assert "password" in result.keys() - await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1000) - - # For monitoring, generate a specific password and pass it to the action. - new_monitoring_password = "test-password" - result = await set_password( - ops_test, unit_name=leader, username="monitoring", password=new_monitoring_password - ) - assert "password" in result.keys() - await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1000) - - # For backup, generate a specific password and pass it to the action. - new_backup_password = "test-password" - result = await set_password( - ops_test, unit_name=leader, username="backup", password=new_backup_password - ) - assert "password" in result.keys() - await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1000) - - # For rewind, generate a specific password and pass it to the action. - new_rewind_password = "test-password" - result = await set_password( - ops_test, unit_name=leader, username="rewind", password=new_rewind_password - ) - assert "password" in result.keys() - await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1000) - - new_superuser_password = await get_password(ops_test, any_unit_name) - assert superuser_password != new_superuser_password - assert new_replication_password == await get_password(ops_test, any_unit_name, "replication") - assert replication_password != new_replication_password - assert new_monitoring_password == await get_password(ops_test, any_unit_name, "monitoring") - assert monitoring_password != new_monitoring_password - assert new_backup_password == await get_password(ops_test, any_unit_name, "backup") - assert backup_password != new_backup_password - assert new_rewind_password == await get_password(ops_test, any_unit_name, "rewind") - assert rewind_password != new_rewind_password - - # Restart Patroni on any non-leader unit and check that - # Patroni and PostgreSQL continue to work. - restart_time = time.time() - for unit in ops_test.model.applications[APP_NAME].units: - if not await unit.is_leader_from_status(): - restart_patroni(ops_test, unit.name) - assert check_patroni(ops_test, unit.name, restart_time) - - -@pytest.mark.group(1) -@markers.juju_secrets -async def test_password_from_secret_same_as_cli(ops_test: OpsTest): - """Checking if password is same as returned by CLI. - - I.e. we're manipulating the secret we think we're manipulating. - """ - # - # No way to retrieve a secet by label for now (https://bugs.launchpad.net/juju/+bug/2037104) - # Therefore we take advantage of the fact, that we only have ONE single secret a this point - # So we take the single member of the list - # NOTE: This would BREAK if for instance units had secrets at the start... - # - leader_unit = await get_leader_unit(ops_test, APP_NAME) - leader = leader_unit.name - password = await get_password(ops_test, unit_name=leader, username="replication") - complete_command = "list-secrets" - _, stdout, _ = await ops_test.juju(*complete_command.split()) - secret_id = stdout.split("\n")[1].split(" ")[0] - - # Getting back the pw from juju CLI - complete_command = f"show-secret {secret_id} --reveal --format=json" - _, stdout, _ = await ops_test.juju(*complete_command.split()) - data = json.loads(stdout) - assert data[secret_id]["content"]["Data"]["replication-password"] == password - - -@pytest.mark.group(1) -async def test_empty_password(ops_test: OpsTest) -> None: - """Test that the password can't be set to an empty string.""" - leader_unit = await get_leader_unit(ops_test, APP_NAME) - leader = leader_unit.name - await set_password(ops_test, unit_name=leader, username="replication", password="") - password = await get_password(ops_test, unit_name=leader, username="replication") - # The password is 'None', BUT NOT because of SECRET_DELETED_LABEL - # `get_secret()` returns a None value (as the field in the secret is set to string value "None") - # And this true None value is turned to a string when the event is setting results. - assert password == "None" - - -@pytest.mark.group(1) -async def test_db_connection_with_empty_password(ops_test: OpsTest): - """Test that user can't connect with empty password.""" - primary = await get_primary(ops_test, f"{APP_NAME}/0") - address = get_unit_address(ops_test, primary) - with pytest.raises(psycopg2.Error): - with db_connect(address, "") as connection: - connection.close() - - -@pytest.mark.group(1) -async def test_no_password_change_on_invalid_password(ops_test: OpsTest) -> None: - """Test that in general, there is no change when password validation fails.""" - leader_unit = await get_leader_unit(ops_test, APP_NAME) - leader = leader_unit.name - password1 = await get_password(ops_test, unit_name=leader, username="replication") - # The password has to be minimum 3 characters - await set_password(ops_test, unit_name=leader, username="replication", password="ca" * 1000000) - password2 = await get_password(ops_test, unit_name=leader, username="replication") - # The password didn't change - assert password1 == password2 - - -@pytest.mark.group(1) -async def test_no_password_exposed_on_logs(ops_test: OpsTest) -> None: - """Test that passwords don't get exposed on postgresql logs.""" - for unit in ops_test.model.applications[APP_NAME].units: - try: - logs = await run_command_on_unit( - ops_test, - unit.name, - "grep PASSWORD /var/snap/charmed-postgresql/common/var/log/postgresql/postgresql-*.log", - ) - except Exception: - continue - assert len(logs) == 0, f"Sensitive information detected on {unit.name} logs" diff --git a/tests/integration/test_plugins.py b/tests/integration/test_plugins.py deleted file mode 100644 index 5d78dcd3aa..0000000000 --- a/tests/integration/test_plugins.py +++ /dev/null @@ -1,210 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2023 Canonical Ltd. -# See LICENSE file for licensing details. -import logging - -import psycopg2 as psycopg2 -import pytest as pytest -from pytest_operator.plugin import OpsTest - -from .helpers import ( - CHARM_SERIES, - DATABASE_APP_NAME, - db_connect, - get_password, - get_primary, - get_unit_address, -) - -logger = logging.getLogger(__name__) - -CITEXT_EXTENSION_STATEMENT = "CREATE TABLE citext_test (value CITEXT);" -DEBVERSION_EXTENSION_STATEMENT = "CREATE TABLE debversion_test (value DEBVERSION);" -HSTORE_EXTENSION_STATEMENT = "CREATE TABLE hstore_test (value hstore);" -PG_TRGM_EXTENSION_STATEMENT = "SELECT word_similarity('word', 'two words');" -PLPYTHON3U_EXTENSION_STATEMENT = 'CREATE FUNCTION plpython_test() RETURNS varchar[] AS $$ return "hello" $$ LANGUAGE plpython3u;' -UNACCENT_EXTENSION_STATEMENT = "SELECT ts_lexize('unaccent','Hôtel');" -BLOOM_EXTENSION_STATEMENT = ( - "CREATE TABLE tbloom_test (i int);CREATE INDEX btreeidx ON tbloom_test USING bloom (i);" -) -BTREEGIN_EXTENSION_STATEMENT = "CREATE TABLE btree_gin_test (a int4);CREATE INDEX btreeginidx ON btree_gin_test USING GIN (a);" -BTREEGIST_EXTENSION_STATEMENT = "CREATE TABLE btree_gist_test (a int4);CREATE INDEX btreegistidx ON btree_gist_test USING GIST (a);" -CUBE_EXTENSION_STATEMENT = "SELECT cube_inter('(0,-1),(1,1)', '(-2),(2)');" -DICTINT_EXTENSION_STATEMENT = "SELECT ts_lexize('intdict', '12345678');" -DICTXSYN_EXTENSION_STATEMENT = "SELECT ts_lexize('xsyn', 'word');" -EARTHDISTANCE_EXTENSION_STATEMENT = "SELECT earth_distance(ll_to_earth(-81.3927381, 30.2918842),ll_to_earth(-87.6473133, 41.8853881));" -FUZZYSTRMATCH_EXTENSION_STATEMENT = "SELECT soundex('hello world!');" -INTARRAY_EXTENSION_STATEMENT = "CREATE TABLE intarray_test (mid INT PRIMARY KEY, sections INT[]);SELECT intarray_test.mid FROM intarray_test WHERE intarray_test.sections @> '{1,2}';" -ISN_EXTENSION_STATEMENT = "SELECT isbn('978-0-393-04002-9');" -LO_EXTENSION_STATEMENT = "CREATE TABLE lo_test (value lo);" -LTREE_EXTENSION_STATEMENT = "CREATE TABLE ltree_test (path ltree);" -OLD_SNAPSHOT_EXTENSION_STATEMENT = "SELECT * from pg_old_snapshot_time_mapping();" -PG_FREESPACEMAP_EXTENSION_STATEMENT = ( - "CREATE TABLE pg_freespacemap_test (i int);SELECT * FROM pg_freespace('pg_freespacemap_test');" -) -PGROWLOCKS_EXTENSION_STATEMENT = ( - "CREATE TABLE pgrowlocks_test (i int);SELECT * FROM pgrowlocks('pgrowlocks_test');" -) -PGSTATTUPLE_EXTENSION_STATEMENT = "SELECT * FROM pgstattuple('pg_catalog.pg_proc');" -PG_VISIBILITY_EXTENSION_STATEMENT = "CREATE TABLE pg_visibility_test (i int);SELECT * FROM pg_visibility('pg_visibility_test'::regclass);" -SEG_EXTENSION_STATEMENT = "SELECT '10(+-)1'::seg as seg;" -TABLEFUNC_EXTENSION_STATEMENT = "SELECT * FROM normal_rand(1000, 5, 3);" -TCN_EXTENSION_STATEMENT = "CREATE TABLE tcn_test (i int);CREATE TRIGGER tcn_test_idx AFTER INSERT OR UPDATE OR DELETE ON tcn_test FOR EACH ROW EXECUTE FUNCTION TRIGGERED_CHANGE_NOTIFICATION();" -TSM_SYSTEM_ROWS_EXTENSION_STATEMENT = "CREATE TABLE tsm_system_rows_test (i int);SELECT * FROM tsm_system_rows_test TABLESAMPLE SYSTEM_ROWS(100);" -TSM_SYSTEM_TIME_EXTENSION_STATEMENT = "CREATE TABLE tsm_system_time_test (i int);SELECT * FROM tsm_system_time_test TABLESAMPLE SYSTEM_TIME(1000);" -UUID_OSSP_EXTENSION_STATEMENT = "SELECT uuid_nil();" -REFINT_EXTENSION_STATEMENT = "CREATE TABLE A (ID int4 not null); CREATE UNIQUE INDEX AI ON A (ID);CREATE TABLE B (REFB int4);CREATE INDEX BI ON B (REFB);CREATE TRIGGER BT BEFORE INSERT OR UPDATE ON B FOR EACH ROW EXECUTE PROCEDURE check_primary_key ('REFB', 'A', 'ID');" -AUTOINC_EXTENSION_STATEMENT = "CREATE TABLE ids (id int4, idesc text);CREATE TRIGGER ids_nextid BEFORE INSERT OR UPDATE ON ids FOR EACH ROW EXECUTE PROCEDURE autoinc (id, next_id);" -INSERT_USERNAME_EXTENSION_STATEMENT = "CREATE TABLE username_test (name text, username text not null);CREATE TRIGGER insert_usernames BEFORE INSERT OR UPDATE ON username_test FOR EACH ROW EXECUTE PROCEDURE insert_username (username);" -MODDATETIME_EXTENSION_STATEMENT = "CREATE TABLE mdt (moddate timestamp DEFAULT CURRENT_TIMESTAMP NOT NULL);CREATE TRIGGER mdt_moddatetime BEFORE UPDATE ON mdt FOR EACH ROW EXECUTE PROCEDURE moddatetime (moddate);" -BOOL_PLPERL_EXTENSION_STATEMENT = "CREATE FUNCTION hello_bool(bool) RETURNS TEXT TRANSFORM FOR TYPE bool LANGUAGE plperl AS $$ my $with_world = shift; return sprintf('hello%s', $with_world ? ' world' : ''); $$;" -HLL_EXTENSION_STATEMENT = "CREATE TABLE hll_test (users hll);" -HYPOPG_EXTENSION_STATEMENT = "CREATE TABLE hypopg_test (id integer, val text); SELECT hypopg_create_index('CREATE INDEX ON hypopg_test (id)');" -IP4R_EXTENSION_STATEMENT = "CREATE TABLE ip4r_test (ip ip4);" -JSONB_PLPERL_EXTENSION_STATEMENT = "CREATE OR REPLACE FUNCTION jsonb_plperl_test(val jsonb) RETURNS jsonb TRANSFORM FOR TYPE jsonb LANGUAGE plperl as $$ return $_[0]; $$;" -ORAFCE_EXTENSION_STATEMENT = "SELECT add_months(date '2005-05-31',1);" -PG_SIMILARITY_EXTENSION_STATEMENT = "SHOW pg_similarity.levenshtein_threshold;" -PLPERL_EXTENSION_STATEMENT = "CREATE OR REPLACE FUNCTION plperl_test(name text) RETURNS text AS $$ return $_SHARED{$_[0]}; $$ LANGUAGE plperl;" -PREFIX_EXTENSION_STATEMENT = "SELECT '123'::prefix_range @> '123456';" -RDKIT_EXTENSION_STATEMENT = "SELECT is_valid_smiles('CCC');" -TDS_FDW_EXTENSION_STATEMENT = "CREATE SERVER mssql_svr FOREIGN DATA WRAPPER tds_fdw OPTIONS (servername 'tds_fdw_test', port '3306', database 'tds_fdw_test', tds_version '7.1');" -ICU_EXT_EXTENSION_STATEMENT = ( - 'CREATE COLLATION "vat-lat" (provider = icu, locale = "la-VA-u-kn-true")' -) -PLTCL_EXTENSION_STATEMENT = ( - "CREATE FUNCTION pltcl_test(integer) RETURNS integer AS $$ return $1 $$ LANGUAGE pltcl STRICT;" -) -POSTGIS_EXTENSION_STATEMENT = "SELECT PostGIS_Full_Version();" -ADDRESS_STANDARDIZER_EXTENSION_STATEMENT = "SELECT num, street, city, zip, zipplus FROM parse_address('1 Devonshire Place, Boston, MA 02109-1234');" -ADDRESS_STANDARDIZER_DATA_US_EXTENSION_STATEMENT = "SELECT house_num, name, suftype, city, country, state, unit FROM standardize_address('us_lex', 'us_gaz', 'us_rules', 'One Devonshire Place, PH 301, Boston, MA 02109');" -POSTGIS_TIGER_GEOCODER_EXTENSION_STATEMENT = "SELECT * FROM standardize_address('tiger.pagc_lex', 'tiger.pagc_gaz', 'tiger.pagc_rules', 'One Devonshire Place, PH 301, Boston, MA 02109-1234');" -POSTGIS_TOPOLOGY_EXTENSION_STATEMENT = "SELECT topology.CreateTopology('nyc_topo', 26918, 0.5);" -POSTGIS_RASTER_EXTENSION_STATEMENT = ( - "CREATE TABLE test_postgis_raster (name varchar, rast raster);" -) -VECTOR_EXTENSION_STATEMENT = ( - "CREATE TABLE vector_test (id bigserial PRIMARY KEY, embedding vector(3));" -) -TIMESCALEDB_EXTENSION_STATEMENT = "CREATE TABLE test_timescaledb (time TIMESTAMPTZ NOT NULL); SELECT create_hypertable('test_timescaledb', 'time');" - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_plugins(ops_test: OpsTest) -> None: - """Build and deploy one unit of PostgreSQL and then test the available plugins.""" - # Build and deploy the PostgreSQL charm. - async with ops_test.fast_forward(): - charm = await ops_test.build_charm(".") - await ops_test.model.deploy( - charm, - num_units=2, - series=CHARM_SERIES, - config={"profile": "testing"}, - ) - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=1500) - - sql_tests = { - "plugin_citext_enable": CITEXT_EXTENSION_STATEMENT, - "plugin_debversion_enable": DEBVERSION_EXTENSION_STATEMENT, - "plugin_hstore_enable": HSTORE_EXTENSION_STATEMENT, - "plugin_pg_trgm_enable": PG_TRGM_EXTENSION_STATEMENT, - "plugin_plpython3u_enable": PLPYTHON3U_EXTENSION_STATEMENT, - "plugin_unaccent_enable": UNACCENT_EXTENSION_STATEMENT, - "plugin_bloom_enable": BLOOM_EXTENSION_STATEMENT, - "plugin_btree_gin_enable": BTREEGIN_EXTENSION_STATEMENT, - "plugin_btree_gist_enable": BTREEGIST_EXTENSION_STATEMENT, - "plugin_cube_enable": CUBE_EXTENSION_STATEMENT, - "plugin_dict_int_enable": DICTINT_EXTENSION_STATEMENT, - "plugin_dict_xsyn_enable": DICTXSYN_EXTENSION_STATEMENT, - "plugin_earthdistance_enable": EARTHDISTANCE_EXTENSION_STATEMENT, - "plugin_fuzzystrmatch_enable": FUZZYSTRMATCH_EXTENSION_STATEMENT, - "plugin_intarray_enable": INTARRAY_EXTENSION_STATEMENT, - "plugin_isn_enable": ISN_EXTENSION_STATEMENT, - "plugin_lo_enable": LO_EXTENSION_STATEMENT, - "plugin_ltree_enable": LTREE_EXTENSION_STATEMENT, - "plugin_old_snapshot_enable": OLD_SNAPSHOT_EXTENSION_STATEMENT, - "plugin_pg_freespacemap_enable": PG_FREESPACEMAP_EXTENSION_STATEMENT, - "plugin_pgrowlocks_enable": PGROWLOCKS_EXTENSION_STATEMENT, - "plugin_pgstattuple_enable": PGSTATTUPLE_EXTENSION_STATEMENT, - "plugin_pg_visibility_enable": PG_VISIBILITY_EXTENSION_STATEMENT, - "plugin_seg_enable": SEG_EXTENSION_STATEMENT, - "plugin_tablefunc_enable": TABLEFUNC_EXTENSION_STATEMENT, - "plugin_tcn_enable": TCN_EXTENSION_STATEMENT, - "plugin_tsm_system_rows_enable": TSM_SYSTEM_ROWS_EXTENSION_STATEMENT, - "plugin_tsm_system_time_enable": TSM_SYSTEM_TIME_EXTENSION_STATEMENT, - "plugin_uuid_ossp_enable": UUID_OSSP_EXTENSION_STATEMENT, - "plugin_spi_enable": [ - REFINT_EXTENSION_STATEMENT, - AUTOINC_EXTENSION_STATEMENT, - INSERT_USERNAME_EXTENSION_STATEMENT, - MODDATETIME_EXTENSION_STATEMENT, - ], - "plugin_bool_plperl_enable": BOOL_PLPERL_EXTENSION_STATEMENT, - "plugin_hll_enable": HLL_EXTENSION_STATEMENT, - "plugin_postgis_enable": POSTGIS_EXTENSION_STATEMENT, - "plugin_hypopg_enable": HYPOPG_EXTENSION_STATEMENT, - "plugin_ip4r_enable": IP4R_EXTENSION_STATEMENT, - "plugin_plperl_enable": PLPERL_EXTENSION_STATEMENT, - "plugin_jsonb_plperl_enable": JSONB_PLPERL_EXTENSION_STATEMENT, - "plugin_orafce_enable": ORAFCE_EXTENSION_STATEMENT, - "plugin_pg_similarity_enable": ORAFCE_EXTENSION_STATEMENT, - "plugin_prefix_enable": PREFIX_EXTENSION_STATEMENT, - "plugin_rdkit_enable": RDKIT_EXTENSION_STATEMENT, - "plugin_tds_fdw_enable": TDS_FDW_EXTENSION_STATEMENT, - "plugin_icu_ext_enable": ICU_EXT_EXTENSION_STATEMENT, - "plugin_pltcl_enable": PLTCL_EXTENSION_STATEMENT, - "plugin_address_standardizer_enable": ADDRESS_STANDARDIZER_EXTENSION_STATEMENT, - "plugin_address_standardizer_data_us_enable": ADDRESS_STANDARDIZER_DATA_US_EXTENSION_STATEMENT, - "plugin_postgis_tiger_geocoder_enable": POSTGIS_TIGER_GEOCODER_EXTENSION_STATEMENT, - "plugin_postgis_raster_enable": POSTGIS_RASTER_EXTENSION_STATEMENT, - "plugin_postgis_topology_enable": POSTGIS_TOPOLOGY_EXTENSION_STATEMENT, - "plugin_vector_enable": VECTOR_EXTENSION_STATEMENT, - "plugin_timescaledb_enable": TIMESCALEDB_EXTENSION_STATEMENT, - } - - def enable_disable_config(enabled: False): - config = {} - for plugin in sql_tests.keys(): - config[plugin] = f"{enabled}" - return config - - # Check that the available plugins are disabled. - primary = await get_primary(ops_test, f"{DATABASE_APP_NAME}/0") - password = await get_password(ops_test, primary) - address = get_unit_address(ops_test, primary) - - config = enable_disable_config(False) - await ops_test.model.applications[DATABASE_APP_NAME].set_config(config) - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active") - - logger.info("checking that the plugins are disabled") - with db_connect(host=address, password=password) as connection: - connection.autocommit = True - for query in sql_tests.values(): - if isinstance(query, list): - for test in query: - with pytest.raises(psycopg2.Error): - connection.cursor().execute(test) - else: - with pytest.raises(psycopg2.Error): - connection.cursor().execute(query) - connection.close() - - # Enable the plugins. - logger.info("enabling the plugins") - - config = enable_disable_config(True) - await ops_test.model.applications[DATABASE_APP_NAME].set_config(config) - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active") - - # Check that the available plugins are enabled. - logger.info("checking that the plugins are enabled") - with db_connect(host=address, password=password) as connection: - connection.autocommit = True - for query in sql_tests.values(): - if isinstance(query, list): - for test in query: - connection.cursor().execute(test) - else: - connection.cursor().execute(query) - connection.close() diff --git a/tests/integration/test_subordinates.py b/tests/integration/test_subordinates.py deleted file mode 100644 index f9d30dedba..0000000000 --- a/tests/integration/test_subordinates.py +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Canonical Ltd. -# See LICENSE file for licensing details. - -import logging -from asyncio import gather - -import pytest -from pytest_operator.plugin import OpsTest - -from .helpers import ( - CHARM_SERIES, - scale_application, -) - -DATABASE_APP_NAME = "pg" -LS_CLIENT = "landscape-client" -UBUNTU_PRO_APP_NAME = "ubuntu-advantage" - -logger = logging.getLogger(__name__) - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_deploy(ops_test: OpsTest, charm: str, github_secrets): - await gather( - ops_test.model.deploy( - charm, - application_name=DATABASE_APP_NAME, - num_units=3, - series=CHARM_SERIES, - ), - ops_test.model.deploy( - UBUNTU_PRO_APP_NAME, - config={"token": github_secrets["UBUNTU_PRO_TOKEN"]}, - channel="latest/edge", - num_units=0, - ), - ops_test.model.deploy( - LS_CLIENT, - config={ - "account-name": github_secrets["LANDSCAPE_ACCOUNT_NAME"], - "registration-key": github_secrets["LANDSCAPE_REGISTRATION_KEY"], - "ppa": "ppa:landscape/self-hosted-beta", - }, - channel="latest/edge", - num_units=0, - ), - ) - - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=2000) - await ops_test.model.relate(f"{DATABASE_APP_NAME}:juju-info", f"{LS_CLIENT}:container") - await ops_test.model.relate( - f"{DATABASE_APP_NAME}:juju-info", f"{UBUNTU_PRO_APP_NAME}:juju-info" - ) - await ops_test.model.wait_for_idle( - apps=[LS_CLIENT, UBUNTU_PRO_APP_NAME, DATABASE_APP_NAME], status="active" - ) - - -@pytest.mark.group(1) -async def test_scale_up(ops_test: OpsTest, github_secrets): - await scale_application(ops_test, DATABASE_APP_NAME, 4) - - await ops_test.model.wait_for_idle( - apps=[LS_CLIENT, UBUNTU_PRO_APP_NAME, DATABASE_APP_NAME], status="active", timeout=1500 - ) - - -@pytest.mark.group(1) -async def test_scale_down(ops_test: OpsTest, github_secrets): - await scale_application(ops_test, DATABASE_APP_NAME, 3) - - await ops_test.model.wait_for_idle( - apps=[LS_CLIENT, UBUNTU_PRO_APP_NAME, DATABASE_APP_NAME], status="active", timeout=1500 - ) diff --git a/tests/integration/test_tls.py b/tests/integration/test_tls.py deleted file mode 100644 index d336f96259..0000000000 --- a/tests/integration/test_tls.py +++ /dev/null @@ -1,223 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2022 Canonical Ltd. -# See LICENSE file for licensing details. -import logging -import os - -import pytest as pytest -from pytest_operator.plugin import OpsTest -from tenacity import Retrying, stop_after_attempt, stop_after_delay, wait_exponential - -from . import architecture -from .helpers import ( - CHARM_SERIES, - DATABASE_APP_NAME, - METADATA, - change_primary_start_timeout, - check_tls, - check_tls_patroni_api, - check_tls_replication, - db_connect, - get_password, - get_primary, - get_unit_address, - primary_changed, - restart_machine, - run_command_on_unit, -) -from .juju_ import juju_major_version - -logger = logging.getLogger(__name__) - -APP_NAME = METADATA["name"] -if juju_major_version < 3: - tls_certificates_app_name = "tls-certificates-operator" - if architecture.architecture == "arm64": - tls_channel = "legacy/edge" - else: - tls_channel = "legacy/stable" - tls_config = {"generate-self-signed-certificates": "true", "ca-common-name": "Test CA"} -else: - tls_certificates_app_name = "self-signed-certificates" - if architecture.architecture == "arm64": - tls_channel = "latest/edge" - else: - tls_channel = "latest/stable" - tls_config = {"ca-common-name": "Test CA"} - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_deploy_active(ops_test: OpsTest): - """Build the charm and deploy it.""" - charm = await ops_test.build_charm(".") - async with ops_test.fast_forward(): - await ops_test.model.deploy( - charm, - application_name=APP_NAME, - num_units=3, - series=CHARM_SERIES, - config={"profile": "testing"}, - ) - # No wait between deploying charms, since we can't guarantee users will wait. Furthermore, - # bundles don't wait between deploying charms. - - -@pytest.mark.group(1) -async def test_tls_enabled(ops_test: OpsTest) -> None: - """Test that TLS is enabled when relating to the TLS Certificates Operator.""" - async with ops_test.fast_forward(): - # Deploy TLS Certificates operator. - await ops_test.model.deploy( - tls_certificates_app_name, config=tls_config, channel=tls_channel - ) - - # Relate it to the PostgreSQL to enable TLS. - await ops_test.model.relate(DATABASE_APP_NAME, tls_certificates_app_name) - await ops_test.model.wait_for_idle(status="active", timeout=1500) - - # Wait for all units enabling TLS. - for unit in ops_test.model.applications[DATABASE_APP_NAME].units: - assert await check_tls(ops_test, unit.name, enabled=True) - assert await check_tls_patroni_api(ops_test, unit.name, enabled=True) - - # Test TLS being used by pg_rewind. To accomplish that, get the primary unit - # and a replica that will be promoted to primary (this should trigger a rewind - # operation when the old primary is started again). - any_unit = ops_test.model.applications[DATABASE_APP_NAME].units[0].name - primary = await get_primary(ops_test, any_unit) - replica = [ - unit.name - for unit in ops_test.model.applications[DATABASE_APP_NAME].units - if unit.name != primary - ][0] - - # Check if TLS enabled for replication - assert await check_tls_replication(ops_test, primary, enabled=True) - - # Enable additional logs on the PostgreSQL instance to check TLS - # being used in a later step and make the fail-over to happens faster. - await ops_test.model.applications[DATABASE_APP_NAME].set_config({ - "logging_log_connections": "True" - }) - await ops_test.model.wait_for_idle( - apps=[DATABASE_APP_NAME], status="active", idle_period=30 - ) - change_primary_start_timeout(ops_test, primary, 0) - - for attempt in Retrying( - stop=stop_after_delay(60 * 5), wait=wait_exponential(multiplier=1, min=2, max=30) - ): - with attempt: - # Promote the replica to primary. - await run_command_on_unit( - ops_test, - replica, - "sudo -u snap_daemon charmed-postgresql.pg-ctl -D /var/snap/charmed-postgresql/common/var/lib/postgresql/ promote", - ) - - # Check that the replica was promoted. - host = get_unit_address(ops_test, replica) - password = await get_password(ops_test, replica) - with db_connect(host, password) as connection: - connection.autocommit = True - with connection.cursor() as cursor: - cursor.execute("SELECT pg_is_in_recovery();") - in_recovery = cursor.fetchone()[0] - print(f"in_recovery: {in_recovery}") - assert not in_recovery - connection.close() - - # Write some data to the initial primary (this causes a divergence - # in the instances' timelines). - host = get_unit_address(ops_test, primary) - password = await get_password(ops_test, primary) - with db_connect(host, password) as connection: - connection.autocommit = True - with connection.cursor() as cursor: - cursor.execute("CREATE TABLE IF NOT EXISTS pgrewindtest (testcol INT);") - cursor.execute("INSERT INTO pgrewindtest SELECT generate_series(1,1000);") - connection.close() - - # Stop the initial primary by killing both Patroni and PostgreSQL OS processes. - await run_command_on_unit( - ops_test, - primary, - "pkill --signal SIGKILL -f /snap/charmed-postgresql/current/usr/lib/postgresql/14/bin/postgres", - ) - await run_command_on_unit( - ops_test, - primary, - "pkill --signal SIGKILL -f /snap/charmed-postgresql/[0-9]*/usr/bin/patroni", - ) - - # Check that the primary changed. - assert await primary_changed(ops_test, primary), "primary not changed" - change_primary_start_timeout(ops_test, primary, 300) - - # Check the logs to ensure TLS is being used by pg_rewind. - primary = await get_primary(ops_test, primary) - await run_command_on_unit( - ops_test, - primary, - "grep 'connection authorized: user=rewind database=postgres SSL enabled' /var/snap/charmed-postgresql/common/var/log/postgresql/postgresql-*.log", - ) - - # Remove the relation. - await ops_test.model.applications[DATABASE_APP_NAME].remove_relation( - f"{DATABASE_APP_NAME}:certificates", f"{tls_certificates_app_name}:certificates" - ) - await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=1000) - - # Wait for all units disabling TLS. - for unit in ops_test.model.applications[DATABASE_APP_NAME].units: - assert await check_tls(ops_test, unit.name, enabled=False) - assert await check_tls_patroni_api(ops_test, unit.name, enabled=False) - - -@pytest.mark.group(1) -@pytest.mark.skipif( - not os.environ.get("RESTART_MACHINE_TEST"), - reason="RESTART_MACHINE_TEST environment variable not set", -) -async def test_restart_machine(ops_test: OpsTest) -> None: - async with ops_test.fast_forward(): - # Relate it to the PostgreSQL to enable TLS. - await ops_test.model.relate(DATABASE_APP_NAME, tls_certificates_app_name) - await ops_test.model.wait_for_idle(status="active", timeout=1000) - - # Wait for all units enabling TLS. - for unit in ops_test.model.applications[DATABASE_APP_NAME].units: - assert await check_tls(ops_test, unit.name, enabled=True) - assert await check_tls_patroni_api(ops_test, unit.name, enabled=True) - - unit_name = "postgresql/0" - issue_found = False - for attempt in Retrying(stop=stop_after_attempt(10)): - with attempt: - # Restart the machine of the unit. - logger.info(f"restarting {unit_name}") - await restart_machine(ops_test, unit_name) - - # Check whether the issue happened (the storage wasn't mounted). - logger.info( - f"checking whether storage was mounted - attempt {attempt.retry_state.attempt_number}" - ) - result = await run_command_on_unit(ops_test, unit_name, "lsblk") - if "/var/lib/postgresql/data" not in result: - issue_found = True - - assert ( - issue_found - ), "Couldn't reproduce the issue from https://bugs.launchpad.net/juju/+bug/1999758" - - # Wait for the unit to be ready again. Some errors in the start hook may happen due - # to rebooting in the middle of a hook. - await ops_test.model.wait_for_idle(status="active", timeout=1000, raise_on_error=False) - - # Wait for the unit enabling TLS again. - logger.info(f"checking TLS on {unit_name}") - assert await check_tls(ops_test, "postgresql/0", enabled=True) - logger.info(f"checking TLS on Patroni API from {unit_name}") - assert await check_tls_patroni_api(ops_test, "postgresql/0", enabled=True)