diff --git a/poetry.lock b/poetry.lock index a83bddad16..7f72b6ae12 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1560,6 +1560,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -1567,8 +1568,16 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, 
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -1585,6 +1594,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -1592,6 +1602,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, diff --git a/tests/integration/ha_tests/helpers.py b/tests/integration/ha_tests/helpers.py index 98e0589b18..ece6f0b735 100644 --- a/tests/integration/ha_tests/helpers.py +++ 
# --- tests/integration/ha_tests/helpers.py: restart/recovery helpers ---

# Glob matching the PostgreSQL log files inspected by the checks below.
_POSTGRESQL_LOGS_GLOB = "/var/snap/charmed-postgresql/common/var/log/postgresql/postgresql*"


async def lxc_restart_service(machine_name: str, force: bool = False) -> None:
    """Restart an LXD instance with ``lxc restart``.

    Args:
        machine_name: name of the LXD instance to restart.
        force: when True, ``--force`` is passed to ``lxc restart``; otherwise
            ``--timeout=1500`` is passed instead.

    Raises:
        subprocess.CalledProcessError: if ``lxc restart`` exits with a non-zero status.
    """
    import asyncio

    # Build argv as a list: splitting a formatted string would mis-tokenise a
    # machine name that contains whitespace.
    restart_command = ["lxc", "restart", machine_name]
    restart_command.append("--force" if force else "--timeout=1500")

    # check_call blocks until lxc returns (potentially for the whole timeout),
    # so run it in the default executor instead of stalling the event loop.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, subprocess.check_call, restart_command)


async def check_graceful_shutdown(ops_test: OpsTest, unit_name: str) -> bool:
    """Return whether the unit's PostgreSQL logs contain ``shutting down``.

    Args:
        ops_test: the ops test framework instance.
        unit_name: unit whose PostgreSQL logs are read.

    Returns:
        True if the text ``shutting down`` appears in the concatenated logs.
    """
    log_str = "shutting down"
    stdout = await run_command_on_unit(
        ops_test,
        unit_name,
        f"cat {_POSTGRESQL_LOGS_GLOB}",
    )
    return log_str in str(stdout)


async def check_success_recovery(ops_test: OpsTest, unit_name: str) -> bool:
    """Return whether the unit's PostgreSQL logs report a consistent recovery state.

    Args:
        ops_test: the ops test framework instance.
        unit_name: unit whose PostgreSQL logs are searched.

    Returns:
        True if ``consistent recovery state reached`` is found by grep.
    """
    log_str = "consistent recovery state reached"
    # NOTE(review): grep exits with status 1 when nothing matches; confirm that
    # run_command_on_unit returns (rather than raises) in that case.
    stdout = await run_command_on_unit(
        ops_test,
        unit_name,
        f"grep -E '{log_str}' {_POSTGRESQL_LOGS_GLOB}",
    )
    return log_str in str(stdout)


# --- tests/integration/ha_tests/test_functional.py ---
#!/usr/bin/env python3
# Copyright 2023 Canonical Ltd.
# See LICENSE file for licensing details.
import logging
import uuid
from typing import AsyncGenerator, Dict, Tuple

import boto3
import pytest
from pytest_operator.plugin import OpsTest
from tenacity import Retrying, stop_after_attempt, stop_after_delay, wait_exponential, wait_fixed

from ..helpers import (
    APPLICATION_NAME,
    CHARM_SERIES,
    DATABASE_APP_NAME,
    construct_endpoint,
    get_machine_from_unit,
)
from .helpers import (
    check_db,
    check_graceful_shutdown,
    check_success_recovery,
    create_db,
    is_postgresql_ready,
    lxc_restart_service,
)

TEST_DATABASE_NAME = "test_database"
DUP_APPLICATION_NAME = "postgres-test-dup"
S3_INTEGRATOR_APP_NAME = "s3-integrator"

logger = logging.getLogger(__name__)

AWS = "AWS"


@pytest.fixture(scope="module")
async def cloud_configs(
    ops_test: OpsTest, github_secrets
) -> AsyncGenerator[Tuple[Dict, Dict], None]:
    """Yield ``(configs, credentials)`` keyed by cloud name, then clean up the bucket.

    After the tests of the module have run, every object stored under the
    generated path prefix is deleted from the configured bucket.
    """
    # Define some configurations and credentials.
    configs = {
        AWS: {
            "endpoint": "https://s3.amazonaws.com",
            "bucket": "data-charms-testing",
            "path": f"/postgresql-vm/{uuid.uuid1()}",
            "region": "us-east-1",
        },
    }
    credentials = {
        AWS: {
            "access-key": github_secrets["AWS_ACCESS_KEY"],
            "secret-key": github_secrets["AWS_SECRET_KEY"],
        },
    }
    yield configs, credentials
    # Delete the previously created objects.
    logger.info("deleting the previously created backups")
    for cloud, config in configs.items():
        session = boto3.session.Session(
            aws_access_key_id=credentials[cloud]["access-key"],
            aws_secret_access_key=credentials[cloud]["secret-key"],
            region_name=config["region"],
        )
        s3 = session.resource(
            "s3", endpoint_url=construct_endpoint(config["endpoint"], config["region"])
        )
        bucket = s3.Bucket(config["bucket"])
        for bucket_object in bucket.objects.filter(Prefix=config["path"].lstrip("/")):
            bucket_object.delete()


async def _wait_for_postgresql(ops_test: OpsTest, unit_name: str) -> None:
    """Poll every 3 seconds, for up to 45 seconds, until PostgreSQL is ready on the unit."""
    for attempt in Retrying(stop=stop_after_delay(15 * 3), wait=wait_fixed(3), reraise=True):
        with attempt:
            assert await is_postgresql_ready(ops_test, unit_name)


async def _restart_and_check_recovery(
    ops_test: OpsTest, app_name: str, unit_name: str, *, force: bool
) -> None:
    """Restart the unit's machine and assert the shutdown kind and the recovery."""
    # Get unit hostname and IP.
    logger.info("wait for hostname")
    hostname = await get_machine_from_unit(ops_test, unit_name)

    logger.info("restarting service with force" if force else "restarting service")
    await lxc_restart_service(hostname, force=force)

    logger.info("waiting for idle")
    await ops_test.model.wait_for_idle(
        apps=[app_name], status="active", timeout=1500, raise_on_error=False
    )
    assert ops_test.model.applications[app_name].units[0].workload_status == "active"

    logger.info("check forceful shutdown" if force else "check graceful shutdown")
    shutdown_was_graceful = await check_graceful_shutdown(ops_test, unit_name)
    if force:
        assert not shutdown_was_graceful
    else:
        assert shutdown_was_graceful

    logger.info("check success recovery")
    assert await check_success_recovery(ops_test, unit_name)


async def _remove_application(ops_test: OpsTest, app_name: str) -> None:
    """Remove the application, retrying every 3 seconds for up to 45 seconds."""
    logger.info("remove application")
    for attempt in Retrying(stop=stop_after_delay(15 * 3), wait=wait_fixed(3), reraise=True):
        with attempt:
            await ops_test.model.remove_application(app_name, block_until_done=True)


async def _deploy_restart_and_verify(ops_test: OpsTest, charm: str, *, force: bool) -> None:
    """Deploy a single unit, restart its machine, verify recovery, then remove the app."""
    # Deploy the charm.
    logger.info("deploying charm")
    await ops_test.model.deploy(
        charm,
        application_name=APPLICATION_NAME,
        num_units=1,
        series=CHARM_SERIES,
        config={"profile": "testing"},
    )

    logger.info("waiting for idle")
    await ops_test.model.wait_for_idle(apps=[APPLICATION_NAME], status="active", timeout=1500)
    primary_unit = ops_test.model.applications[APPLICATION_NAME].units[0]
    assert primary_unit.workload_status == "active"
    primary_name = primary_unit.name

    logger.info("waiting for postgresql")
    await _wait_for_postgresql(ops_test, primary_name)

    await _restart_and_check_recovery(ops_test, APPLICATION_NAME, primary_name, force=force)
    await _remove_application(ops_test, APPLICATION_NAME)


@pytest.mark.group(1)
@pytest.mark.abort_on_fail
async def test_instance_graceful_restart(ops_test: OpsTest, charm: str) -> None:
    """Test graceful restart of a service."""
    async with ops_test.fast_forward():
        await _deploy_restart_and_verify(ops_test, charm, force=False)


@pytest.mark.group(1)
@pytest.mark.abort_on_fail
async def test_instance_forceful_restart(ops_test: OpsTest, charm: str) -> None:
    """Test forceful restart of a service."""
    async with ops_test.fast_forward():
        await _deploy_restart_and_verify(ops_test, charm, force=True)


@pytest.mark.group(1)
@pytest.mark.abort_on_fail
async def test_instance_backup_with_restart(
    ops_test: OpsTest, cloud_configs: Tuple[Dict, Dict], charm
) -> None:
    """Test instance backup after recovery."""
    configs, credentials = cloud_configs
    async with ops_test.fast_forward():
        logger.info("deploying s3")
        await ops_test.model.deploy(S3_INTEGRATOR_APP_NAME)

        for cloud, config in configs.items():
            # Deploy and relate PostgreSQL to S3 integrator (one database app for each cloud for now
            # as archive_mode is disabled after restoring the backup)
            logger.info("deploying charm")
            await ops_test.model.deploy(
                charm,
                application_name=DATABASE_APP_NAME,
                num_units=1,
                series=CHARM_SERIES,
                config={"profile": "testing"},
            )

            logger.info("relate s3")
            await ops_test.model.relate(DATABASE_APP_NAME, S3_INTEGRATOR_APP_NAME)

            # Configure and set access and secret keys.
            logger.info(f"configuring S3 integrator for {cloud}")
            await ops_test.model.applications[S3_INTEGRATOR_APP_NAME].set_config(config)
            action = await ops_test.model.units.get(f"{S3_INTEGRATOR_APP_NAME}/0").run_action(
                "sync-s3-credentials",
                **credentials[cloud],
            )
            await action.wait()
            await ops_test.model.wait_for_idle(
                apps=[DATABASE_APP_NAME, S3_INTEGRATOR_APP_NAME], status="active", timeout=1500
            )

            primary_unit = ops_test.model.applications[DATABASE_APP_NAME].units[0]
            primary_name = primary_unit.name

            # Write some data.
            logger.info("write data before backup")
            await create_db(ops_test, DATABASE_APP_NAME, TEST_DATABASE_NAME)

            # Run the "create backup" action.
            logger.info("creating a backup")
            action = await ops_test.model.units.get(primary_name).run_action("create-backup")
            await action.wait()
            backup_status = action.results.get("backup-status")
            assert backup_status, "backup hasn't succeeded"
            await ops_test.model.wait_for_idle(
                apps=[DATABASE_APP_NAME, S3_INTEGRATOR_APP_NAME], status="active", timeout=1000
            )

            # Run the "list backups" action.
            logger.info("listing the available backups")
            action = await ops_test.model.units.get(primary_name).run_action("list-backups")
            await action.wait()
            backups = action.results.get("backups")
            assert backups, "backups not outputted"
            await ops_test.model.wait_for_idle(status="active", timeout=1500)

            # Write some data.
            logger.info("write data after backup")
            await create_db(ops_test, DATABASE_APP_NAME, TEST_DATABASE_NAME + "_dup")

            await _restart_and_check_recovery(
                ops_test, DATABASE_APP_NAME, primary_name, force=True
            )

            # The backup id does not change between attempts, so compute it once.
            # Stripping first keeps a trailing newline in the action output from
            # producing an empty "last line".
            most_recent_backup = backups.strip().splitlines()[-1]
            backup_id = most_recent_backup.split()[0]

            # Run the "restore backup" action.
            # reraise=True surfaces the final assertion instead of a RetryError.
            for attempt in Retrying(
                stop=stop_after_attempt(10),
                wait=wait_exponential(multiplier=1, min=2, max=30),
                reraise=True,
            ):
                with attempt:
                    logger.info("restoring the backup")
                    action = await primary_unit.run_action("restore", **{"backup-id": backup_id})
                    await action.wait()
                    restore_status = action.results.get("restore-status")
                    assert restore_status, "restore hasn't succeeded"

            # Wait for the restore to complete.
            logger.info("wait for restore")
            await ops_test.model.wait_for_idle(status="active", timeout=1500)

            # Only the database written before the backup should be present.
            logger.info("checking data consistency")
            assert await check_db(ops_test, DATABASE_APP_NAME, TEST_DATABASE_NAME)
            assert not await check_db(ops_test, DATABASE_APP_NAME, TEST_DATABASE_NAME + "_dup")

            await _remove_application(ops_test, DATABASE_APP_NAME)