From d6e56f0bb11b3e9ec1cc595800f9b114d5ecc756 Mon Sep 17 00:00:00 2001 From: Lucas Gameiro Borges Date: Wed, 26 Jun 2024 00:26:01 +0000 Subject: [PATCH] slice test in 2 groups --- tests/integration/test_backups.py | 523 +++++++++++++++--------------- 1 file changed, 267 insertions(+), 256 deletions(-) diff --git a/tests/integration/test_backups.py b/tests/integration/test_backups.py index 8d74062058..0425623351 100644 --- a/tests/integration/test_backups.py +++ b/tests/integration/test_backups.py @@ -97,286 +97,297 @@ async def cloud_configs(ops_test: OpsTest, github_secrets) -> None: bucket_object.delete() -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_backup(ops_test: OpsTest, cloud_configs: Tuple[Dict, Dict], charm) -> None: +async def test_backup(ops_test: OpsTest, cloud, config, charm) -> None: """Build and deploy two units of PostgreSQL and then test the backup and restore actions.""" # Deploy S3 Integrator and TLS Certificates Operator. await ops_test.model.deploy(S3_INTEGRATOR_APP_NAME) await ops_test.model.deploy(tls_certificates_app_name, config=tls_config, channel=tls_channel) - for cloud, config in cloud_configs[0].items(): - # Deploy and relate PostgreSQL to S3 integrator (one database app for each cloud for now - # as archive_mode is disabled after restoring the backup) and to TLS Certificates Operator - # (to be able to create backups from replicas). - database_app_name = f"{DATABASE_APP_NAME}-{cloud.lower()}" - await ops_test.model.deploy( - charm, - application_name=database_app_name, - num_units=2, - series=CHARM_SERIES, - config={"profile": "testing"}, + # Deploy and relate PostgreSQL to S3 integrator (one database app for each cloud for now + # as archive_mode is disabled after restoring the backup) and to TLS Certificates Operator + # (to be able to create backups from replicas). 
+ database_app_name = f"{DATABASE_APP_NAME}-{cloud.lower()}" + await ops_test.model.deploy( + charm, + application_name=database_app_name, + num_units=2, + series=CHARM_SERIES, + config={"profile": "testing"}, + ) + + await ops_test.model.relate(database_app_name, tls_certificates_app_name) + async with ops_test.fast_forward(fast_interval="60s"): + await ops_test.model.wait_for_idle(apps=[database_app_name], status="active", timeout=1000) + await ops_test.model.relate(database_app_name, S3_INTEGRATOR_APP_NAME) + + # Configure and set access and secret keys. + logger.info(f"configuring S3 integrator for {cloud}") + await ops_test.model.applications[S3_INTEGRATOR_APP_NAME].set_config(config) + action = await ops_test.model.units.get(f"{S3_INTEGRATOR_APP_NAME}/0").run_action( + "sync-s3-credentials", + **cloud_configs[1][cloud], + ) + await action.wait() + async with ops_test.fast_forward(fast_interval="60s"): + await ops_test.model.wait_for_idle( + apps=[database_app_name, S3_INTEGRATOR_APP_NAME], status="active", timeout=1500 ) - await ops_test.model.relate(database_app_name, tls_certificates_app_name) - async with ops_test.fast_forward(fast_interval="60s"): - await ops_test.model.wait_for_idle( - apps=[database_app_name], status="active", timeout=1000 - ) - await ops_test.model.relate(database_app_name, S3_INTEGRATOR_APP_NAME) - - # Configure and set access and secret keys. - logger.info(f"configuring S3 integrator for {cloud}") - await ops_test.model.applications[S3_INTEGRATOR_APP_NAME].set_config(config) - action = await ops_test.model.units.get(f"{S3_INTEGRATOR_APP_NAME}/0").run_action( - "sync-s3-credentials", - **cloud_configs[1][cloud], + primary = await get_primary(ops_test, f"{database_app_name}/0") + for unit in ops_test.model.applications[database_app_name].units: + if unit.name != primary: + replica = unit.name + break + + # Write some data. 
+ password = await get_password(ops_test, primary) + address = get_unit_address(ops_test, primary) + logger.info("creating a table in the database") + with db_connect(host=address, password=password) as connection: + connection.autocommit = True + connection.cursor().execute( + "CREATE TABLE IF NOT EXISTS backup_table_1 (test_collumn INT );" ) - await action.wait() - async with ops_test.fast_forward(fast_interval="60s"): - await ops_test.model.wait_for_idle( - apps=[database_app_name, S3_INTEGRATOR_APP_NAME], status="active", timeout=1500 - ) + connection.close() - primary = await get_primary(ops_test, f"{database_app_name}/0") - for unit in ops_test.model.applications[database_app_name].units: - if unit.name != primary: - replica = unit.name - break - - # Write some data. - password = await get_password(ops_test, primary) - address = get_unit_address(ops_test, primary) - logger.info("creating a table in the database") - with db_connect(host=address, password=password) as connection: - connection.autocommit = True - connection.cursor().execute( - "CREATE TABLE IF NOT EXISTS backup_table_1 (test_collumn INT );" - ) - connection.close() - - # Run the "create backup" action. - logger.info("creating a backup") - action = await ops_test.model.units.get(replica).run_action("create-backup") - await action.wait() - backup_status = action.results.get("backup-status") - assert backup_status, "backup hasn't succeeded" - await ops_test.model.wait_for_idle( - apps=[database_app_name, S3_INTEGRATOR_APP_NAME], status="active", timeout=1000 + # Run the "create backup" action. 
+ logger.info("creating a backup") + action = await ops_test.model.units.get(replica).run_action("create-backup") + await action.wait() + backup_status = action.results.get("backup-status") + assert backup_status, "backup hasn't succeeded" + await ops_test.model.wait_for_idle( + apps=[database_app_name, S3_INTEGRATOR_APP_NAME], status="active", timeout=1000 + ) + + # With a stable cluster, run the "list backups" action. + async with ops_test.fast_forward(): + await ops_test.model.wait_for_idle(status="active", timeout=1000, idle_period=30) + logger.info("listing the available backups") + action = await ops_test.model.units.get(replica).run_action("list-backups") + await action.wait() + backups = action.results.get("backups") + # 2 lines for header output, 1 backup line ==> 3 total lines + assert len(backups.split("\n")) == 3, "full backup is not outputted" + await ops_test.model.wait_for_idle(status="active", timeout=1000) + + # Write some data. + logger.info("creating a second table in the database") + with db_connect(host=address, password=password) as connection: + connection.autocommit = True + connection.cursor().execute("CREATE TABLE backup_table_2 (test_collumn INT );") + connection.close() + + # Run the "create backup" action. + logger.info("creating a backup") + action = await ops_test.model.units.get(replica).run_action( + "create-backup", **{"type": "differential"} + ) + await action.wait() + backup_status = action.results.get("backup-status") + assert backup_status, "backup hasn't succeeded" + async with ops_test.fast_forward(): + await ops_test.model.wait_for_idle(status="active", timeout=1000) + + # Run the "list backups" action.
+ logger.info("listing the available backups") + action = await ops_test.model.units.get(replica).run_action("list-backups") + await action.wait() + backups = action.results.get("backups") + # 2 lines for header output, 2 backup lines ==> 4 total lines + assert len(backups.split("\n")) == 4, "differential backup is not outputted" + await ops_test.model.wait_for_idle(status="active", timeout=1000) + + # Write some data. + logger.info("creating a second table in the database") + with db_connect(host=address, password=password) as connection: + connection.autocommit = True + connection.cursor().execute("CREATE TABLE backup_table_3 (test_collumn INT );") + connection.close() + # Scale down to be able to restore. + async with ops_test.fast_forward(): + await ops_test.model.destroy_unit(replica) + await ops_test.model.block_until( + lambda: len(ops_test.model.applications[database_app_name].units) == 1 ) - # With a stable cluster, Run the "create backup" action - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle(status="active", timeout=1000, idle_period=30) - logger.info("listing the available backups") - action = await ops_test.model.units.get(replica).run_action("list-backups") - await action.wait() - backups = action.results.get("backups") - # 2 lines for header output, 1 backup line ==> 3 total lines - assert len(backups.split("\n")) == 3, "full backup is not outputted" + for unit in ops_test.model.applications[database_app_name].units: + remaining_unit = unit + break + + # Run the "restore backup" action for differential backup. 
+ for attempt in Retrying( + stop=stop_after_attempt(10), wait=wait_exponential(multiplier=1, min=2, max=30) + ): + with attempt: + logger.info("restoring the backup") + last_diff_backup = backups.split("\n")[-1] + backup_id = last_diff_backup.split()[0] + action = await remaining_unit.run_action("restore", **{"backup-id": backup_id}) + await action.wait() + restore_status = action.results.get("restore-status") + assert restore_status, "restore hasn't succeeded" + + # Wait for the restore to complete. + async with ops_test.fast_forward(): await ops_test.model.wait_for_idle(status="active", timeout=1000) - # Write some data. - logger.info("creating a second table in the database") - with db_connect(host=address, password=password) as connection: - connection.autocommit = True - connection.cursor().execute("CREATE TABLE backup_table_2 (test_collumn INT );") - connection.close() - - # Run the "create backup" action. - logger.info("creating a backup") - action = await ops_test.model.units.get(replica).run_action( - "create-backup", **{"type": "differential"} + # Check that the differential backup was correctly restored by having only the first two created tables.
+ logger.info("checking that the backup was correctly restored") + primary = await get_primary(ops_test, remaining_unit.name) + address = get_unit_address(ops_test, primary) + with db_connect(host=address, password=password) as connection, connection.cursor() as cursor: + cursor.execute( + "SELECT EXISTS (SELECT FROM information_schema.tables" + " WHERE table_schema = 'public' AND table_name = 'backup_table_1');" + ) + assert cursor.fetchone()[ + 0 + ], "backup wasn't correctly restored: table 'backup_table_1' doesn't exist" + cursor.execute( + "SELECT EXISTS (SELECT FROM information_schema.tables" + " WHERE table_schema = 'public' AND table_name = 'backup_table_2');" ) - await action.wait() - backup_status = action.results.get("backup-status") - assert backup_status, "backup hasn't succeeded" - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle(status="active", timeout=1000) - - # Run the "list backups" action. - logger.info("listing the available backups") - action = await ops_test.model.units.get(replica).run_action("list-backups") - await action.wait() - backups = action.results.get("backups") - # 2 lines for header output, 2 backup lines ==> 4 total lines - assert len(backups.split("\n")) == 4, "differential backup is not outputted" + assert cursor.fetchone()[ + 0 + ], "backup wasn't correctly restored: table 'backup_table_2' doesn't exist" + cursor.execute( + "SELECT EXISTS (SELECT FROM information_schema.tables" + " WHERE table_schema = 'public' AND table_name = 'backup_table_3');" + ) + assert not cursor.fetchone()[ + 0 + ], "backup wasn't correctly restored: table 'backup_table_3' exists" + connection.close() + + # Run the "restore backup" action for full backup. 
+ for attempt in Retrying( + stop=stop_after_attempt(10), wait=wait_exponential(multiplier=1, min=2, max=30) + ): + with attempt: + logger.info("restoring the backup") + last_full_backup = backups.split("\n")[-2] + backup_id = last_full_backup.split()[0] + action = await remaining_unit.run_action("restore", **{"backup-id": backup_id}) + await action.wait() + restore_status = action.results.get("restore-status") + assert restore_status, "restore hasn't succeeded" + + # Wait for the restore to complete. + async with ops_test.fast_forward(): await ops_test.model.wait_for_idle(status="active", timeout=1000) - # Write some data. - logger.info("creating a second table in the database") - with db_connect(host=address, password=password) as connection: - connection.autocommit = True - connection.cursor().execute("CREATE TABLE backup_table_3 (test_collumn INT );") - connection.close() - # Scale down to be able to restore. - async with ops_test.fast_forward(): - await ops_test.model.destroy_unit(replica) - await ops_test.model.block_until( - lambda: len(ops_test.model.applications[database_app_name].units) == 1 - ) + # Check that the backup was correctly restored by having only the first created table. 
+ primary = await get_primary(ops_test, remaining_unit.name) + address = get_unit_address(ops_test, primary) + logger.info("checking that the backup was correctly restored") + with db_connect(host=address, password=password) as connection, connection.cursor() as cursor: + cursor.execute( + "SELECT EXISTS (SELECT FROM information_schema.tables" + " WHERE table_schema = 'public' AND table_name = 'backup_table_1');" + ) + assert cursor.fetchone()[ + 0 + ], "backup wasn't correctly restored: table 'backup_table_1' doesn't exist" + cursor.execute( + "SELECT EXISTS (SELECT FROM information_schema.tables" + " WHERE table_schema = 'public' AND table_name = 'backup_table_2');" + ) + assert not cursor.fetchone()[ + 0 + ], "backup wasn't correctly restored: table 'backup_table_2' exists" + cursor.execute( + "SELECT EXISTS (SELECT FROM information_schema.tables" + " WHERE table_schema = 'public' AND table_name = 'backup_table_3');" + ) + assert not cursor.fetchone()[ + 0 + ], "backup wasn't correctly restored: table 'backup_table_3' exists" + connection.close() - for unit in ops_test.model.applications[database_app_name].units: - remaining_unit = unit - break - # Run the "restore backup" action for differential backup. - for attempt in Retrying( - stop=stop_after_attempt(10), wait=wait_exponential(multiplier=1, min=2, max=30) - ): - with attempt: - logger.info("restoring the backup") - last_diff_backup = backups.split("\n")[-1] - backup_id = last_diff_backup.split()[0] - action = await remaining_unit.run_action("restore", **{"backup-id": backup_id}) - await action.wait() - restore_status = action.results.get("restore-status") - assert restore_status, "restore hasn't succeeded" - - # Wait for the restore to complete. - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle(status="active", timeout=1000) - - # Check that the backup was correctly restored by having only the first created table. 
- logger.info("checking that the backup was correctly restored") - primary = await get_primary(ops_test, remaining_unit.name) - address = get_unit_address(ops_test, primary) - with db_connect( - host=address, password=password - ) as connection, connection.cursor() as cursor: - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_1');" - ) - assert cursor.fetchone()[ - 0 - ], "backup wasn't correctly restored: table 'backup_table_1' doesn't exist" - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_2');" - ) - assert cursor.fetchone()[ - 0 - ], "backup wasn't correctly restored: table 'backup_table_2' doesn't exist" - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_3');" - ) - assert not cursor.fetchone()[ - 0 - ], "backup wasn't correctly restored: table 'backup_table_3' exists" - connection.close() - - # Run the "restore backup" action for full backup. - for attempt in Retrying( - stop=stop_after_attempt(10), wait=wait_exponential(multiplier=1, min=2, max=30) - ): - with attempt: - logger.info("restoring the backup") - last_full_backup = backups.split("\n")[-2] - backup_id = last_full_backup.split()[0] - action = await remaining_unit.run_action("restore", **{"backup-id": backup_id}) - await action.wait() - restore_status = action.results.get("restore-status") - assert restore_status, "restore hasn't succeeded" - - # Wait for the restore to complete. - async with ops_test.fast_forward(): - await ops_test.model.wait_for_idle(status="active", timeout=1000) - - # Check that the backup was correctly restored by having only the first created table. 
- primary = await get_primary(ops_test, remaining_unit.name) - address = get_unit_address(ops_test, primary) - logger.info("checking that the backup was correctly restored") - with db_connect( - host=address, password=password - ) as connection, connection.cursor() as cursor: - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_1');" - ) - assert cursor.fetchone()[ - 0 - ], "backup wasn't correctly restored: table 'backup_table_1' doesn't exist" - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_2');" - ) - assert not cursor.fetchone()[ - 0 - ], "backup wasn't correctly restored: table 'backup_table_2' exists" - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_3');" - ) - assert not cursor.fetchone()[ - 0 - ], "backup wasn't correctly restored: table 'backup_table_3' exists" - connection.close() - - # Run the following steps only in one cloud (it's enough for those checks). - if cloud == list(cloud_configs[0].keys())[0]: - # Remove the relation to the TLS certificates operator. - await ops_test.model.applications[database_app_name].remove_relation( - f"{database_app_name}:certificates", f"{tls_certificates_app_name}:certificates" - ) - await ops_test.model.wait_for_idle( - apps=[database_app_name], status="active", timeout=1000 - ) +@pytest.mark.group(1) +@pytest.mark.abort_on_fail +async def test_backup_aws(ops_test: OpsTest, cloud_configs: Tuple[Dict, Dict], charm) -> None: + config = cloud_configs[0].get(AWS) + await test_backup(ops_test, AWS, config, charm) - # Scale up to be able to test primary and leader being different. - async with ops_test.fast_forward(): - await scale_application(ops_test, database_app_name, 2) - - # Ensure replication is working correctly. 
- new_unit_name = f"{database_app_name}/2" - address = get_unit_address(ops_test, new_unit_name) - with db_connect( - host=address, password=password - ) as connection, connection.cursor() as cursor: - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_1');" - ) - assert cursor.fetchone()[ - 0 - ], f"replication isn't working correctly: table 'backup_table_1' doesn't exist in {new_unit_name}" - cursor.execute( - "SELECT EXISTS (SELECT FROM information_schema.tables" - " WHERE table_schema = 'public' AND table_name = 'backup_table_2');" - ) - assert not cursor.fetchone()[ - 0 - ], f"replication isn't working correctly: table 'backup_table_2' exists in {new_unit_name}" - connection.close() - - switchover(ops_test, primary, new_unit_name) - - # Get the new primary unit. - primary = await get_primary(ops_test, new_unit_name) - # Check that the primary changed. - for attempt in Retrying( - stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=30) - ): - with attempt: - assert primary == new_unit_name - - # Ensure stanza is working correctly. - logger.info("listing the available backups") - action = await ops_test.model.units.get(new_unit_name).run_action("list-backups") - await action.wait() - backups = action.results.get("backups") - assert backups, "backups not outputted" - await ops_test.model.wait_for_idle(status="active", timeout=1000) + database_app_name = f"{DATABASE_APP_NAME}-aws" + primary = await get_primary(ops_test, f"{database_app_name}/0") + password = await get_password(ops_test, primary) + + # Remove the relation to the TLS certificates operator. 
+ await ops_test.model.applications[database_app_name].remove_relation( + f"{database_app_name}:certificates", f"{tls_certificates_app_name}:certificates" + ) + await ops_test.model.wait_for_idle(apps=[database_app_name], status="active", timeout=1000) + + # Scale up to be able to test primary and leader being different. + async with ops_test.fast_forward(): + await scale_application(ops_test, database_app_name, 2) + + # Ensure replication is working correctly. + new_unit_name = f"{database_app_name}/2" + address = get_unit_address(ops_test, new_unit_name) + with db_connect(host=address, password=password) as connection, connection.cursor() as cursor: + cursor.execute( + "SELECT EXISTS (SELECT FROM information_schema.tables" + " WHERE table_schema = 'public' AND table_name = 'backup_table_1');" + ) + assert cursor.fetchone()[ + 0 + ], f"replication isn't working correctly: table 'backup_table_1' doesn't exist in {new_unit_name}" + cursor.execute( + "SELECT EXISTS (SELECT FROM information_schema.tables" + " WHERE table_schema = 'public' AND table_name = 'backup_table_2');" + ) + assert not cursor.fetchone()[ + 0 + ], f"replication isn't working correctly: table 'backup_table_2' exists in {new_unit_name}" + connection.close() - # Remove the database app. - await ops_test.model.remove_application(database_app_name, block_until_done=True) + switchover(ops_test, primary, new_unit_name) + + # Get the new primary unit. + primary = await get_primary(ops_test, new_unit_name) + # Check that the primary changed. + for attempt in Retrying( + stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=30) + ): + with attempt: + assert primary == new_unit_name + + # Ensure stanza is working correctly. 
+ logger.info("listing the available backups") + action = await ops_test.model.units.get(new_unit_name).run_action("list-backups") + await action.wait() + backups = action.results.get("backups") + assert backups, "backups not outputted" + await ops_test.model.wait_for_idle(status="active", timeout=1000) + + # Remove the database app. + await ops_test.model.remove_application(database_app_name, block_until_done=True) # Remove the TLS operator. await ops_test.model.remove_application(tls_certificates_app_name, block_until_done=True) -@pytest.mark.group(1) +@pytest.mark.group(2) +@pytest.mark.abort_on_fail +async def test_backup_gcp(ops_test: OpsTest, cloud_configs: Tuple[Dict, Dict], charm) -> None: + config = cloud_configs[0].get(GCP) + + await test_backup(ops_test, GCP, config, charm) + database_app_name = f"{DATABASE_APP_NAME}-gcp" + + # Remove the database app. + await ops_test.model.remove_application(database_app_name, block_until_done=True) + + # Remove the TLS operator. + await ops_test.model.remove_application(tls_certificates_app_name, block_until_done=True) + + +@pytest.mark.group(2) async def test_restore_on_new_cluster(ops_test: OpsTest, github_secrets, charm) -> None: """Test that is possible to restore a backup to another PostgreSQL cluster.""" previous_database_app_name = f"{DATABASE_APP_NAME}-gcp" @@ -467,7 +478,7 @@ async def test_restore_on_new_cluster(ops_test: OpsTest, github_secrets, charm) connection.close() -@pytest.mark.group(1) +@pytest.mark.group(2) async def test_invalid_config_and_recovery_after_fixing_it( ops_test: OpsTest, cloud_configs: Tuple[Dict, Dict] ) -> None: