diff --git a/actions.yaml b/actions.yaml
index 7364321fa7..4051f1edd4 100644
--- a/actions.yaml
+++ b/actions.yaml
@@ -3,6 +3,14 @@
 create-backup:
   description: Creates a backup to s3 storage.
+  params:
+    type:
+      type: string
+      description: The backup type; the default value is 'full'.
+        Full backup is a full copy of all data.
+        Differential backup is a copy only of changed data since the last full backup.
+        Incremental backup is a copy only of changed data since the last backup (any type).
+        Possible values - full, differential, incremental.
 get-primary:
   description: Get the unit which is the primary/leader in the replication.
 get-password:
diff --git a/config.yaml b/config.yaml
index 35617edbab..89af90fc71 100644
--- a/config.yaml
+++ b/config.yaml
@@ -299,6 +299,10 @@ options:
     default: false
     type: boolean
     description: Enable pgvector extension
+  plugin_timescaledb_enable:
+    default: false
+    type: boolean
+    description: Enable timescaledb extension
   profile:
     description: |
       Profile representing the scope of deployment, and used to tune resource allocation.
diff --git a/src/backups.py b/src/backups.py
index a34f67ab1b..b2e0297669 100644
--- a/src/backups.py
+++ b/src/backups.py
@@ -29,6 +29,7 @@
 from constants import (
     BACKUP_ID_FORMAT,
+    BACKUP_TYPE_OVERRIDES,
     BACKUP_USER,
     PATRONI_CONF_PATH,
     PGBACKREST_BACKUP_ID_FORMAT,
@@ -343,22 +344,20 @@ def _generate_backup_list_output(self) -> str:
         backups = json.loads(output)[0]["backup"]
         for backup in backups:
-            backup_id = datetime.strftime(
-                datetime.strptime(backup["label"][:-1], PGBACKREST_BACKUP_ID_FORMAT),
-                BACKUP_ID_FORMAT,
-            )
+            backup_id, backup_type = self._parse_backup_id(backup["label"])
             error = backup["error"]
             backup_status = "finished"
             if error:
                 backup_status = f"failed: {error}"
-            backup_list.append((backup_id, "physical", backup_status))
+            backup_list.append((backup_id, backup_type, backup_status))
         return self._format_backup_list(backup_list)

-    def _list_backups(self, show_failed: bool) -> OrderedDict[str, str]:
+    def _list_backups(self, show_failed: bool, parse: bool = True) -> OrderedDict[str, str]:
         """Retrieve the list of backups.

         Args:
             show_failed: whether to also return the failed backups.
+            parse: whether to convert backup labels to their IDs or not.

         Returns:
             a dict of previously created backups (id + stanza name) or an empty list
@@ -383,16 +382,35 @@ def _list_backups(self, show_failed: bool) -> OrderedDict[str, str]:
         stanza_name = repository_info["name"]
         return OrderedDict[str, str](
             (
-                datetime.strftime(
-                    datetime.strptime(backup["label"][:-1], PGBACKREST_BACKUP_ID_FORMAT),
-                    BACKUP_ID_FORMAT,
-                ),
+                self._parse_backup_id(backup["label"])[0] if parse else backup["label"],
                 stanza_name,
             )
             for backup in backups
             if show_failed or not backup["error"]
         )

+    def _parse_backup_id(self, label: str) -> Tuple[str, str]:
+        """Parse a pgBackRest backup label into a backup ID (timestamp) and a backup type."""
+        if label[-1] == "F":
+            timestamp = label
+            backup_type = "full"
+        elif label[-1] == "D":
+            timestamp = label.split("_")[1]
+            backup_type = "differential"
+        elif label[-1] == "I":
+            timestamp = label.split("_")[1]
+            backup_type = "incremental"
+        else:
+            raise ValueError(f"Unknown label format for backup ID: {label}")
+
+        return (
+            datetime.strftime(
+                datetime.strptime(timestamp[:-1], PGBACKREST_BACKUP_ID_FORMAT),
+                BACKUP_ID_FORMAT,
+            ),
+            backup_type,
+        )
+
     def _initialise_stanza(self) -> None:
         """Initialize the stanza.
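The new `_parse_backup_id` helper hinges on pgBackRest's label formats: `<ts>F` for a full backup, and `<full-ts>_<ts>D` / `<full-ts>_<ts>I` for differential and incremental backups chained to a full one. A minimal standalone sketch of the same mapping; the two format-string values are inferred from the fake-backup-id and restore code later in this diff, not confirmed constants:

```python
from datetime import datetime

# Values inferred from this diff; treat as assumptions.
PGBACKREST_BACKUP_ID_FORMAT = "%Y%m%d-%H%M%S"
BACKUP_ID_FORMAT = "%Y-%m-%dT%H:%M:%SZ"


def parse_backup_id(label: str) -> tuple:
    """Map a pgBackRest backup label to (backup id, backup type)."""
    if label.endswith("F"):
        timestamp, backup_type = label, "full"
    elif label.endswith("D"):
        timestamp, backup_type = label.split("_")[1], "differential"
    elif label.endswith("I"):
        timestamp, backup_type = label.split("_")[1], "incremental"
    else:
        raise ValueError(f"Unknown label format for backup ID: {label}")
    # Strip the trailing type letter before parsing the timestamp.
    parsed = datetime.strptime(timestamp[:-1], PGBACKREST_BACKUP_ID_FORMAT)
    return datetime.strftime(parsed, BACKUP_ID_FORMAT), backup_type


assert parse_backup_id("20230101-090000F") == ("2023-01-01T09:00:00Z", "full")
assert parse_backup_id("20230101-090000F_20230102-090000D") == (
    "2023-01-02T09:00:00Z",
    "differential",
)
```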
@@ -569,8 +587,16 @@ def _on_s3_credential_gone(self, _) -> None:
         if self.charm.is_blocked and self.charm.unit.status.message in S3_BLOCK_MESSAGES:
             self.charm.unit.status = ActiveStatus()

-    def _on_create_backup_action(self, event) -> None:
+    def _on_create_backup_action(self, event) -> None:  # noqa: C901
         """Request that pgBackRest creates a backup."""
+        backup_type = event.params.get("type", "full")
+        if backup_type not in BACKUP_TYPE_OVERRIDES:
+            error_message = f"Invalid backup type: {backup_type}. Possible values: {', '.join(BACKUP_TYPE_OVERRIDES.keys())}."
+            logger.error(f"Backup failed: {error_message}")
+            event.fail(error_message)
+            return
+
+        logger.info(f"A {backup_type} backup has been requested on this unit")
         can_unit_perform_backup, validation_message = self._can_unit_perform_backup()
         if not can_unit_perform_backup:
             logger.error(f"Backup failed: {validation_message}")
@@ -612,7 +638,7 @@ def _on_create_backup_action(self, event) -> None:
             # (reference: https://github.com/pgbackrest/pgbackrest/issues/2007)
             self.charm.update_config(is_creating_backup=True)

-        self._run_backup(event, s3_parameters, datetime_backup_requested)
+        self._run_backup(event, s3_parameters, datetime_backup_requested, backup_type)

         if not self.charm.is_primary:
             # Remove the rule that marks the cluster as in a creating backup state
@@ -623,14 +649,18 @@ def _on_create_backup_action(self, event) -> None:
         self.charm.unit.status = ActiveStatus()

     def _run_backup(
-        self, event: ActionEvent, s3_parameters: Dict, datetime_backup_requested: str
+        self,
+        event: ActionEvent,
+        s3_parameters: Dict,
+        datetime_backup_requested: str,
+        backup_type: str,
     ) -> None:
         command = [
             PGBACKREST_EXECUTABLE,
             PGBACKREST_CONFIGURATION_FILE,
             f"--stanza={self.stanza_name}",
             "--log-level-console=debug",
-            "--type=full",
+            f"--type={BACKUP_TYPE_OVERRIDES[backup_type]}",
             "backup",
         ]
         if self.charm.is_primary:
@@ -650,7 +680,7 @@ def _run_backup(
         else:
             # Generate a backup id from the current date and time if the backup failed before
             # generating the backup label (our backup id).
-            backup_id = datetime.strftime(datetime.now(), "%Y%m%d-%H%M%SF")
+            backup_id = self._generate_fake_backup_id(backup_type)

         # Upload the logs to S3.
         logs = f"""Stdout:
@@ -762,7 +792,7 @@ def _on_restore_action(self, event):
         # Mark the cluster as in a restoring backup state and update the Patroni configuration.
logger.info("Configuring Patroni to restore the backup") self.charm.app_peer_data.update({ - "restoring-backup": f"{datetime.strftime(datetime.strptime(backup_id, BACKUP_ID_FORMAT), PGBACKREST_BACKUP_ID_FORMAT)}F", + "restoring-backup": self._fetch_backup_from_id(backup_id), "restore-stanza": backups[backup_id], }) self.charm.update_config() @@ -792,6 +822,37 @@ def _on_restore_action(self, event): event.set_results({"restore-status": "restore started"}) + def _generate_fake_backup_id(self, backup_type: str) -> str: + """Creates a backup id for failed backup operations (to store log file).""" + if backup_type == "full": + return datetime.strftime(datetime.now(), "%Y%m%d-%H%M%SF") + if backup_type == "differential": + backups = self._list_backups(show_failed=False, parse=False).keys() + last_full_backup = None + for label in backups[::-1]: + if label.endswith("F"): + last_full_backup = label + break + + if last_full_backup is None: + raise TypeError("Differential backup requested but no previous full backup") + return f'{last_full_backup}_{datetime.strftime(datetime.now(), "%Y%m%d-%H%M%SD")}' + if backup_type == "incremental": + backups = self._list_backups(show_failed=False, parse=False).keys() + if not backups: + raise TypeError("Incremental backup requested but no previous successful backup") + return f'{backups[-1]}_{datetime.strftime(datetime.now(), "%Y%m%d-%H%M%SI")}' + + def _fetch_backup_from_id(self, backup_id: str) -> str: + """Fetches backup's pgbackrest label from backup id.""" + timestamp = f'{datetime.strftime(datetime.strptime(backup_id, "%Y-%m-%dT%H:%M:%SZ"), "%Y%m%d-%H%M%S")}' + backups = self._list_backups(show_failed=False, parse=False).keys() + for label in backups: + if timestamp in label: + return label + + return None + def _pre_restore_checks(self, event: ActionEvent) -> bool: """Run some checks before starting the restore. diff --git a/src/charm.py b/src/charm.py index 99592539af..535bff80e1 100755 --- a/src/charm.py +++ b/src/charm.py @@ -145,7 +145,6 @@ def __init__(self, *args): self.framework.observe(self.on.get_primary_action, self._on_get_primary) self.framework.observe(self.on[PEER].relation_changed, self._on_peer_relation_changed) self.framework.observe(self.on.secret_changed, self._on_peer_relation_changed) - self.framework.observe(self.on.secret_remove, self._on_peer_relation_changed) self.framework.observe(self.on[PEER].relation_departed, self._on_peer_relation_departed) self.framework.observe(self.on.pgdata_storage_detaching, self._on_pgdata_storage_detaching) self.framework.observe(self.on.start, self._on_start) diff --git a/src/config.py b/src/config.py index c8196b46ba..a472921ec2 100644 --- a/src/config.py +++ b/src/config.py @@ -79,6 +79,7 @@ class CharmConfig(BaseConfigModel): plugin_icu_ext_enable: bool plugin_pltcl_enable: bool plugin_postgis_enable: bool + plugin_timescaledb_enable: bool plugin_address_standardizer_enable: bool plugin_address_standardizer_data_us_enable: bool plugin_postgis_tiger_geocoder_enable: bool diff --git a/src/constants.py b/src/constants.py index fd42e0e247..c157accc04 100644 --- a/src/constants.py +++ b/src/constants.py @@ -36,7 +36,7 @@ SNAP_PACKAGES = [ ( POSTGRESQL_SNAP_NAME, - {"revision": {"aarch64": "112", "x86_64": "113"}, "channel": "14/stable"}, + {"revision": {"aarch64": "114", "x86_64": "115"}, "channel": "14/stable"}, ) ] @@ -72,3 +72,4 @@ ENDPOINT_SIMULTANEOUSLY_BLOCKING_MESSAGE = ( "Please choose one endpoint to use. No need to relate all of them simultaneously!" 
 )
+BACKUP_TYPE_OVERRIDES = {"full": "full", "differential": "diff", "incremental": "incr"}
diff --git a/templates/patroni.yml.j2 b/templates/patroni.yml.j2
index 1a12292807..64c00381f7 100644
--- a/templates/patroni.yml.j2
+++ b/templates/patroni.yml.j2
@@ -91,6 +91,7 @@ bootstrap:
         log_truncate_on_rotation: 'on'
         logging_collector: 'on'
         wal_level: logical
+        shared_preload_libraries: 'timescaledb'
 {%- if pg_parameters %}
 {%- for key, value in pg_parameters.items() %}
         {{key}}: {{value}}
diff --git a/templates/pgbackrest.conf.j2 b/templates/pgbackrest.conf.j2
index 2f7c823332..46776c2e7c 100644
--- a/templates/pgbackrest.conf.j2
+++ b/templates/pgbackrest.conf.j2
@@ -4,6 +4,7 @@ lock-path=/tmp
 log-path={{ log_path }}
 repo1-retention-full-type=time
 repo1-retention-full={{ retention_full }}
+repo1-retention-history=365
 repo1-type=s3
 repo1-path={{ path }}
 repo1-s3-region={{ region }}
@@ -15,6 +16,8 @@ repo1-s3-ca-file={{ tls_ca_chain }}
 {%- endif %}
 repo1-s3-key={{ access_key }}
 repo1-s3-key-secret={{ secret_key }}
+repo1-block=y
+repo1-bundle=y
 start-fast=y
 {%- if enable_tls %}
 tls-server-address=*
diff --git a/tests/integration/test_backups.py b/tests/integration/test_backups.py
index d5a473686f..887db7fd5b 100644
--- a/tests/integration/test_backups.py
+++ b/tests/integration/test_backups.py
@@ -165,7 +165,8 @@ async def test_backup(ops_test: OpsTest, cloud_configs: Tuple[Dict, Dict], charm
     action = await ops_test.model.units.get(replica).run_action("list-backups")
     await action.wait()
     backups = action.results.get("backups")
-    assert backups, "backups not outputted"
+    # 2 lines for header output, 1 backup line ==> 3 total lines
+    assert len(backups.split("\n")) == 3, "full backup is not outputted"
     await ops_test.model.wait_for_idle(status="active", timeout=1000)

     # Write some data.
@@ -175,6 +176,32 @@ async def test_backup(ops_test: OpsTest, cloud_configs: Tuple[Dict, Dict], charm
         connection.cursor().execute("CREATE TABLE backup_table_2 (test_collumn INT );")
     connection.close()
+    # Run the "create backup" action.
+    logger.info("creating a backup")
+    action = await ops_test.model.units.get(replica).run_action(
+        "create-backup", **{"type": "differential"}
+    )
+    await action.wait()
+    backup_status = action.results.get("backup-status")
+    assert backup_status, "backup hasn't succeeded"
+    async with ops_test.fast_forward():
+        await ops_test.model.wait_for_idle(status="active", timeout=1000)
+
+    # Run the "list backups" action.
+    logger.info("listing the available backups")
+    action = await ops_test.model.units.get(replica).run_action("list-backups")
+    await action.wait()
+    backups = action.results.get("backups")
+    # 2 lines for header output, 2 backup lines ==> 4 total lines
+    assert len(backups.split("\n")) == 4, "differential backup is not outputted"
+    await ops_test.model.wait_for_idle(status="active", timeout=1000)
+
+    # Write some data.
+    logger.info("creating a second table in the database")
+    with db_connect(host=address, password=password) as connection:
+        connection.autocommit = True
+        connection.cursor().execute("CREATE TABLE backup_table_3 (test_collumn INT );")
+    connection.close()
     # Scale down to be able to restore.
     async with ops_test.fast_forward():
         await ops_test.model.destroy_unit(replica)
@@ -186,14 +213,61 @@ async def test_backup(ops_test: OpsTest, cloud_configs: Tuple[Dict, Dict], charm
             remaining_unit = unit
             break

-    # Run the "restore backup" action.
+    # Run the "restore backup" action for differential backup.
     for attempt in Retrying(
         stop=stop_after_attempt(10), wait=wait_exponential(multiplier=1, min=2, max=30)
     ):
         with attempt:
             logger.info("restoring the backup")
-            most_recent_backup = backups.split("\n")[-1]
-            backup_id = most_recent_backup.split()[0]
+            last_diff_backup = backups.split("\n")[-1]
+            backup_id = last_diff_backup.split()[0]
             action = await remaining_unit.run_action("restore", **{"backup-id": backup_id})
             await action.wait()
             restore_status = action.results.get("restore-status")
             assert restore_status, "restore hasn't succeeded"
+
+    # Wait for the restore to complete.
+    async with ops_test.fast_forward():
+        await ops_test.model.wait_for_idle(status="active", timeout=1000)
+
+    # Check that the backup was correctly restored by having only the first created table.
+    logger.info("checking that the backup was correctly restored")
+    primary = await get_primary(ops_test, remaining_unit.name)
+    address = get_unit_address(ops_test, primary)
+    with db_connect(
+        host=address, password=password
+    ) as connection, connection.cursor() as cursor:
+        cursor.execute(
+            "SELECT EXISTS (SELECT FROM information_schema.tables"
+            " WHERE table_schema = 'public' AND table_name = 'backup_table_1');"
+        )
+        assert cursor.fetchone()[
+            0
+        ], "backup wasn't correctly restored: table 'backup_table_1' doesn't exist"
+        cursor.execute(
+            "SELECT EXISTS (SELECT FROM information_schema.tables"
+            " WHERE table_schema = 'public' AND table_name = 'backup_table_2');"
+        )
+        assert cursor.fetchone()[
+            0
+        ], "backup wasn't correctly restored: table 'backup_table_2' doesn't exist"
+        cursor.execute(
+            "SELECT EXISTS (SELECT FROM information_schema.tables"
+            " WHERE table_schema = 'public' AND table_name = 'backup_table_3');"
+        )
+        assert not cursor.fetchone()[
+            0
+        ], "backup wasn't correctly restored: table 'backup_table_3' exists"
+    connection.close()
+
+    # Run the "restore backup" action for full backup.
+    for attempt in Retrying(
+        stop=stop_after_attempt(10), wait=wait_exponential(multiplier=1, min=2, max=30)
+    ):
+        with attempt:
+            logger.info("restoring the backup")
+            last_full_backup = backups.split("\n")[-2]
+            backup_id = last_full_backup.split()[0]
             action = await remaining_unit.run_action("restore", **{"backup-id": backup_id})
             await action.wait()
             restore_status = action.results.get("restore-status")
             assert restore_status, "restore hasn't succeeded"
@@ -224,6 +298,13 @@ async def test_backup(ops_test: OpsTest, cloud_configs: Tuple[Dict, Dict], charm
         assert not cursor.fetchone()[
             0
         ], "backup wasn't correctly restored: table 'backup_table_2' exists"
+        cursor.execute(
+            "SELECT EXISTS (SELECT FROM information_schema.tables"
+            " WHERE table_schema = 'public' AND table_name = 'backup_table_3');"
+        )
+        assert not cursor.fetchone()[
+            0
+        ], "backup wasn't correctly restored: table 'backup_table_3' exists"
     connection.close()

     # Run the following steps only in one cloud (it's enough for those checks).
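The line-count assertions and row slicing above rely on `_format_backup_list` emitting a two-line header followed by one row per backup, oldest first. A hypothetical sample of the `list-backups` output the test slices (column padding illustrative, not the charm's exact alignment):

```python
# Sketch of the action output asserted in the integration test above.
backups = (
    "backup-id | backup-type | backup-status\n"
    "----------------------------------------------------\n"
    "2023-01-01T09:00:00Z | full | finished\n"
    "2023-01-01T10:00:00Z | differential | finished"
)
assert len(backups.split("\n")) == 4  # 2 header lines + 2 backup rows

last_diff_backup = backups.split("\n")[-1]  # newest backup is the last row
backup_id = last_diff_backup.split()[0]     # backup-id is the first column
assert backup_id == "2023-01-01T10:00:00Z"
```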
diff --git a/tests/integration/test_plugins.py b/tests/integration/test_plugins.py
index ce395469a9..5d78dcd3aa 100644
--- a/tests/integration/test_plugins.py
+++ b/tests/integration/test_plugins.py
@@ -78,11 +78,14 @@
 ADDRESS_STANDARDIZER_EXTENSION_STATEMENT = "SELECT num, street, city, zip, zipplus FROM parse_address('1 Devonshire Place, Boston, MA 02109-1234');"
 ADDRESS_STANDARDIZER_DATA_US_EXTENSION_STATEMENT = "SELECT house_num, name, suftype, city, country, state, unit FROM standardize_address('us_lex', 'us_gaz', 'us_rules', 'One Devonshire Place, PH 301, Boston, MA 02109');"
 POSTGIS_TIGER_GEOCODER_EXTENSION_STATEMENT = "SELECT * FROM standardize_address('tiger.pagc_lex', 'tiger.pagc_gaz', 'tiger.pagc_rules', 'One Devonshire Place, PH 301, Boston, MA 02109-1234');"
-POSTGIS_TOPOLOGY_STATEMENT = "SELECT topology.CreateTopology('nyc_topo', 26918, 0.5);"
-POSTGIS_RASTER_STATEMENT = "CREATE TABLE test_postgis_raster (name varchar, rast raster);"
+POSTGIS_TOPOLOGY_EXTENSION_STATEMENT = "SELECT topology.CreateTopology('nyc_topo', 26918, 0.5);"
+POSTGIS_RASTER_EXTENSION_STATEMENT = (
+    "CREATE TABLE test_postgis_raster (name varchar, rast raster);"
+)
 VECTOR_EXTENSION_STATEMENT = (
     "CREATE TABLE vector_test (id bigserial PRIMARY KEY, embedding vector(3));"
 )
+TIMESCALEDB_EXTENSION_STATEMENT = "CREATE TABLE test_timescaledb (time TIMESTAMPTZ NOT NULL); SELECT create_hypertable('test_timescaledb', 'time');"


 @pytest.mark.group(1)
@@ -153,9 +156,10 @@ async def test_plugins(ops_test: OpsTest) -> None:
         "plugin_address_standardizer_enable": ADDRESS_STANDARDIZER_EXTENSION_STATEMENT,
         "plugin_address_standardizer_data_us_enable": ADDRESS_STANDARDIZER_DATA_US_EXTENSION_STATEMENT,
         "plugin_postgis_tiger_geocoder_enable": POSTGIS_TIGER_GEOCODER_EXTENSION_STATEMENT,
-        "plugin_postgis_raster_enable": POSTGIS_RASTER_STATEMENT,
-        "plugin_postgis_topology_enable": POSTGIS_TOPOLOGY_STATEMENT,
+        "plugin_postgis_raster_enable": POSTGIS_RASTER_EXTENSION_STATEMENT,
+        "plugin_postgis_topology_enable": POSTGIS_TOPOLOGY_EXTENSION_STATEMENT,
         "plugin_vector_enable": VECTOR_EXTENSION_STATEMENT,
+        "plugin_timescaledb_enable": TIMESCALEDB_EXTENSION_STATEMENT,
     }

     def enable_disable_config(enabled: False):
diff --git a/tests/unit/test_backups.py b/tests/unit/test_backups.py
index 0363757196..868a1bc3fc 100644
--- a/tests/unit/test_backups.py
+++ b/tests/unit/test_backups.py
@@ -607,15 +607,15 @@ def test_format_backup_list(harness):
     # Test when there are backups.
     backup_list = [
-        ("2023-01-01T09:00:00Z", "physical", "failed: fake error"),
-        ("2023-01-01T10:00:00Z", "physical", "finished"),
+        ("2023-01-01T09:00:00Z", "full", "failed: fake error"),
+        ("2023-01-01T10:00:00Z", "full", "finished"),
     ]
     tc.assertEqual(
         harness.charm.backup._format_backup_list(backup_list),
         """backup-id | backup-type | backup-status
----------------------------------------------------
-2023-01-01T09:00:00Z | physical | failed: fake error
-2023-01-01T10:00:00Z | physical | finished""",
+2023-01-01T09:00:00Z | full | failed: fake error
+2023-01-01T10:00:00Z | full | finished""",
     )


@@ -639,8 +639,8 @@ def test_generate_backup_list_output(harness):
         harness.charm.backup._generate_backup_list_output(),
         """backup-id | backup-type | backup-status
----------------------------------------------------
-2023-01-01T09:00:00Z | physical | failed: fake error
-2023-01-01T10:00:00Z | physical | finished""",
+2023-01-01T09:00:00Z | full | failed: fake error
+2023-01-01T10:00:00Z | full | finished""",
     )


@@ -1178,8 +1178,16 @@ def test_on_create_backup_action(harness):
         patch("charm.PostgreSQLBackups._retrieve_s3_parameters") as _retrieve_s3_parameters,
         patch("charm.PostgreSQLBackups._can_unit_perform_backup") as _can_unit_perform_backup,
     ):
-        # Test when the unit cannot perform a backup.
+        # Test when the unit cannot perform a backup because of type.
         mock_event = MagicMock()
+        mock_event.params = {"type": "wrong"}
+        harness.charm.backup._on_create_backup_action(mock_event)
+        mock_event.fail.assert_called_once()
+        mock_event.set_results.assert_not_called()
+
+        # Test when the unit cannot perform a backup because of preflight check.
+        mock_event = MagicMock()
+        mock_event.params = {"type": "full"}
         _can_unit_perform_backup.return_value = (False, "fake validation message")
         harness.charm.backup._on_create_backup_action(mock_event)
         mock_event.fail.assert_called_once()
@@ -1334,15 +1342,15 @@ def test_on_list_backups_action(harness):
         _generate_backup_list_output.side_effect = None
         _generate_backup_list_output.return_value = """backup-id | backup-type | backup-status
----------------------------------------------------
- 2023-01-01T09:00:00Z | physical | failed: fake error
- 2023-01-01T10:00:00Z | physical | finished"""
+ 2023-01-01T09:00:00Z | full | failed: fake error
+ 2023-01-01T10:00:00Z | full | finished"""
         harness.charm.backup._on_list_backups_action(mock_event)
         _generate_backup_list_output.assert_called_once()
         mock_event.set_results.assert_called_once_with({
             "backups": """backup-id | backup-type | backup-status
----------------------------------------------------
- 2023-01-01T09:00:00Z | physical | failed: fake error
- 2023-01-01T10:00:00Z | physical | finished"""
+ 2023-01-01T09:00:00Z | full | failed: fake error
+ 2023-01-01T10:00:00Z | full | finished"""
         })
         mock_event.fail.assert_not_called()
@@ -1357,6 +1365,7 @@ def test_on_restore_action(harness):
         patch("charm.PostgreSQLBackups._execute_command") as _execute_command,
         patch("charm.Patroni.stop_patroni") as _stop_patroni,
         patch("charm.PostgreSQLBackups._list_backups") as _list_backups,
+        patch("charm.PostgreSQLBackups._fetch_backup_from_id") as _fetch_backup_from_id,
         patch("charm.PostgreSQLBackups._pre_restore_checks") as _pre_restore_checks,
     ):
         peer_rel_id = harness.model.get_relation(PEER).id
@@ -1383,6 +1392,7 @@ def test_on_restore_action(harness):
         harness.charm.unit.status = ActiveStatus()
         harness.charm.backup._on_restore_action(mock_event)
         _list_backups.assert_called_once_with(show_failed=False)
+        _fetch_backup_from_id.assert_not_called()
         mock_event.fail.assert_called_once()
         _stop_patroni.assert_not_called()
         _execute_command.assert_not_called()
@@ -1425,6 +1435,7 @@ def test_on_restore_action(harness):
         _restart_database.reset_mock()
         _empty_data_files.return_value = True
         _execute_command.return_value = (1, "", "fake stderr")
+        _fetch_backup_from_id.return_value = "20230101-090000F"
         tc.assertEqual(harness.get_relation_data(peer_rel_id, harness.charm.app), {})
         harness.charm.backup._on_restore_action(mock_event)
         tc.assertEqual(
@@ -1455,6 +1466,7 @@ def test_on_restore_action(harness):
         mock_event.reset_mock()
         _restart_database.reset_mock()
         _execute_command.return_value = (0, "fake stdout", "")
+        _fetch_backup_from_id.return_value = "20230101-090000F"
         harness.charm.backup._on_restore_action(mock_event)
         _restart_database.assert_not_called()
         mock_event.fail.assert_not_called()
diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py
index 48907eb4d0..7370788179 100644
--- a/tests/unit/test_charm.py
+++ b/tests/unit/test_charm.py
@@ -530,6 +530,9 @@ def test_enable_disable_extensions(harness, caplog):
         plugin_vector_enable:
           default: false
           type: boolean
+        plugin_timescaledb_enable:
+          default: false
+          type: boolean
         profile:
           default: production
           type: string"""
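Taken together, the action's `type` parameter reaches pgBackRest through the `BACKUP_TYPE_OVERRIDES` mapping added in `src/constants.py`. A minimal sketch of that path; `build_backup_command` is a hypothetical helper (the charm builds the command inline in `_run_backup`), while the flag names and mapping are taken from this diff:

```python
BACKUP_TYPE_OVERRIDES = {"full": "full", "differential": "diff", "incremental": "incr"}


def build_backup_command(stanza_name: str, backup_type: str) -> list:
    """Translate the action's backup type into a pgBackRest invocation."""
    if backup_type not in BACKUP_TYPE_OVERRIDES:
        # Mirrors the validation in _on_create_backup_action.
        raise ValueError(
            f"Invalid backup type: {backup_type}. "
            f"Possible values: {', '.join(BACKUP_TYPE_OVERRIDES.keys())}."
        )
    return [
        "pgbackrest",
        f"--stanza={stanza_name}",
        "--log-level-console=debug",
        f"--type={BACKUP_TYPE_OVERRIDES[backup_type]}",
        "backup",
    ]


# pgBackRest expects the short type names: full, diff, incr.
assert build_backup_command("test-stanza", "incremental")[-2] == "--type=incr"
```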